In [1]:
# Data Loading & Initial Inspection
import warnings
from datetime import datetime
from pathlib import PureWindowsPath

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# NOTE(review): this silences every warning category for the rest of the
# session; narrow the filter if specific warnings should stay visible.
warnings.filterwarnings('ignore')

# Set display options so printed DataFrames are not truncated
# (no column limit, no width wrapping, no cell-content clipping).
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Load the dataset
file_path = r"D:\Market Projects\options_data_analyzer\Aug '25\Aug 07 Exp\07 Aug\24400CE.csv"
# Derive the displayed file name from the path itself so the "File:" line
# cannot go stale when the path is edited. PureWindowsPath splits on both
# "\" and "/" regardless of the host OS.
file_name = PureWindowsPath(file_path).name

# Read CSV, asking pandas to parse the 'date' column as datetimes
df = pd.read_csv(file_path, parse_dates=['date'])

print("=== DATASET LOADED SUCCESSFULLY ===")
print(f"File: {file_name}")
print(f"Path: {file_path}")
print()

# 1. Check data shape
n_rows, n_cols = df.shape
print("=== DATA SHAPE ===")
print(f"Rows: {n_rows:,}")
print(f"Columns: {n_cols}")
print()

# 2. Check data types
print("=== DATA TYPES ===")
print(df.dtypes)
print()

# 3. Check for null/missing values (per-column counts; only offenders are listed)
print("=== NULL VALUES CHECK ===")
null_counts = df.isnull().sum()
if null_counts.sum() == 0:
    print("✅ No null values found in the dataset")
else:
    print("❌ Null values found:")
    print(null_counts[null_counts > 0])
print()

# 4. Preview first few rows
print("=== FIRST 10 ROWS ===")
print(df.head(10))
print()

# 5. Preview last few rows
print("=== LAST 5 ROWS ===")
print(df.tail())
print()

# 6. Basic statistics
print("=== BASIC STATISTICS ===")
print(df.describe())
print()

# 7. Column information (1-based numbering for readability)
print("=== COLUMN INFORMATION ===")
print("Columns in the dataset:")
for i, col in enumerate(df.columns, 1):
    print(f"{i}. {col}")
print()

# 8. Date range information
# Compute the bounds once and reuse them for the span.
start_date = df['date'].min()
end_date = df['date'].max()
print("=== DATE RANGE ===")
print(f"Start Date: {start_date}")
print(f"End Date: {end_date}")
print(f"Total Time Span: {end_date - start_date}")
print()

# 9. Memory usage
# deep=True also measures the contents of object-dtype columns.
# The total below sums every entry returned by memory_usage(), which by
# default includes the index; the per-column listing covers df.columns only.
print("=== MEMORY USAGE ===")
memory_usage = df.memory_usage(deep=True)
print(f"Total Memory Usage: {memory_usage.sum() / 1024 / 1024:.2f} MB")
print("Memory usage per column:")
for col in df.columns:
    print(f"  {col}: {memory_usage[col] / 1024:.2f} KB")

print("\n" + "="*50)
print("✅ DATASET READY FOR ANALYSIS")
print("="*50)

=== DATASET LOADED SUCCESSFULLY ===
File: 24400CE.csv
Path: D:\Market Projects\options_data_analyzer\Aug '25\Aug 07 Exp\07 Aug\24400CE.csv

=== DATA SHAPE ===
Rows: 64,430
Columns: 5

=== DATA TYPES ===
date         datetime64[ns]
price               float64
qty                   int64
trnvr               float64
cum_trnvr           float64
dtype: object

=== NULL VALUES CHECK ===
✅ No null values found in the dataset

=== FIRST 10 ROWS ===
                 date   price   qty       trnvr   cum_trnvr
0 2025-08-07 09:15:00  132.40  1125   148950.00   148950.00
1 2025-08-07 09:15:00  132.55     0        0.00   148950.00
2 2025-08-07 09:15:00  132.65  1425   189026.25   337976.25
3 2025-08-07 09:15:00  137.20     0        0.00   337976.25
4 2025-08-07 09:15:00  133.80     0        0.00   337976.25
5 2025-08-07 09:15:00  130.90     0        0.00   337976.25
6 2025-08-07 09:15:01  133.60     0        0.00   337976.25
7 2025-08-07 09:15:01  132.70  9525  1263967.50  1601943.75
8 2025-08-07 09