In [1]:
import pandas as pd
import os

# Location of the CSV file to load
file_path = r"D:\Market Projects\options_data_analyzer\Aug '25\Aug 07 Exp\07 Aug\BEL_EQ.csv"

# Read the file with pandas' defaults (UTF-8 encoding, comma-separated values)
df = pd.read_csv(file_path)

# Report what was loaded: source location, dimensions and column names
print("Data loaded successfully!")
print(f"File path: {file_path}")
print(f"Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")

# Each section below is a heading line followed by the matching summary;
# sep="\n" puts the heading and the table on separate lines.
print("\nFirst few rows:", df.head(), sep="\n")
print("\nData types:", df.dtypes, sep="\n")
print("\nBasic statistics:", df.describe(), sep="\n")

Data loaded successfully!
File path: D:\Market Projects\options_data_analyzer\Aug '25\Aug 07 Exp\07 Aug\BEL_EQ.csv
Shape: (18250, 5)
Columns: ['date', 'price', 'qty', 'trnvr', 'cum_trnvr']

First few rows:
                     date   price    qty        trnvr    cum_trnvr
0  2025-08-07 09:15:00 AM  386.85  65740  25431519.00  25431519.00
1  2025-08-07 09:15:01 AM  386.65      0         0.00  25431519.00
2  2025-08-07 09:15:01 AM  386.30      0         0.00  25431519.00
3  2025-08-07 09:15:01 AM  386.30    895    345738.50  25777257.50
4  2025-08-07 09:15:01 AM  386.75   1401    541836.75  26319094.25

Data types:
date          object
price        float64
qty            int64
trnvr        float64
cum_trnvr    float64
dtype: object

Basic statistics:
              price            qty         trnvr     cum_trnvr
count  18250.000000   18250.000000  1.825000e+04  1.825000e+04
mean     386.537181     630.694301  2.439435e+05  2.317732e+09
std        1.570300    2651.792039  1.026096e+06  1.

In [2]:
# Preview first and last 10 rows to check ordering
print("=== FIRST 10 ROWS ===")
print(df.head(10))
print("\n" + "="*50 + "\n")
print("=== LAST 10 ROWS ===")
print(df.tail(10))

# Check if data is in chronological order
print("\n" + "="*50 + "\n")
print("=== CHRONOLOGICAL ORDER CHECK ===")

# Convert the date column to datetime only when it is not one already, so
# re-running this cell does not re-parse the column. The raw strings use a
# 12-hour clock with an AM/PM suffix (e.g. "2025-08-07 03:29:31 PM"); giving
# the format explicitly avoids depending on pandas' format inference.
if not pd.api.types.is_datetime64_any_dtype(df['date']):
    df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d %I:%M:%S %p")

# Endpoints of the file as stored (row order, not min/max)
first_time = df['date'].iloc[0]
last_time = df['date'].iloc[-1]

print(f"First timestamp: {first_time}")
print(f"Last timestamp: {last_time}")

# Decide the ordering from the WHOLE column, not just the two endpoints:
# comparing only first vs. last would report "chronological" even when rows
# in the middle are out of order. Both monotonic checks are non-strict, so
# repeated timestamps (several ticks within the same second) are allowed.
if df['date'].is_monotonic_increasing:
    print("✓ Data is in CHRONOLOGICAL order (ascending)")
    print("  - First row: Earliest time")
    print("  - Last row: Latest time")
elif df['date'].is_monotonic_decreasing:
    print("✗ Data is in REVERSE CHRONOLOGICAL order (descending)")
    print("  - First row: Latest time")
    print("  - Last row: Earliest time")
else:
    # Neither ascending nor descending: count rows that step backwards in time
    backward_steps = int((df['date'].diff() < pd.Timedelta(0)).sum())
    print("✗ Data is NOT sorted by time (neither ascending nor descending)")
    print(f"  - {backward_steps} row(s) have an earlier timestamp than the row before them")

# Show time range (last row minus first row, as stored in the file)
time_range = last_time - first_time
print(f"\nTotal time range: {time_range}")

=== FIRST 10 ROWS ===
                     date   price    qty        trnvr    cum_trnvr
0  2025-08-07 09:15:00 AM  386.85  65740  25431519.00  25431519.00
1  2025-08-07 09:15:01 AM  386.65      0         0.00  25431519.00
2  2025-08-07 09:15:01 AM  386.30      0         0.00  25431519.00
3  2025-08-07 09:15:01 AM  386.30    895    345738.50  25777257.50
4  2025-08-07 09:15:01 AM  386.75   1401    541836.75  26319094.25
5  2025-08-07 09:15:02 AM  386.80   1795    694306.00  27013400.25
6  2025-08-07 09:15:02 AM  386.95    741    286729.95  27300130.20
7  2025-08-07 09:15:03 AM  386.85      0         0.00  27300130.20
8  2025-08-07 09:15:03 AM  386.50      0         0.00  27300130.20
9  2025-08-07 09:15:03 AM  386.90   2717   1051207.30  28351337.50


=== LAST 10 ROWS ===
                         date   price   qty       trnvr     cum_trnvr
18240  2025-08-07 03:29:31 PM  388.25   226    87744.50  4.446928e+09
18241  2025-08-07 03:29:31 PM  388.10     0        0.00  4.446928e+09
18242  2

In [3]:
# Parse the timestamp column and derive calendar / clock components from it
print("=== DATETIME CONVERSION AND FEATURE EXTRACTION ===")

# Make sure the timestamp column holds datetimes before using the .dt accessor
df['date'] = pd.to_datetime(df['date'])

# New column name -> attribute of the `.dt` accessor that produces it.
# Dict order is the order in which the columns are appended to the frame.
dt_features = {
    'date_only': 'date',
    'time': 'time',
    'hour': 'hour',
    'minute': 'minute',
    'second': 'second',
}
for column, accessor_attr in dt_features.items():
    df[column] = getattr(df['date'].dt, accessor_attr)

# Report the dtype of every derived column
print("New datetime features added:")
print(f"  - date_only: {df['date_only'].dtype}")
print(f"  - time: {df['time'].dtype}")
print(f"  - hour: {df['hour'].dtype}")
print(f"  - minute: {df['minute'].dtype}")
print(f"  - second: {df['second'].dtype}")

# Preview the timestamp, its derived parts, and the price/quantity columns
preview_columns = ['date', 'date_only', 'time', 'hour', 'minute', 'second', 'price', 'qty']
print("\n=== SAMPLE DATA WITH NEW FEATURES ===")
print(df[preview_columns].head(10))

# Sanity-check the conversion: dtype, endpoints, and the calendar dates covered
print(f"\n=== DATETIME VERIFICATION ===")
print(f"Original date column dtype: {df['date'].dtype}")
print(f"First timestamp: {df['date'].iloc[0]}")
print(f"Last timestamp: {df['date'].iloc[-1]}")
print(f"Total unique dates: {df['date_only'].nunique()}")
print(f"Date range: {df['date_only'].min()} to {df['date_only'].max()}")

=== DATETIME CONVERSION AND FEATURE EXTRACTION ===
New datetime features added:
  - date_only: object
  - time: object
  - hour: int32
  - minute: int32
  - second: int32

=== SAMPLE DATA WITH NEW FEATURES ===
                 date   date_only      time  hour  minute  second   price  \
0 2025-08-07 09:15:00  2025-08-07  09:15:00     9      15       0  386.85   
1 2025-08-07 09:15:01  2025-08-07  09:15:01     9      15       1  386.65   
2 2025-08-07 09:15:01  2025-08-07  09:15:01     9      15       1  386.30   
3 2025-08-07 09:15:01  2025-08-07  09:15:01     9      15       1  386.30   
4 2025-08-07 09:15:01  2025-08-07  09:15:01     9      15       1  386.75   
5 2025-08-07 09:15:02  2025-08-07  09:15:02     9      15       2  386.80   
6 2025-08-07 09:15:02  2025-08-07  09:15:02     9      15       2  386.95   
7 2025-08-07 09:15:03  2025-08-07  09:15:03     9      15       3  386.85   
8 2025-08-07 09:15:03  2025-08-07  09:15:03     9      15       3  386.50   
9 2025-08-07 09:15:0