In [4]:
# ============================================
# NOTEBOOK 2: DATA CLEANING
# ============================================

# CELL 1: Import
import pandas as pd
import os

# CELL 2: Check location
# Report where the notebook is running and what the relative data folder
# contains, so a wrong working directory is obvious before any file is read.
current_dir = os.getcwd()
print("Working directory:", current_dir)
data_entries = os.listdir('../data')
print("Data folder contents:", data_entries)

# CELL 3: Load data
# Read the two raw CSV files from the data folder into DataFrames, then
# report the (rows, columns) shape of each.
print("\nLoading data...")
matches_df = pd.read_csv('../data/matches.csv')
deliveries_df = pd.read_csv('../data/deliveries.csv')
print(
    f"Matches: {matches_df.shape}",
    f"Deliveries: {deliveries_df.shape}",
    sep="\n",
)

# CELL 4: Clean matches
# Drop exact duplicate rows, then parse the 'date' column into datetimes.
print("\nCleaning matches...")
matches_df = matches_df.drop_duplicates()
# errors='coerce' turns unparseable dates into NaT instead of raising, so
# count how many values were lost to coercion and surface that number rather
# than letting the rows lose their date silently.
missing_dates_before = matches_df['date'].isna().sum()
matches_df['date'] = pd.to_datetime(matches_df['date'], errors='coerce')
unparsed_dates = matches_df['date'].isna().sum() - missing_dates_before
if unparsed_dates:
    print(f"⚠ {unparsed_dates} date value(s) could not be parsed and were set to NaT")
print("✓ Matches cleaned")

# CELL 5: Clean deliveries
# Replace every missing value in the deliveries table with 0.
# NOTE(review): the fill is applied to all columns, so any missing entries in
# non-numeric columns also become 0 — confirm that is what downstream expects.
print("\nCleaning deliveries...")
deliveries_df = deliveries_df.fillna(value=0)
print("✓ Deliveries cleaned")

# CELL 6: Save
# Write each cleaned frame next to its raw source file. index=False keeps the
# DataFrame row index out of the CSV. Both files are written before either
# confirmation is printed.
print("\nSaving...")
for frame, destination in (
    (matches_df, '../data/matches_cleaned.csv'),
    (deliveries_df, '../data/deliveries_cleaned.csv'),
):
    frame.to_csv(destination, index=False)
print("✓ Saved matches_cleaned.csv")
print("✓ Saved deliveries_cleaned.csv")

# CELL 7: Verify
# List what is now in the data folder so the newly written *_cleaned.csv
# files can be confirmed by eye; nothing is checked programmatically.
print("\nVerifying...", "Files in data folder:", sep="\n")
data_entries = os.listdir('../data')
for entry in data_entries:
    print(f"  - {entry}")

print("\n✅ DONE!")


Working directory: c:\Users\Sai Tejeswar\Documents\ipl-cricket-analytics\notebooks
Data folder contents: ['deliveries.csv', 'matches.csv']

Loading data...
Matches: (1095, 20)
Deliveries: (260920, 17)

Cleaning matches...
✓ Matches cleaned

Cleaning deliveries...
✓ Deliveries cleaned

Saving...
✓ Saved matches_cleaned.csv
✓ Saved deliveries_cleaned.csv

Verifying...
Files in data folder:
  - deliveries.csv
  - deliveries_cleaned.csv
  - matches.csv
  - matches_cleaned.csv

✅ DONE!
