# 📊 Netflix Movies Data Cleaning
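This notebook cleans the Netflix Movies dataset (titles up to 2025): it drops the empty `duration` column, fills missing text fields with placeholders, removes duplicate rows, standardizes `country` values, parses `date_added` as a datetime, and saves the result to a new CSV file.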

In [None]:

import pandas as pd


In [None]:

# Read the raw Netflix movies CSV (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer='netflix_movies_detailed_up_to_2025.csv')

# Preview the first five rows to confirm the file loaded as expected
df.head(n=5)


In [None]:

# Dataset information: print each column's non-null count and dtype
# so that columns with missing values stand out before cleaning
df.info()


In [None]:

# Drop 'duration' column since it's completely empty
df.drop(columns=['duration'], inplace=True)

# Placeholder to use for each text column when its value is missing
fill_values = {
    'director': 'Unknown',
    'cast': 'Unknown',
    'description': 'Unknown',
    'genres': 'Not Specified',
    'country': 'Not Specified',
}

# Assign each filled column back to the frame. The previous form,
# df[col].fillna(..., inplace=True), is chained assignment on an
# intermediate Series: pandas 2.x emits a FutureWarning for it and, with
# Copy-on-Write enabled (the default from pandas 3.0), df is left unchanged.
for col, placeholder in fill_values.items():
    df[col] = df[col].fillna(placeholder)


In [None]:

# Standardize 'country' values first (trim surrounding whitespace, Title Case)
# so that rows differing only in the spacing or casing of the country become
# identical and can be caught by the duplicate check below.
# NOTE(review): str.title() also rewrites acronyms (e.g. 'USA' -> 'Usa') —
# confirm that is acceptable for this dataset.
df['country'] = df['country'].str.strip().str.title()

# Remove duplicate rows (after normalization, so near-duplicates are included)
df.drop_duplicates(inplace=True)


In [None]:

# Parse 'date_added' into datetimes; errors='coerce' turns any value that
# cannot be parsed into NaT instead of raising
parsed_dates = pd.to_datetime(df['date_added'], errors='coerce')
df['date_added'] = parsed_dates

# Normalize column labels to snake_case: trim, lowercase, spaces -> underscores
df.columns = [
    label.strip().lower().replace(' ', '_')
    for label in df.columns
]


In [None]:

# Final check of the cleaned dataset: re-print the per-column non-null counts
# and dtypes, then preview the first rows
df.info()
df.head()


In [None]:

# Write the cleaned frame to disk; index=False leaves the row index out of the file
df.to_csv(path_or_buf='cleaned_netflix_data.csv', index=False)

# Confirm completion to the user
print("✅ Cleaned data saved as 'cleaned_netflix_data.csv'")
