In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Demo pipeline: build a tiny movie table, show it, clean it, show it again,
# and write the cleaned table to disk.

# Raw sample records; the 4th row duplicates the 1st and the 5th row has
# missing values in 'Movie Title' and 'Rating'.
data = {
    'Movie Title': [
        'Inception',
        'Interstellar',
        'The Dark Knight',
        'Inception',
        np.nan,
    ],
    'Genre': ['Sci-Fi', 'Sci-Fi', 'Action', 'Sci-Fi', 'Action'],
    'Rating': [8.8, 8.6, 9.0, 8.8, np.nan],
    'Release Year': [2010, 2014, 2008, 2010, 2012],
    'Director': ['Nolan', 'Nolan', 'Nolan', 'Nolan', 'Nolan'],
}
df = pd.DataFrame(data)

# Report the untouched table and its per-column count of missing cells.
print("Original Dataset:")
print(df)
print("\nMissing Values:")
print(df.isna().sum())

# The replacement rating is the mean of the ratings present in the raw
# table (computed before any rows are dropped).
mean_rating = df['Rating'].mean()

# Cleaning steps, applied in order:
#   1. fill missing titles with 'Unknown' and missing ratings with the mean
#   2. drop rows that are exact duplicates
#   3. spell out the 'Sci-Fi' genre label
#   4. shorten the 'Release Year' column name to 'Year'
#   5. keep only rows whose rating does not exceed 9.5 (demo outlier rule)
#   6. upper-case every movie title
df = (
    df.fillna({'Movie Title': 'Unknown', 'Rating': mean_rating})
    .drop_duplicates()
    .replace({'Genre': {'Sci-Fi': 'Science Fiction'}})
    .rename(columns={'Release Year': 'Year'})
    .loc[lambda frame: frame['Rating'] <= 9.5]
    .assign(**{'Movie Title': lambda frame: frame['Movie Title'].str.upper()})
)

# Show the result and persist it without the index column.
print("\nCleaned Dataset:")
print(df)
df.to_csv('cleaned_movie_data.csv', index=False)


Original Dataset:
       Movie Title   Genre  Rating  Release Year Director
0        Inception  Sci-Fi     8.8          2010    Nolan
1     Interstellar  Sci-Fi     8.6          2014    Nolan
2  The Dark Knight  Action     9.0          2008    Nolan
3        Inception  Sci-Fi     8.8          2010    Nolan
4              NaN  Action     NaN          2012    Nolan

Missing Values:
Movie Title     1
Genre           0
Rating          1
Release Year    0
Director        0
dtype: int64

Cleaned Dataset:
       Movie Title            Genre  Rating  Year Director
0        INCEPTION  Science Fiction     8.8  2010    Nolan
1     INTERSTELLAR  Science Fiction     8.6  2014    Nolan
2  THE DARK KNIGHT           Action     9.0  2008    Nolan
4          UNKNOWN           Action     8.8  2012    Nolan


  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
