In [37]:
import pandas as pd
import plotly.io as pio
import plotly.express as px

# Load the raw movie list. The path is relative, so it is resolved against
# the kernel's current working directory.
data = pd.read_csv("movies_with_genres.csv")

# Print the column names so the cleaning steps below can be checked against
# them. (Bare `data.head()` / `data.tail()` calls were removed from this cell:
# only a cell's last expression is rendered, so they displayed nothing.)
print(data.columns)

Index(['Title', 'Genre', 'Rating/Score'], dtype='object')


In [38]:

# --- Clean Title ---
# Drop commas and trim surrounding whitespace.
# NOTE: astype(str) turns a missing title into the literal text "nan".
data['Title'] = data['Title'].astype(str).str.replace(',', '', regex=False).str.strip()

# --- Clean & Split Genre ---
def _split_genres(raw):
    """Split one raw genre cell into a list of cleaned genre names.

    Commas are removed, the text is split on '/', and every piece is stripped
    and passed through str.capitalize() (first letter upper-cased, the rest
    lower-cased: "horror" -> "Horror", but also "Sci-Fi" -> "Sci-fi").

    Missing values are returned unchanged so they stay missing instead of
    being stringified into a bogus "Nan" genre, and empty pieces (as produced
    by "Comedy/" or "Comedy//Drama") are dropped.
    """
    if pd.isna(raw):
        return raw
    pieces = (piece.strip().capitalize() for piece in str(raw).replace(',', '').split('/'))
    return [piece for piece in pieces if piece]

data['Genre'] = data['Genre'].apply(_split_genres)

# Explode list → one row per genre; Title and Rating/Score are repeated on
# every resulting row. A cell that cleaned down to an empty list becomes a
# single row with a missing Genre.
data = data.explode('Genre')

# --- Clean Rating/Score ---
# Remove commas and percent signs so the remaining text can be parsed as a number.
data['Rating/Score'] = (
    data['Rating/Score']
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.replace('%', '', regex=False)
)
# Values that still fail to parse (including missing ones) are coerced to NaN
# and then stored as 0; astype(int) also truncates any fractional part.
data['Rating/Score'] = pd.to_numeric(data['Rating/Score'], errors='coerce').fillna(0).astype(int)

# (Optional) Reset index and save cleaned version
# explode() repeats the original index label on every genre row, so rebuild a
# clean 0..n-1 index before writing the file.
data = data.reset_index(drop=True)
data.to_csv("cleaned_movies.csv", index=False)



In [39]:
# Preview the first five rows of the cleaned frame (one row per title/genre pair).
print(data.head(n=5))

                       Title   Genre  Rating/Score
0  ON BECOMING A GUINEA FOWL  Comedy           100
1  ON BECOMING A GUINEA FOWL   Drama           100
2                     EEPHUS  Comedy           100
3                     EEPHUS   Drama           100
4                     EEPHUS  Sports           100


In [40]:
# Same five-row preview of the cleaned frame as in the previous cell.
print(data.head(n=5))

                       Title   Genre  Rating/Score
0  ON BECOMING A GUINEA FOWL  Comedy           100
1  ON BECOMING A GUINEA FOWL   Drama           100
2                     EEPHUS  Comedy           100
3                     EEPHUS   Drama           100
4                     EEPHUS  Sports           100


In [43]:
# Print the cleaned frame (pandas abbreviates long frames in text output),
# followed by the dtype of each column.
print(data, data.dtypes, sep='\n')

                         Title      Genre  Rating/Score
0    ON BECOMING A GUINEA FOWL     Comedy           100
1    ON BECOMING A GUINEA FOWL      Drama           100
2                       EEPHUS     Comedy           100
3                       EEPHUS      Drama           100
4                       EEPHUS     Sports           100
..                         ...        ...           ...
175               MOUNTAINHEAD     Comedy            74
176               MOUNTAINHEAD      Drama            74
177          QUEEN OF THE RING  Biography            73
178          QUEEN OF THE RING      Drama            73
179          QUEEN OF THE RING     Sports            73

[180 rows x 3 columns]
Title           object
Genre           object
Rating/Score     int64
dtype: object


In [44]:
# Column dtypes of the cleaned frame.
print(str(data.dtypes))

Title           object
Genre           object
Rating/Score     int64
dtype: object
