In [242]:
# Step 1: Import libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [243]:
# Step 2: Load dataset
df = pd.read_csv("/content/movies_cleaned.csv")
print("Dataset loaded with rows:", len(df))
df.head(10)

Dataset loaded with rows: 7659


Unnamed: 0,name,genre,year,released,rating,director,writer,star
0,The Shining,Drama,1980,"June 13, 1980 (United States)",8.4,Stanley Kubrick,Stephen King,Jack Nicholson
1,The Blue Lagoon,Adventure,1980,"July 2, 1980 (United States)",5.8,Randal Kleiser,Henry De Vere Stacpoole,Brooke Shields
2,Star Wars: Episode V - The Empire Strikes Back,Action,1980,"June 20, 1980 (United States)",8.7,Irvin Kershner,Leigh Brackett,Mark Hamill
3,Airplane!,Comedy,1980,"July 2, 1980 (United States)",7.7,Jim Abrahams,Jim Abrahams,Robert Hays
4,Caddyshack,Comedy,1980,"July 25, 1980 (United States)",7.3,Harold Ramis,Brian Doyle-Murray,Chevy Chase
5,Friday the 13th,Horror,1980,"May 9, 1980 (United States)",6.4,Sean S. Cunningham,Victor Miller,Betsy Palmer
6,The Blues Brothers,Action,1980,"June 20, 1980 (United States)",7.9,John Landis,Dan Aykroyd,John Belushi
7,Raging Bull,Biography,1980,"December 19, 1980 (United States)",8.2,Martin Scorsese,Jake LaMotta,Robert De Niro
8,Superman II,Action,1980,"June 19, 1981 (United States)",6.8,Richard Lester,Jerry Siegel,Gene Hackman
9,The Long Riders,Biography,1980,"May 16, 1980 (United States)",7.0,Walter Hill,Bill Bryden,David Carradine


In [244]:
# Step 3: Create rating categories
def rating_category(r):
    if r >= 8.0:
        return "High"
    elif r >= 5.0:
        return "Medium"
    else:
        return "Low"

df['ratingCategory'] = df['rating'].apply(rating_category)
print("Class distribution:\n", df['ratingCategory'].value_counts())

Class distribution:
 ratingCategory
Medium    6820
Low        564
High       275
Name: count, dtype: int64


In [245]:
# Step 4: Encode categorical columns
encoders = {}
for col in ["genre", "director", "writer", "star"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    encoders[col] = le

In [246]:
# Step 5: Features (X) and Target (y)
X = df[["genre", "director", "writer", "star", "year"]]
y = df["ratingCategory"]

In [247]:
# Step 6: Train/Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [248]:
# Step 7: Train Random Forest
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

In [249]:
# Step 8: Evaluate model
y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))
print(df.columns.tolist())


Model Accuracy: 0.8909921671018277
['name', 'genre', 'year', 'released', 'rating', 'director', 'writer', 'star', 'ratingCategory']


In [250]:
# Step 9: User Input - Search by Movie Name
movie_name = input("Enter a movie name: ")

if movie_name in df["name"].values:   # <-- match your dataset column name
    movie_details = df[df["name"] == movie_name].iloc[0]
    print("\nMovie Details:")
    print("Name:", movie_details["name"])
    print("Genre:", encoders["genre"].inverse_transform([movie_details["genre"]])[0])
    print("Director:", encoders["director"].inverse_transform([movie_details["director"]])[0])
    print("Writer:", encoders["writer"].inverse_transform([movie_details["writer"]])[0])
    print("Star:", encoders["star"].inverse_transform([movie_details["star"]])[0])
    print("Year:", movie_details["year"])

    # Predict rating category
    features = [[movie_details["genre"], movie_details["director"],
                 movie_details["writer"], movie_details["star"],
                 movie_details["year"]]]

    prediction = model.predict(features)[0]
    print("Predicted Rating Category:", prediction)

else:
    print("Movie not found in dataset!")

Enter a movie name: The Long Riders

Movie Details:
Name: The Long Riders
Genre: Biography
Director: Walter Hill
Writer: Bill Bryden
Star: David Carradine
Year: 1980
Predicted Rating Category: Medium




In [None]:
import pickle

# Suppose your trained model is svm_model
with open("model.pkl", "wb") as f:  # Make sure the path is correct
    pickle.dump(model, f)

print("Model saved successfully in model.pkl")

Model saved successfully in model.pkl
