#### IMPORTS

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

#### DATASET 

In [2]:
data = pd.read_csv('IMDb Movies India.csv', encoding='latin-1')


#### DATA PREPROCESSING

In [3]:
data.dropna(subset=['Name', 'Genre', 'Rating', 'Director', 'Actor 1', 'Actor 2', 'Actor 3'], inplace=True)


#### TEST TRAIN SPLIT

In [4]:
X = data.drop('Rating', axis=1)
y = data['Rating']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### MODEL SELECTION 

In [5]:
categorical_features = ['Name', 'Genre', 'Director', 'Actor 1', 'Actor 2', 'Actor 3']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

In [6]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features)
    ])

model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('regressor', RandomForestRegressor())])

#### MODEL TRAINING 

In [7]:
model.fit(X_train, y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('cat',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(fill_value='missing',
                                                                                 strategy='constant')),
                                                                  ('onehot',
                                                                   OneHotEncoder(handle_unknown='ignore'))]),
                                                  ['Name', 'Genre', 'Director',
                                                   'Actor 1', 'Actor 2',
                                                   'Actor 3'])])),
                ('regressor', RandomForestRegressor())])

#### MODEL EVALUATION 

In [9]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)

Mean Squared Error: 1.5666552354497358


#### PREDICTION

In [10]:
def predict_movie_rating(movie):
    movie_data = pd.DataFrame([movie], columns=X.columns)
    rating = model.predict(movie_data)
    return rating[0]

# Step 10: Input and Prediction
movie_name = input("Enter the movie name: ")
genre = input("Enter the genre: ")
director = input("Enter the director: ")
actor_1 = input("Enter the first actor: ")
actor_2 = input("Enter the second actor: ")
actor_3 = input("Enter the third actor: ")

movie = {'Name': movie_name, 'Genre': genre, 'Director': director, 
         'Actor 1': actor_1, 'Actor 2': actor_2, 'Actor 3': actor_3}

predicted_rating = predict_movie_rating(movie)
print(f"Predicted rating for {movie_name}: {predicted_rating}")

Enter the movie name: 3 Idiots
Enter the genre: Comedy
Enter the director: Rajkumar Hirani
Enter the first actor: Aamir Khan
Enter the second actor: Madhavan
Enter the third actor: Mona Singh
Predicted rating for 3 Idiots: 7.543000000000007
