In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score



In [2]:
# Toy movie dataset for the demo (swap in your own data here).
# Every key is a column; the six list positions are the six movies.
data = {
    'Genre': ['Action', 'Comedy', 'Drama'] * 2,
    'Director': ['John', 'Jane'] * 3,
    'Budget': [10_000_000, 15_000_000, 8_000_000, 12_000_000, 9_000_000, 10_000_000],
    'Rating': [7.8, 6.5, 8.2, 7.1, 6.9, 8.4],
}

df = pd.DataFrame.from_dict(data)

In [3]:
# One-hot encode the categorical columns so the regression gets numeric inputs.
# The frame is rebuilt from the raw `data` dict instead of from `df` itself:
# encoding `df` in place makes this cell fail with a KeyError when it is run a
# second time, because 'Genre' and 'Director' have already been replaced by
# their dummy columns.
categorical_columns = ['Genre', 'Director']
df = pd.get_dummies(pd.DataFrame(data), columns=categorical_columns)

In [4]:
# Separate the target column (y) from the predictor columns (X)
target_column = 'Rating'
y = df[target_column]
X = df.drop(labels=target_column, axis='columns')

In [5]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [6]:
# Fit an ordinary least-squares regression on the training split
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [7]:
# Predict ratings for the held-out rows
y_pred = model.predict(X=X_test)

In [8]:
# Score the predictions against the held-out ratings
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')


Mean Squared Error: 0.5300000000016294
R-squared: -0.2544378698263421


In [9]:
# Predict the rating of a movie the model has not seen.
# The one-hot keys must be spelled exactly like the training columns.
new_data = {
    'Budget': [11000000],
    'Genre_Action': [1],
    'Genre_Comedy': [0],
    'Genre_Drama': [0],
    'Director_Jane': [0],
    'Director_John': [1],
}

# Select the columns in the order the model was fitted on instead of relying
# on the dict's key order matching the training frame. A column that is
# missing from new_data raises a KeyError here rather than yielding a
# prediction from misaligned features.
new_df = pd.DataFrame(new_data).loc[:, X_train.columns]
predicted_rating = model.predict(new_df)

print(f'Predicted Rating: {predicted_rating[0]}')

Predicted Rating: 6.62499999999929
