In [5]:
# CodSoft Data Science Internship
# Task 2: Movie Rating Prediction

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Load dataset
# The path is relative to the notebook's working directory.
raw_data = pd.read_csv("IMDb Movies India.csv", encoding='latin1')

print("Dataset loaded")
print(raw_data.head())

# Select required columns
# (numeric columns only for beginner level)
# .copy() yields an independent frame, so the column assignments below do not
# write into a slice of the raw frame (avoids SettingWithCopyWarning).
data = raw_data[['Rating', 'Votes', 'Duration']].copy()

# Clean 'Duration' column (remove ' min' and convert to numeric).
# astype(str) keeps the .str accessor valid whatever dtype read_csv inferred;
# missing values become the text 'nan', which to_numeric turns back into NaN.
data['Duration'] = pd.to_numeric(
    data['Duration'].astype(str).str.replace(' min', '', regex=False).str.strip(),
    errors='coerce',
)

# Clean 'Votes' column (convert to numeric, handling non-numeric strings).
# NOTE(review): vote counts are read as text and may carry thousands
# separators such as "1,086" - confirm against the full CSV. Commas are removed
# first so such values are parsed instead of being coerced to NaN.
data['Votes'] = pd.to_numeric(
    data['Votes'].astype(str).str.replace(',', '', regex=False).str.strip(),
    errors='coerce',
)

# Drop rows without a target value. Filling a missing 'Rating' with the column
# mean would invent labels and make the reported error meaningless.
data = data.dropna(subset=['Rating'])
assert not data.empty, "No rows with a usable 'Rating' remain after cleaning"

# Split features and target
X = data[['Votes', 'Duration']]
y = data['Rating']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Handle missing feature values after the split, using statistics from the
# training rows only so that nothing about the test rows leaks into training.
# `data` and `X` intentionally keep their NaNs; only the split frames are filled.
feature_means = X_train.mean()
X_train = X_train.fillna(feature_means)
X_test = X_test.fillna(feature_means)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Error calculation
mae = mean_absolute_error(y_test, y_pred)

print("Mean Absolute Error:", mae)


Dataset loaded
                                 Name    Year Duration            Genre  \
0                                         NaN      NaN            Drama   
1  #Gadhvi (He thought he was Gandhi)  (2019)  109 min            Drama   
2                         #Homecoming  (2021)   90 min   Drama, Musical   
3                             #Yaaram  (2019)  110 min  Comedy, Romance   
4                   ...And Once Again  (2010)  105 min            Drama   

   Rating Votes            Director       Actor 1             Actor 2  \
0     NaN   NaN       J.S. Randhawa      Manmauji              Birbal   
1     7.0     8       Gaurav Bakshi  Rasika Dugal      Vivek Ghamande   
2     NaN   NaN  Soumyajit Majumdar  Sayani Gupta   Plabita Borthakur   
3     4.4    35          Ovais Khan       Prateik          Ishita Raj   
4     NaN   NaN        Amol Palekar  Rajat Kapoor  Rituparna Sengupta   

           Actor 3  
0  Rajendra Bhatia  
1    Arvind Jangid  
2       Roy Angana  
3  Siddhant