In [4]:
import pandas as pd
from plotnine import ggplot, aes, geom_point, geom_smooth, labs, geom_segment
import numpy as np
from sklearn.linear_model import LinearRegression

# Read CSV file
ratings = pd.read_csv("../data/movieratings.csv")

# Display the first few rows
print(ratings.head())

   movie_id    movie_title  tomatometer_rating  audience_rating
0     17401   Willow Creek                81.0             34.0
1      4775        Colonia                26.0             61.0
2     13218  She's the Man                43.0             79.0
3     10539         Mongol                86.0             76.0
4      8462           I.Q.                44.0             47.0


In [5]:
complete_index = ratings.notnull().all(axis=1) #not null entries in all columns (axis = 1)

print(ratings[~complete_index]) #~ is a negation operator  – switches True to False and vice versa

ratings_complete = ratings[complete_index].copy() #use copy to allow changes to DataFrame later on without modifying the original df


     movie_id                          movie_title  tomatometer_rating  \
45       9326  Le Deuxième souffle (Second Breath)                 NaN   
52      15191                             The Hunt                83.0   
103     17532   The World, the Flesh and the Devil                 NaN   

     audience_rating  
45              67.0  
52               NaN  
103             67.0  


In [6]:
# Fit linear model
X = ratings_complete[['tomatometer_rating']]
y = ratings_complete[['audience_rating']]
model = LinearRegression().fit(X, y)

In [7]:
# Linear model summary
slope = model.coef_[0]
intercept = model.intercept_
print(f"Slope: {slope}, Intercept: {intercept}")

Slope: [0.4460597], Intercept: [34.50894913]


In [9]:
borderlands = 8
aqp = 86

borderlands_predicted = intercept + slope * borderlands
aqp_predicted = intercept + slope * aqp
print(f"Predicted audience rating for Borderlands based on critic rating of {borderlands}: {borderlands_predicted}")
print(f"Predicted audience rating for A Quiet Place based on critic rating of {aqp}: {aqp_predicted}")

Predicted audience rating for Borderlands based on critic rating of 8: [38.07742671]
Predicted audience rating for A Quiet Place based on critic rating of 86: [72.87008307]
