In [49]:
import numpy as np
import pandas as pd

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import precision_score, recall_score

In [50]:
def apply_ml_metrics(true_scores, predict_scores, threshold=4):
    """Score rating predictions with regression and classification metrics.

    MAE and RMSE are computed on the raw scores. For precision and recall
    both score vectors are first binarised: a score counts as a positive
    when it is greater than or equal to ``threshold``.

    Parameters
    ----------
    true_scores : array-like
        Observed scores (list, tuple, NumPy array or pandas Series).
    predict_scores : array-like
        Predicted scores, in the same order as ``true_scores``.
    threshold : int or float, default 4
        Cut-off used to binarise both score vectors.

    Returns
    -------
    dict
        Keys ``"mae"``, ``"rmse"``, ``"precision"`` and ``"recall"``, each a
        built-in ``float`` rounded to three decimals.
    """
    # Coerce to arrays so plain lists/tuples work too: ``list >= int`` raises
    # TypeError, whereas arrays compare element-wise.
    true_scores = np.asarray(true_scores)
    predict_scores = np.asarray(predict_scores)

    true_scores_cat = (true_scores >= threshold).astype(int)
    predict_scores_cat = (predict_scores >= threshold).astype(int)

    mae = mean_absolute_error(true_scores, predict_scores)
    rmse = np.sqrt(mean_squared_error(true_scores, predict_scores))
    precision = precision_score(true_scores_cat, predict_scores_cat)
    recall = recall_score(true_scores_cat, predict_scores_cat)

    # float() turns NumPy scalars into built-in floats so the returned dict
    # displays as plain numbers regardless of the installed NumPy version.
    return {"mae": round(float(mae), 3),
            "rmse": round(float(rmse), 3),
            "precision": round(float(precision), 3),
            "recall": round(float(recall), 3)}

class BaselineModel:
    """Non-personalised baseline that predicts a movie's mean training rating.

    After ``fit`` the instance exposes:

    * ``rate_dict`` -- mapping ``MovieID -> mean Rating`` over the training rows;
    * ``avrg_rate`` -- the unweighted mean of those per-movie means, used as
      the fallback prediction for movies absent from the training data.
    """

    def fit(self, train):
        """Learn per-movie mean ratings.

        Parameters
        ----------
        train : pandas.DataFrame
            Must contain the columns ``"MovieID"`` and ``"Rating"``; any other
            columns are ignored.
        """
        # Select "Rating" before aggregating: only the needed column is
        # averaged (so non-numeric columns in `train` cannot break the mean),
        # and the group-by runs once instead of twice.
        movie_means = train.groupby("MovieID")["Rating"].mean()
        self.rate_dict = movie_means.to_dict()
        self.avrg_rate = movie_means.mean()

    def predict(self, movies_ids):
        """Return one predicted rating per entry of ``movies_ids``.

        Parameters
        ----------
        movies_ids : iterable
            Movie identifiers to score.

        Returns
        -------
        numpy.ndarray
            The stored mean rating for known movies and ``avrg_rate`` for
            movies that were not seen by ``fit``.
        """
        return np.array([self.rate_dict.get(movie_id, self.avrg_rate)
                         for movie_id in movies_ids])

#### Baseline model

In [51]:
# Load the three pickled frames (presumably the MovieLens-1M tables saved
# after the EDA step -- confirm against the notebook that wrote them).
# NOTE: unpickling can execute arbitrary code; only load files you trust.
df_movies, df_users, df_ratings = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "../../data/ml-1m-after_eda/movies.pickle",
        "../../data/ml-1m-after_eda/users.pickle",
        "../../data/ml-1m-after_eda/ratings.pickle",
    )
)

In [52]:
def train_test_split(df, split_date):
    """Split a ratings frame chronologically at ``split_date``.

    Rows whose ``"Date"`` is strictly earlier than ``split_date`` form the
    training set; rows dated on or after it form the test set. Both parts
    keep only the ``"UserID"``, ``"MovieID"`` and ``"Rating"`` columns.

    Returns
    -------
    tuple
        ``(train, test)`` data frames.
    """
    kept_columns = ["UserID", "MovieID", "Rating"]
    dates = df["Date"]
    train = df.loc[dates < split_date, kept_columns]
    test = df.loc[dates >= split_date, kept_columns]
    return train, test

# Chronological hold-out: ratings dated before the cut-off go to `train`,
# ratings dated on or after it go to `test`.
split_date = pd.Timestamp("2000-12-02").date()
train, test = train_test_split(df=df_ratings, split_date=split_date)
print(f"Train shape: {train.shape}")
print(f"Test shape: {test.shape}")

In [53]:
# Ground truth: the ratings actually given in the held-out period.
true_scores = test["Rating"]

# Predictions: fit the per-movie-mean baseline on the training period,
# then score every held-out (user, movie) row by its movie id alone.
model = BaselineModel()
model.fit(train=train)
predict_scores = model.predict(movies_ids=test["MovieID"])

# The metrics dict is the cell's last expression, so it is shown as output.
apply_ml_metrics(true_scores=true_scores, predict_scores=predict_scores)