In [28]:
import numpy as np
import pandas as pd
import torch

import matplotlib.pyplot as plt

from src import utils, models, metrics

In [5]:
# Load the three frames produced by the EDA step (movies, users, ratings).
# NOTE(review): read_pickles presumably unpickles files from this directory —
# only point it at data you trust.
ml1m_dir = "../../data/ml-1m-after_eda/"
df_movies, df_users, df_ratings = utils.read_pickles(ml1m_dir)

### Baseline predictor

In [6]:
# Hold-out split of the raw ratings used by the baseline model.
# NOTE(review): 0.8 is presumably the training fraction and
# sort_by_datetime=True presumably makes the split chronological —
# confirm in utils.TrainTestSplitter.
ratings_split = utils.TrainTestSplitter.split_by_percent(
    df_ratings, 0.8, sort_by_datetime=True
)
train, test = ratings_split

# Ground-truth ratings of the held-out rows; reused by the later comparison cells.
true_ratings = test["Rating"].values
true_ratings

In [7]:
# Fit the baseline model on the training ratings, then score the held-out
# rows; the model is given only their movie IDs at prediction time.
base_model = models.BaseModelAverage()
base_model.fit(train)

test_movie_ids = test["MovieID"]
predicted_scores_baseline = base_model.predict(test_movie_ids)

In [8]:
# Bare last expression: the notebook renders the baseline predictions as the
# cell output for a quick visual sanity check.
predicted_scores_baseline

### Deep learning predictor

In [9]:
# Build the model-input frame from the three source frames, then remove the
# raw identifier columns so they are not fed to the network as features.
df = (
    utils.dl_data_pipeline(df_movies, df_users, df_ratings)
    .drop(columns=["UserID", "MovieID"])
)
df.head(3)

In [10]:
# Split the feature frame for the neural network. This rebinds `train`/`test`,
# which previously held the raw-ratings split.
# NOTE(review): unlike the ratings split, no sort_by_datetime=True is passed
# here, yet the NN predictions are later paired row-by-row with `true_ratings`.
# Whether both test sets contain the same rows in the same order depends on
# utils — TODO confirm.
dl_split = utils.TrainTestSplitter.split_by_percent(df, 0.8)
train, test = dl_split

In [11]:
# Impute missing average-rating features. The fill values are computed on the
# training split only and then applied to both splits, so no statistic from
# the test split feeds into the imputation.
mean_users_rating = train["AvgUserRating"].mean()
mean_movies_rating = train["AvgMovieRating"].mean()

fill_values = {
    "AvgUserRating": mean_users_rating,
    "AvgMovieRating": mean_movies_rating,
}
for split in (train, test):
    for column, fill_value in fill_values.items():
        split[column] = split[column].fillna(fill_value)

In [12]:
# Restore the trained network for inference.
# NOTE(review): 112 is presumably the number of input features (columns of the
# feature frame minus "Rating") — confirm against models.MovieRatingNN.
model = models.MovieRatingNN(112)

# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# machine without CUDA; the inference cell converts the output straight to
# NumPy, so the model is expected to live on the CPU anyway.
# torch.load unpickles the file — only load checkpoints from a trusted source.
state_dict = torch.load("../../artifacts/simple_nn.pth", map_location="cpu")
model.load_state_dict(state_dict)

# Switch to evaluation mode; as the last expression this also echoes the
# module in the cell output.
model.eval()

In [13]:
# Model input: every column of the test split except the target "Rating",
# converted to a float32 tensor.
test_features = test.drop(["Rating"], axis=1).values
test_tensor = torch.tensor(test_features, dtype=torch.float32)

# Call the module itself rather than .forward() so registered hooks run, and
# disable autograd: this is inference only, so building a graph for the whole
# test set would just waste memory. Under no_grad the output does not require
# grad, so it can be converted to NumPy directly.
with torch.no_grad():
    predicted_scores_nn = model(test_tensor).numpy().flatten()

### A/B Tester

In [26]:
from scipy import stats

class IncrementalABTester:
    """Accumulate paired predictions from two models and re-evaluate on demand.

    Records are added with ``add_data``; ``run_tests`` then reports the RMSE of
    each model against the true ratings seen so far and a paired t-test
    p-value, and appends that p-value to ``p_hist``.

    Attributes:
        true_ratings: 1-D array of the true ratings added so far.
        predictions_baseline: 1-D array of baseline predictions, same order.
        predictions_nn: 1-D array of neural-network predictions, same order.
        p_hist: list with one p-value per ``run_tests`` call, in call order.
    """

    def __init__(self):
        self.true_ratings = np.array([])
        self.predictions_baseline = np.array([])
        self.predictions_nn = np.array([])
        self.p_hist = []

    def add_data(self, true_rating, prediction_baseline, prediction_nn):
        """Append one record (or equally sized batches) to the three arrays.

        Note: ``np.append`` copies the whole array on every call, so adding
        records one at a time costs O(n) each.
        """
        self.true_ratings = np.append(self.true_ratings, true_rating)
        self.predictions_baseline = np.append(self.predictions_baseline, prediction_baseline)
        self.predictions_nn = np.append(self.predictions_nn, prediction_nn)

    def mse(self, predictions):
        """Return the mean squared error of ``predictions`` vs. the true ratings.

        Returns NaN (without a NumPy warning) when no records have been added.
        """
        errors = self.true_ratings - np.asarray(predictions)
        if errors.size == 0:
            return np.nan
        return np.mean(errors ** 2)

    def rmse(self, predictions):
        """Return the root mean squared error of ``predictions``."""
        return np.sqrt(self.mse(predictions))

    def run_tests(self):
        """Evaluate both models on all data seen so far.

        Returns:
            dict with keys ``'rmse_baseline'``, ``'rmse_nn'`` and ``'p_value'``,
            each rounded to 3 decimals. ``'p_value'`` is NaN when fewer than
            two records are available.
        """
        # The result keys say RMSE, so take the square root of the MSE.
        rmse_baseline = self.rmse(self.predictions_baseline)
        rmse_nn = self.rmse(self.predictions_nn)

        # Conduct a paired t-test if we have enough data.
        # NOTE(review): this tests whether the two models' *predictions* differ
        # in mean, not whether their *errors* differ. If the goal is to decide
        # which model is more accurate, a paired test on the per-record
        # (squared) errors is probably what is wanted — confirm intent.
        if len(self.true_ratings) > 1:
            _, p_value = stats.ttest_rel(self.predictions_baseline, self.predictions_nn)
        else:
            p_value = np.nan  # Not enough data to test
        self.p_hist.append(p_value)

        results = {
            'rmse_baseline': round(rmse_baseline, 3),
            'rmse_nn': round(rmse_nn, 3),
            'p_value': round(p_value, 3)
        }

        return results

In [27]:
tester = IncrementalABTester()

# zip() silently stops at the shortest input, which would hide a mismatch
# between the three sequences; a paired comparison needs exactly one
# prediction per model for every true rating.
n_true = len(true_ratings)
n_baseline = len(predicted_scores_baseline)
n_nn = len(predicted_scores_nn)
if not (n_true == n_baseline == n_nn):
    raise ValueError(
        "Paired inputs differ in length: "
        f"true={n_true}, baseline={n_baseline}, nn={n_nn}"
    )

# Feed the records one at a time, as if they arrived as a stream.
for true, baseline, nn in zip(true_ratings, predicted_scores_baseline,
                              predicted_scores_nn):
    tester.add_data(true, baseline, nn)
    n_seen = len(tester.true_ratings)
    if n_seen % 10 == 0:  # Periodic testing, e.g. every 10 records
        results = tester.run_tests()
    # 50000 is a multiple of 10, so `results` was refreshed in this same
    # iteration before it is printed.
    if n_seen % 50000 == 0:
        print(f"After {n_seen} records: ", results)

In [40]:
# p-value history of the incremental test, zoomed to entries 100-300.
plt.figure(figsize=(4, 4))
plt.plot(tester.p_hist)
plt.xlim(100, 300)
# p_hist holds one entry per run_tests() call, and the streaming loop calls it
# once every 10 records, so the x axis counts test runs rather than records.
plt.xlabel("Test run (one per 10 records)")
plt.ylabel("p_value")
# The first positional argument of grid() is the on/off flag, so grid(0.3)
# only switched the grid on; pass the transparency by keyword instead.
plt.grid(alpha=0.3)
plt.show()

In [94]:
# Absolute prediction error per held-out record for each model.
# NOTE(review): this assumes predicted_scores_nn is row-aligned with
# true_ratings, although the two come from separate train/test splits —
# confirm.
errors_baseline = np.abs(true_ratings - predicted_scores_baseline)
errors_nn = np.abs(true_ratings - predicted_scores_nn)

# Mean absolute error of each model
mean_abs_error_baseline = np.mean(errors_baseline)
mean_abs_error_nn = np.mean(errors_nn)

# Overlaid histograms of the two error distributions
plt.figure(figsize=(10, 6))
plt.hist(errors_baseline, bins=100, alpha=0.5, label='Baseline Errors', color='blue')
plt.hist(errors_nn, bins=100, alpha=0.5, label='NN Errors', color='red')

# Dashed vertical lines at the means. The legend labels previously referenced
# the undefined names mean_error_baseline / mean_error_nn, which raises
# NameError on a fresh kernel.
plt.axvline(mean_abs_error_baseline, color='blue', linestyle='dashed', linewidth=2, label=f'Mean Baseline Error: {mean_abs_error_baseline:.2f}')
plt.axvline(mean_abs_error_nn, color='red', linestyle='dashed', linewidth=2, label=f'Mean NN Error: {mean_abs_error_nn:.2f}')

# Adding labels and legend
plt.xlabel('Error')
plt.ylabel('Frequency')
plt.title('Histogram of Prediction Errors')
plt.legend()

plt.show()