In [None]:
import tensorflow as tf
from tensorflow import keras

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sqlalchemy import create_engine
# Open a SQLAlchemy connection to the local SQLite database file.
# NOTE(review): `conn` is not referenced by any cell visible in this section and is
# never closed here - confirm it is still needed.
conn = create_engine('sqlite:///Data//mubi_db.sqlite').connect()

In [None]:
import random
from itertools import chain
def flatten(list_of_lists):
    """Return a lazy iterator over the items of every inner iterable.

    Only a single level of nesting is removed; deeper nesting is left untouched.
    """
    one_level_flat = chain.from_iterable(list_of_lists)
    return one_level_flat

In [None]:
def is_close_enough(pred, ratings, tolerance=.5, max_rating=5, max_rating_tolerance=1):
    """Flag, pair by pair, whether each prediction is acceptably close to its rating.

    A prediction is accepted when it lies within ``tolerance`` of the rating
    (bounds inclusive). Ratings equal to ``max_rating`` are handled differently:
    any prediction of at least ``max_rating - max_rating_tolerance`` is accepted,
    with no upper bound.

    Parameters
    ----------
    pred : iterable of numbers
        Predicted scores.
    ratings : iterable of numbers
        True scores, paired positionally with ``pred``. The pairing uses ``zip``,
        so surplus items in the longer input are silently ignored.
    tolerance : number, default 0.5
        Allowed absolute deviation for ratings other than ``max_rating``.
    max_rating : number, default 5
        Rating value that gets the one-sided check.
    max_rating_tolerance : number, default 1
        How far below ``max_rating`` a prediction may fall and still be accepted.

    Returns
    -------
    list
        One boolean per (prediction, rating) pair, in input order.
    """
    return [
        actual - max_rating_tolerance <= predicted
        if actual == max_rating
        else actual - tolerance <= predicted <= actual + tolerance
        for predicted, actual in zip(pred, ratings)
    ]

In [None]:
# Load the pickled X_train / X_test objects stored next to the saved model, plus the
# pre-split dataset.
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
# NOTE(review): `X_train` and `ratings` are not used by the cells visible in this section.
X_train = pd.read_pickle('saved_model/rmse_839/X_train.pkl')
X_test = pd.read_pickle('saved_model/rmse_839/X_test.pkl')
ratings = pd.read_pickle("./Data/final_dataset_pre_split.pkl")

In [None]:
# Load the saved Keras model from its .h5 file.
# The prediction cell calls it as model.predict([movie_array, user_array]),
# i.e. the two inputs are passed in (movie, user) order.
model = keras.models.load_model('saved_model/rmse_839/model.h5')

### Check X_test

In [None]:
# Preview the first rows of X_test (cells below read its user, movie and rating_score columns).
X_test.head()

In [None]:
# Dimensions of X_test.
X_test.shape

In [None]:
# Draw up to 10,000 *distinct* users from X_test.
# random.choices() samples with replacement, so the same user could be picked several
# times; random.sample() cannot. If X_test holds fewer than 10,000 distinct users,
# all of them are used.
# A dedicated seeded generator keeps the selection reproducible across kernel restarts
# without touching the global `random` state.
unique_users = X_test.user.unique().tolist()  # random.sample needs a real sequence, not an ndarray
random_10000_users = random.Random(42).sample(unique_users, k=min(10000, len(unique_users)))

In [None]:
# Score the model one sampled user at a time, collecting:
#   predictions[i] - list of raw predicted scores for the i-th sampled user's X_test rows
#   accuracy[i]    - share of those predictions that is_close_enough() accepts
predictions = []
accuracy = []

# Group once up front instead of re-scanning all of X_test with a boolean mask
# (twice) for every sampled user.
rows_by_user = X_test.groupby('user')

for user in random_10000_users:
    user_rows = rows_by_user.get_group(user)
    movie_array = user_rows.movie.values.astype(int)
    # The model takes parallel movie/user arrays, so repeat the user id once per movie.
    user_array = np.full(len(movie_array), int(user))
    # verbose=0: do not print a progress bar for each of the thousands of predict calls.
    pred = model.predict([movie_array, user_array], verbose=0)
    pred_flat = pred.flatten()
    # Predictions are rounded to one decimal before being compared with the true ratings.
    val = is_close_enough(np.round(pred_flat, 1), user_rows.rating_score.values)

    predictions.append(list(pred_flat))
    accuracy.append(sum(val) / len(pred_flat))

In [None]:
# Distribution of per-user accuracy (one value per sampled user).
sns.histplot(accuracy)

# Label the figure so it can be read on its own, like the titled plots further down.
plt.title('per-user accuracy')
plt.xlabel('share of predictions close enough to the true rating')
plt.ylabel('number of sampled users')
plt.show()

In [None]:
# Compare the density of predicted scores (red) with the true rating scores of the
# sampled users (blue).
predicted_scores = np.array(list(flatten(predictions)))
actual_scores = X_test[X_test.user.isin(random_10000_users)].rating_score.values

# Use one bin range for both histograms. Otherwise each call derives its 5 bins from
# its own min/max, and the two outlines are drawn on different bin edges.
shared_range = (min(np.nanmin(predicted_scores), np.nanmin(actual_scores)),
                max(np.nanmax(predicted_scores), np.nanmax(actual_scores)))

sns.histplot(predicted_scores, bins=5, binrange=shared_range, color='red', fill=False,
             stat='density', common_norm=False, label='predicted')

sns.histplot(actual_scores, bins=5, binrange=shared_range, color='blue', fill=False,
             stat='density', common_norm=False, label='actual')

plt.title('predicted v actual rating score')
plt.xlabel('rating score')
plt.legend()
plt.show()

In [None]:
# Number of non-null `movie` entries in X_test for each sampled user, in sample order.
# Selecting the column before count() avoids counting every other column only to
# discard the result.
X_test.groupby('user')['movie'].count().loc[random_10000_users].values

In [None]:
# Does accuracy depend on how many X_test ratings a user has?
# Count only the `movie` column instead of every column of the frame.
plt.scatter(x=X_test.groupby('user')['movie'].count().loc[random_10000_users].values,
            y=accuracy)

plt.title('n_ratings v accuracy')
plt.xlabel('number of ratings per user (X_test)')
plt.ylabel('per-user accuracy')
plt.show()

In [None]:
# Mean true rating_score in X_test for each sampled user, in sample order.
# Selecting the column before mean() avoids averaging every other column, which
# raises a TypeError on pandas >= 2.0 if any of them is non-numeric.
X_test.groupby('user')['rating_score'].mean().loc[random_10000_users].values

In [None]:
# Does accuracy depend on how generous a user's ratings are on average?
# Average only `rating_score`; a frame-wide mean() also processes every other column
# and fails on non-numeric ones in pandas >= 2.0.
plt.scatter(x=X_test.groupby('user')['rating_score'].mean().loc[random_10000_users].values,
            y=accuracy)

plt.title('mean rating score v accuracy')
plt.xlabel('mean rating score per user (X_test)')
plt.ylabel('per-user accuracy')
plt.show()

In [None]:
# Per user: mean true rating (x) against mean predicted rating (y).
# Average only `rating_score`; a frame-wide mean() also processes every other column
# and fails on non-numeric ones in pandas >= 2.0.
plt.scatter(x=X_test.groupby('user')['rating_score'].mean().loc[random_10000_users].values,
            y=[np.mean(user_preds) for user_preds in predictions])

plt.title('mean rating score v mean pred rating score')
plt.xlabel('mean rating score per user (X_test)')
plt.ylabel('mean predicted rating score per user')
plt.show()