In [9]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import pyspark as ps    # for the pyspark suite
from lightfm import LightFM
import scipy.stats as stats

# Build (or reuse) a Spark session on a local master with four worker threads.
spark = (
    ps.sql.SparkSession.builder
    .master("local[4]")
    .appName("df lecture")
    .getOrCreate()
)

# Handle to the underlying SparkContext (the pre-2.0 entry point).
sc = spark.sparkContext

In [10]:

# Always make it pretty: switch matplotlib to its 'ggplot' style sheet.
plt.style.use('ggplot')

## Recommender model comparisons

In [11]:
from pyspark.ml import recommendation

# Load the persisted Spark ALS model from the path it was saved to.
mPath = "als_recommender"
persisted_ALS_model = recommendation.ALSModel.load(mPath)

# Load the pickled LightFM model and its companion dataset object.  The
# `with` blocks make sure both file handles are closed once unpickling is done.
# NOTE(review): pickle.load can execute arbitrary code -- only load pickle
# files that were produced by a trusted run of this project.
with open("lightfm-no-features.pkl", "rb") as pickle_in:
    persisted_light_FM = pickle.load(pickle_in)
with open("dataset-lightfm-no-features.pkl", "rb") as pickle_in_dataset:
    persisted_dataset_light_FM = pickle.load(pickle_in_dataset)

# Ratings file that is passed to the benchmark as the hold-out set.
validata = pd.read_csv('data/test_ratings_df.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'dataset-lightfm-no-features.pkl'

In [14]:
def calc_prediction_scores(holdout_set, top_n=5, max_users=3999):
    """Benchmark the recommenders against a hold-out set of user ratings.

    For every user, each ranking strategy picks its ``top_n`` highest-ranked
    hold-out movies; the strategy's score for that user is the sum of the
    user's actual ratings for those movies divided by ``top_n``.  Four
    rankings are compared: the user's own ratings, the ``mean`` column, the
    LightFM prediction and the ALS prediction.

    Relies on the notebook-level ``spark``, ``persisted_ALS_model``,
    ``persisted_light_FM`` and ``persisted_dataset_light_FM`` objects.

    Parameters
    ----------
    holdout_set : pandas.DataFrame
        Must provide ``userId``, ``movieId``, ``rating`` and ``mean`` columns.
    top_n : int, default 5
        Number of top-ranked movies scored per user and strategy.
    max_users : int, default 3999
        Maximum number of users scored, taken in ascending ``userId`` order.
        The default reproduces the earlier hard-coded ``count < 4000`` limit.

    Returns
    -------
    tuple
        ``(user_ground_truth, movie_mean_rating, lfm_rating, als_rating,
        ground_mean, ground_std, mean_rating_mean, mean_rating_std,
        lfm_mean, lfm_std, als_mean, als_std)`` -- four per-user score lists
        followed by the numpy mean and std of each list.
    """
    user_ground_truth, movie_mean_rating, lfm_rating, als_rating = [], [], [], []

    # mapping() does not change between iterations, so fetch it once rather
    # than once per movie.  Index 0 is used for users and index 2 for movies.
    id_maps = persisted_dataset_light_FM.mapping()
    user_id_map, item_id_map = id_maps[0], id_maps[2]

    def top_n_score(frame, rank_by):
        # Dividing by top_n (not by the number of rows actually available) is
        # kept deliberately so existing results do not change; a user with
        # fewer than top_n rows therefore scores lower.
        # NOTE(review): confirm whether a true mean is wanted here.
        ranked = frame.sort_values(rank_by, ascending=False).head(top_n)
        return ranked['rating'].sum() / top_n

    # groupby yields each user's rows directly (sorted by userId), avoiding a
    # full boolean scan of the hold-out frame per user.
    for position, (user, user_rows) in enumerate(holdout_set.groupby('userId')):
        if position >= max_users:
            break  # stop instead of idling through the remaining users

        # Copy so that adding columns never writes into a view of holdout_set.
        sampled_user_movies = user_rows[['movieId', 'rating', 'mean']].copy()
        sampled_user_movies['userId'] = user

        # Translate raw movie ids into the ids LightFM's predict() is given.
        movie_samps = [item_id_map[movie] for movie in sampled_user_movies['movieId']]

        # ALS model predictions (adds a 'prediction' column).
        als_input = spark.createDataFrame(sampled_user_movies)
        ALS_pred = persisted_ALS_model.transform(als_input).toPandas()

        # LightFM model predictions.
        sampled_user_movies['lfm_predict'] = persisted_light_FM.predict(
            user_ids=user_id_map[user],
            item_ids=movie_samps,
            item_features=None,
            user_features=None,
        )

        # Keep only the key and the LightFM score on the left; the rating,
        # mean and userId columns come back from the ALS output on merge.
        scored = (
            sampled_user_movies.drop(['rating', 'mean', 'userId'], axis=1)
            .merge(ALS_pred, how='inner', left_on='movieId', right_on='movieId')
            .rename(columns={'prediction': 'als_predict'})
        )

        user_ground_truth.append(top_n_score(scored, 'rating'))
        movie_mean_rating.append(top_n_score(scored, 'mean'))
        lfm_rating.append(top_n_score(scored, 'lfm_predict'))
        als_rating.append(top_n_score(scored, 'als_predict'))

    return (
        user_ground_truth, movie_mean_rating, lfm_rating, als_rating,
        np.mean(user_ground_truth), np.std(user_ground_truth),
        np.mean(movie_mean_rating), np.std(movie_mean_rating),
        np.mean(lfm_rating), np.std(lfm_rating),
        np.mean(als_rating), np.std(als_rating),
    )
        

In [15]:
# Run the benchmark on the hold-out ratings; unpack the four per-user score
# lists followed by the (mean, std) pair of each list.
(
    ground, pop_rtg, lfm_rtg, als_rtg,
    ground_mean, ground_std,
    pop_best, pop_std,
    lfm_best, lfm_std,
    als_best, als_std,
) = calc_prediction_scores(validata)

In [16]:
# Mean per-user score for each ranking: user's own ratings, the 'mean' column, LightFM, ALS.
print(ground_mean, pop_best, lfm_best, als_best)

4.556140350877193 3.9315789473684206 3.7561403508771924 4.06390977443609


In [17]:
# Collect the per-user scores into a single frame, one column per ranking.
# Building it in one constructor call avoids assigning whole DataFrames to
# individual columns (which relies on implicit index alignment).
bench_test = pd.DataFrame({
    'ground': ground,
    'pop_rating': pop_rtg,
    'lfm_rating': lfm_rtg,
    'als_rating': als_rtg,
})

In [None]:
# Preview the first 20 rows of per-user benchmark scores.
bench_test.head(20)

In [22]:
# Persist the benchmark scores (without the index) so they can be reloaded without re-running the models.
bench_test.to_csv('data/bench_test.csv', index=False)

## Statistical comparisons
- How do the results of the bench test compare? 
- Develop a hypothesis, and test the hypothesis.
- Hypothesis: The mean score on the bench test data for the ALS model is 0.2 points higher than the score for the LFM model.
- The p-value computed from the test statistic gives the probability, assuming the null hypothesis is true, of observing results at least as extreme as what was measured.

In [4]:
def welch_test_statistic(sample_1, sample_2):
    """Return Welch's t statistic for the difference in means of two samples.

    The statistic is ``(mean_1 - mean_2) / sqrt(s1^2 / n1 + s2^2 / n2)``,
    where ``s^2`` is the unbiased sample variance (``ddof=1``), as Welch's
    test requires.

    Parameters
    ----------
    sample_1, sample_2 : sequence of numbers
        The two samples; each needs at least two observations for the sample
        variance to be defined.

    Returns
    -------
    float
        Positive when ``sample_1`` has the larger mean.
    """
    numerator = np.mean(sample_1) - np.mean(sample_2)
    # ddof=1: np.var defaults to the population variance, which understates
    # the standard error of the difference.
    denominator_sq = (
        np.var(sample_1, ddof=1) / len(sample_1)
        + np.var(sample_2, ddof=1) / len(sample_2)
    )
    return numerator / np.sqrt(denominator_sq)

def welch_satterhwaithe_df(sample_1, sample_2):
    """Return the Welch-Satterthwaite degrees of freedom for two samples.

    Computes ``(v1 + v2)^2 / (v1^2 / (n1 - 1) + v2^2 / (n2 - 1))`` with
    ``v = s^2 / n``, where ``s^2`` is the unbiased sample variance
    (``ddof=1``).

    Parameters
    ----------
    sample_1, sample_2 : sequence of numbers
        The two samples; each needs at least two observations.

    Returns
    -------
    float
        Approximate degrees of freedom (generally not an integer).
    """
    ss1 = len(sample_1)
    ss2 = len(sample_2)
    # Squared standard error of each sample mean, from unbiased variances
    # (np.var defaults to ddof=0, the population variance).
    se1_sq = np.var(sample_1, ddof=1) / ss1
    se2_sq = np.var(sample_2, ddof=1) / ss2
    df = (
        (se1_sq + se2_sq) ** 2.0
        / (se1_sq ** 2.0 / (ss1 - 1) + se2_sq ** 2.0 / (ss2 - 1))
    )
    return df

In [6]:
# Load data: read the benchmark scores back from the CSV written earlier in the notebook.
bench_test = pd.read_csv('data/bench_test.csv')

In [8]:
# Summary statistics (count, mean, std, quartiles) for each score column.
bench_test.describe()

Unnamed: 0,ground,pop_rating,lfm_rating,als_rating
count,399.0,399.0,399.0,399.0
mean,4.55614,3.931579,3.75614,4.06391
std,0.365821,0.571567,0.565869,0.486192
min,2.8,1.2,1.0,1.8
25%,4.3,3.7,3.4,3.8
50%,4.6,4.0,3.8,4.1
75%,4.9,4.3,4.2,4.4
max,5.0,5.0,5.0,5.0


In [None]:
# Welch's t-test of the null hypothesis that the ALS and LightFM models have
# the same mean bench-test score.
# NOTE(review): the markdown hypothesis mentions a 0.2-point difference; this
# tests a zero difference -- shift the numerator if 0.2 is the intended null.
als_scores = bench_test['als_rating']
lfm_scores = bench_test['lfm_rating']

test_statistic = welch_test_statistic(als_scores, lfm_scores)
print("Welch Test Statistic ALS vs LFM: {:2.2f}".format(test_statistic))

df = welch_satterhwaithe_df(als_scores, lfm_scores)
print("Degrees of Freedom for Welch's Test: {:2.2f}".format(df))

students = stats.t(df)
# Two-sided p-value: probability mass in both tails beyond |t|.  Using
# sf(|t|) keeps the result valid whichever sign the statistic has.
p_value = 2 * students.sf(abs(test_statistic))
print("p-value for ALS vs LFM different mean score: {:.3g}".format(p_value))