## Evaluation
This notebook contains three functions:
1. split_train_test: creates train and test sets by splitting the raw data 'user_feature.csv'.
2. evaluate: calculates the per-user MSE and MAE between the ratings predicted by the recommender system and the actual ratings in the test set.
3. append_error_to_df: for visualization purposes and for further exploration of the errors.


In [1]:
import pandas as pd
import numpy as np

In [3]:
# Load the raw table and keep only the user / item / rating columns
# that the split and the evaluation below operate on.
features = ['userId', 'movieId', 'rating']
data = pd.read_csv('user_feature.csv')
new_data = data.loc[:, features]
new_data

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0
...,...,...,...
100831,610,166534,4.0
100832,610,168248,5.0
100833,610,168250,5.0
100834,610,168252,5.0


## Splitting

In [4]:
import pandas as pd

def split_train_test(data, train_ratio=0.7, random_state=None):
    """
    Splits the transaction data into train and test sets, user by user.

    Each user's rows are sampled independently, so every user contributes
    roughly `train_ratio` of their ratings to the train set and the rest to
    the test set.

    Parameters
    ----------
    data         : pandas DataFrame for transaction table containing user, item, and ratings;
                   must have a 'userId' column

    train_ratio  : the desired ratio of the training set (between 0 and 1); the remaining
                   1 - train_ratio of each user's rows goes to the test set

    random_state : int or None, seed for the random sampling. Pass an int to get the
                   same split on every run; None (default) gives a different split each time


    Returns
    ---------
    df_train_fin : dataframe for the training set

    df_test_fin  : dataframe for the test set

    Raises
    ---------
    ValueError   : if train_ratio is outside the interval [0, 1]

    """
    import numpy as np

    if not 0 <= train_ratio <= 1:
        raise ValueError('train_ratio must be between 0 and 1, got {}'.format(train_ratio))

    # One generator shared by all users: a seeded run is reproducible, yet each
    # user still gets an independent draw instead of the same permutation.
    rng = np.random.RandomState(random_state)

    list_df_train = []
    list_df_test = []

    # split randomly per user
    for _, user_rows in data.groupby('userId'):
        df_train = user_rows.sample(frac=train_ratio, random_state=rng)
        # whatever was not sampled for training becomes this user's test rows
        df_test = user_rows.drop(df_train.index)
        list_df_train.append(df_train)
        list_df_test.append(df_test)

    # pd.concat raises on an empty list, so return two empty frames with the same columns
    if not list_df_train:
        return data.iloc[0:0].copy(), data.iloc[0:0].copy()

    # merge the per-user pieces into a single dataframe per set
    df_train_fin = pd.concat(list_df_train)
    df_test_fin = pd.concat(list_df_test)

    return df_train_fin, df_test_fin

In [5]:
# 70 % of every user's ratings go to the train set, the remaining 30 % to the test set.
df_train, df_test = split_train_test(new_data, train_ratio=0.70)

In [6]:
# Inspect the held-out test rows returned by split_train_test.
df_test

Unnamed: 0,userId,movieId,rating
2,1,6,4.0
4,1,50,5.0
5,1,70,3.0
7,1,110,4.0
11,1,216,5.0
...,...,...,...
100819,610,160080,3.0
100824,610,161582,4.0
100828,610,163981,3.5
100829,610,164179,5.0


In [7]:
# Reshape the long test table into a utility matrix: one row per userId, one column
# per movieId, rating as the cell value (NaN where the user has no test rating).
df_test.pivot(index='userId', columns='movieId', values='rating')

movieId,1,2,3,4,5,6,7,8,9,10,...,187595,188301,188675,188751,190219,191005,193565,193567,193585,193587
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,,,,,...,,,,,,,,,,
607,,,,,,,,,,,...,,,,,,,,,,
608,,2.0,,,,,,,,,...,,,,,,,,,,
609,3.0,,,,,,,,,,...,,,,,,,,,,


## Metrics for the output of the recommender system
The sample test set is a subset of the test set, while the synthetic result is created by introducing a few modifications to that subset.

In [8]:
# Small ground-truth utility matrix: test ratings of users 2 through 4 only.
sample_test = (
    df_test.loc[df_test.userId.between(2, 4)]
    .pivot(index='userId', columns='movieId', values='rating')
)
sample_test

movieId,21,106,126,171,222,235,247,319,333,348,...,7899,8798,26409,70946,79132,80906,91529,99114,112552,131724
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,,,,,,,,4.0,,...,,3.5,,,4.0,5.0,3.5,3.5,4.0,5.0
3,,,,,,,,,,,...,4.5,,4.5,5.0,,,,,,
4,3.0,4.0,1.0,3.0,1.0,2.0,3.0,5.0,,4.0,...,,,,,,,,,,


In [9]:
# Fake "predictions" for exercising the metrics: shift every known rating down by 0.5,
# then overwrite three cells by position with hand-picked values.
synthetic_result = sample_test.sub(0.5)
synthetic_result.iat[0, 1] = 5.0
synthetic_result.iat[0, 5] = 2.0
synthetic_result.iat[2, 0] = 3.0
synthetic_result

movieId,21,106,126,171,222,235,247,319,333,348,...,7899,8798,26409,70946,79132,80906,91529,99114,112552,131724
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,5.0,,,,2.0,,,3.5,,...,,3.0,,,3.5,4.5,3.0,3.0,3.5,4.5
3,,,,,,,,,,,...,4.0,,4.0,4.5,,,,,,
4,3.0,3.5,0.5,2.5,0.5,1.5,2.5,4.5,,3.5,...,,,,,,,,,,


In [11]:
# Keep only the first three movie columns so the matrices are small enough to check by hand:
# sr1 is the ground truth, sr2 the synthetic prediction.
sr1 = sample_test.iloc[:, :3]
sr2 = synthetic_result.iloc[:, :3]
display(sr1)
display(sr2)

movieId,21,106,126
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,,,
3,,,
4,3.0,4.0,1.0


movieId,21,106,126
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,,5.0,
3,,,
4,3.0,3.5,0.5


In [12]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

mse_list = []
mae_list = []

# test indices first: every user id of the ground truth (sr1) must be present in the result (sr2)
idx1 = sr1.index
idx2 = sr2.index
a = idx1.difference(idx2)


if len(a) == 0:
    print('proceed')

    for i in sr2.index:
        # missing ratings are treated as 0 on both sides before comparing
        y_pred = sr2[sr2.index == i].fillna(0)
        y = sr1[sr1.index == i].fillna(0)

        mse_list.append(mean_squared_error(y, y_pred))
        mae_list.append(mean_absolute_error(y, y_pred))

else:
    # the original printed an undefined name here, which raised NameError instead of reporting
    print('error: user ids missing from the result matrix:', list(a))
print(mae_list)
print(mse_list)

proceed
[1.6666666666666667, 0.0, 0.3333333333333333]
[8.333333333333334, 0.0, 0.16666666666666666]


In [None]:
# Insert the errors into a copy of the predictions. sr2 itself must stay untouched:
# it is passed to evaluate() further down, which needs it to have the same columns as sr1,
# and inserting in place would also make this cell fail when re-run.
sr2_errors = sr2.copy()
sr2_errors.insert(0, 'mse', mse_list)
sr2_errors.insert(0, 'mae', mae_list)
sr2_errors

In [15]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

def evaluate(df_test_result, df_test_data):
    """
    Calculates the mse and mae per user of the results of the recommender system for a given test set.

    Rows are matched by user id (the index); columns are compared by position.
    Missing ratings (NaN) in either matrix are replaced by 0 before comparing, so a
    prediction for an item without a test rating is penalised by its full value.
    User ids are expected to be unique within each matrix.

    Parameters
    ----------

    df_test_result   : utility matrix containing the result of the recommender systems

    df_test_data     : test data generated from splitting the transaction table and tested on the recommender systems

    Returns
    ---------

    mse_list         : list of mean squared error for each user, in the row order of df_test_result

    mae_list         : list of mean absolute error for each user, in the row order of df_test_result

    Raises
    ---------

    ValueError       : if the two matrices do not contain the same user ids, or do not have
                       the same number of columns

    """
    import numpy as np

    # test indices first: both matrices must describe exactly the same users
    idx_orig_data = df_test_data.index
    idx_result = df_test_result.index
    missing = idx_orig_data.difference(idx_result)
    unexpected = idx_result.difference(idx_orig_data)

    if len(missing) > 0 or len(unexpected) > 0:
        raise ValueError(
            'user ids of the result and the test data do not match: '
            'missing from result={}, not in test data={}'.format(list(missing), list(unexpected))
        )

    # an explicit check, because a single-column matrix would otherwise broadcast silently
    if df_test_result.shape[1] != df_test_data.shape[1]:
        raise ValueError(
            'result has {} columns but test data has {}'.format(
                df_test_result.shape[1], df_test_data.shape[1]
            )
        )

    print('proceed')

    # put the ground truth rows in the same user order as the result, then compare cell by cell
    y_pred = df_test_result.fillna(0).to_numpy(dtype=float)
    y = df_test_data.reindex(idx_result).fillna(0).to_numpy(dtype=float)
    err = y - y_pred

    # averaging over all columns of a user's row gives the same value as the
    # uniform-average multi-output mse / mae computed on that single row
    mse_list = (err ** 2).mean(axis=1).tolist()
    mae_list = np.abs(err).mean(axis=1).tolist()

    return mse_list, mae_list

In [16]:
# Score the synthetic predictions (sr2) against the ground truth (sr1), one value per user.
mse, mae = evaluate(df_test_result=sr2, df_test_data=sr1)
print(mse, mae, sep='\n')

proceed
[8.333333333333334, 0.0, 0.16666666666666666]
[1.6666666666666667, 0.0, 0.3333333333333333]


In [17]:
def append_error_to_df(test_result, mse, mae):
    """
    Returns a copy of the prediction matrix with the error values inserted as its first two
    columns ('mae_u', then 'mse_u') for easy visualization and for further computations.

    The input dataframe is left unchanged, so the function can be called repeatedly
    on the same matrix.

    Parameters
    ----------

    test_result   : utility matrix for the result of the recommender systems on the test set

    mse           : mse computed from function evaluate, one value per row of test_result

    mae           : mae computed from function evaluate, one value per row of test_result

    Returns
    -------

    test_result   : new utility matrix with the error columns prepended

    Raises
    -------

    ValueError    : if test_result already has a 'mse_u' or 'mae_u' column, or if the
                    length of mse / mae does not match the number of rows
    """

    # work on a copy: inserting in place would alter the caller's matrix and make a
    # second call fail because the error columns already exist
    result_with_errors = test_result.copy()
    result_with_errors.insert(0, 'mse_u', mse)
    # inserted last at position 0, so 'mae_u' ends up as the very first column
    result_with_errors.insert(0, 'mae_u', mae)

    return result_with_errors
    

In [18]:
# Attach the per-user errors to the synthetic predictions and show the combined table.
df_error = append_error_to_df(test_result=sr2, mse=mse, mae=mae)
df_error

movieId,mae_u,mse_u,21,106,126
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,1.666667,8.333333,,5.0,
3,0.0,0.0,,,
4,0.333333,0.166667,3.0,3.5,0.5
