## Evaluation
This notebook contains the 2nd and 3rd of three functions:
1. split_train_test: creates train and test sets by splitting the raw data 'user_feature.csv'.
2. evaluate: calculates the mse and mae between the final recommendations and the actual ratings from the test set.
3. append_error_to_df: for visualization purposes and for further exploration of the errors.


## Generating input data for unit-testing purposes
The commented-out cells are only for testing the function and for the unit tests.

In [23]:
# import pandas as pd
# import numpy as np

In [4]:
# data = pd.read_csv('user_feature.csv')
# features = ['userId', 'movieId', 'rating']
# # data
# new_data=data[features]
# new_data

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0
...,...,...,...
100831,610,166534,4.0
100832,610,168248,5.0
100833,610,168250,5.0
100834,610,168252,5.0


## Splitting

In [5]:
# import pandas as pd

# def split_train_test(data, train_ratio=0.7):
#     """
#     Splits the transaction data into train and test sets.
    
#     Parameters
#     ----------
#     data         : pandas DataFrame for transaction table containing user, item, and ratings
    
#     train_ratio  : the desired ratio of training set, while 1-train ratio is automatically set for the test set 
    
    
#     Returns
#     ---------
#     df_train_fin : dataframe for the training set
    
#     df_test_fin  : dataframe for the test set
    
#     df_test_fin* : possible option is a pivoted df ready as the util matrix input of the recsys. In our case, the
#                    index='userId', columns='movieId', values='rating'. To generalize a transaction table, 
#                    index=column[0], columns=itemId, values=rating.
#     """
    
#     list_df_train = []
#     list_df_test = []
    
#     #group by user id
#     d = dict(tuple(data.groupby(data.columns[0]))) #assuming column[0] is the userId
    
#     #splitting randomly per user
#     for i in (d):
#         if len(d[i])<2:
#             print(len(d[i]))
#             list_df_test.append(d[i])
            
#         else:            
#             df_train = d[i].sample(frac=train_ratio)  
#             ind = df_train.index
#             df_test = d[i].drop(ind)
#             list_df_train.append(df_train) 
#             list_df_test.append(df_test)

#     # 2. merge selected train set per user to a single dataframe
#     df_train_fin = pd.concat(list_df_train)
#     df_test_fin = pd.concat(list_df_test)
    
#     # 3. Option to pivot it to create the utility matrix ready as input for recsys
#     df_test_um = df_test_fin.pivot(index=df_test_fin.columns[0], columns=df_test_fin.columns[1], values=df_test_fin.columns[2])
    
#     # 4. get indices of train and test sets
#     indx_train = df_train_fin.index
#     indx_test = df_test_fin.index

#     return df_train_fin, df_test_fin, df_test_um, indx_train, indx_test #return indices

In [7]:
# df_train, df_test, df_test_um, indx_train, indx_test = split_train_test(new_data, 0.70)

In [11]:
# indx_test

Int64Index([     1,      4,      9,     11,     14,     16,     20,     21,
                22,     23,
            ...
            100782, 100785, 100787, 100796, 100799, 100803, 100811, 100812,
            100815, 100834],
           dtype='int64', length=30256)

In [25]:
# df_test_truth = new_data.loc[pd.Index(indx_test), 'rating']
# df_test_truth.values

array([4. , 5. , 5. , ..., 4.5, 3.5, 5. ])

In [87]:
# synthetic_result = np.random.randint(1,11,len(df_test_truth))
# synthetic_result

array([10,  9,  9,  1])

## Metrics for the output of the recommender system
A sample test is created using a subset of the test set, while the synthetic result is created by introducing a few modifications to the test set.

In [79]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

def evaluate_arrays(model_result_arr, df_data, indx_test):
    """
    Calculates the mse and mae of the recommender system for a given result and test set.

    Parameters
    ----------

    model_result_arr   : ratings from the results of the recommender sys using test set,
                         given in the same order as indx_test

    df_data            : the original dataframe before splitting.
                         The original ratings (ground truth) of the test set are extracted
                         from its third column (df_data.columns[2]) using indx_test

    indx_test          : result indices of test set from splitting (index labels of df_data)

    Returns
    ---------

    mse                : mean squared error, computed with sklearn

    mae                : mean absolute error, computed with sklearn

    Raises
    ---------

    ValueError         : if model_result_arr and the extracted test ratings differ in length
    """
    # Label-based lookup: indx_test holds index labels of df_data, not row positions.
    test_arr = df_data.loc[pd.Index(indx_test), df_data.columns[2]].values

    result_len = len(model_result_arr)
    test_len = len(test_arr)

    # Guard clause: the metrics compare the two arrays element by element,
    # so a length mismatch means predictions and ground truth are not aligned.
    if result_len != test_len:
        raise ValueError('the arrays are of different lengths %s in %s' % (result_len, test_len))

    mse = mean_squared_error(test_arr, model_result_arr)
    mae = mean_absolute_error(test_arr, model_result_arr)

    return mse, mae

In [80]:
# Score the synthetic predictions against the held-out ratings.
# NOTE: synthetic_result, new_data and indx_test are produced only by the
# commented-out cells earlier in this notebook; uncomment and run those first,
# otherwise this cell raises NameError on a fresh kernel.
mse, mae = evaluate_arrays(synthetic_result, new_data, indx_test)
print(mse, mae, sep='\n')

proceed
13.235316961924907
2.9707330777366474


## Unittest

In [85]:
import unittest
import pandas as pd
import numpy as np
# from pandas._testing import assert_index_equal
# from pandas._testing import assert_frame_equal

class Test_evaluate_arrays(unittest.TestCase):
    
    
    def test_length(self): 
        df1 = pd.DataFrame({'u': [1,1,2,2,3,3,3,5,5,6], 'i': [3,4,5,6,7,1,2,3,1,0], 'r':[5,6,7,8,9,3,2,1,0,9]})
        indx1= [2,3,4,5]
        df_test_truth = df.loc[pd.Index(indx1), df.columns=='r']
        arr_test = df_test_truth.values
        arr_result = np.random.randint(1,11,len(df_test_truth))
        self.assertEqual(len(arr_test), len(arr_result))
        
        
    def test_type_error(self):
        df2 = pd.DataFrame([[1,1,2], [2,3,3], [3,5,5], [3,4,5], [6,7,1], [2,3,1], [5,6,7], [8,9,3], [2,1,1]], index=[0,1,2,3,4,5,6,7,8], columns=['u', 'i', 'r'])
        indx2=[2,3,4,5]
        df_test_truth2 = df2.loc[pd.Index(indx2), df2.columns[2]]
        test_arr = df_test_truth2.values
        arr_result = np.random.randint(1,11,len(df_test_truth2))
        mse, mae = evaluate_arrays(arr_result, df2, indx2)
        self.assertIsNotNone(mae)
        self.assertIsNotNone(mse)
        
# Run the tests inside the notebook kernel: argv=[''] keeps unittest from parsing the
# kernel's own command-line arguments, and exit=False keeps it from calling sys.exit().
unittest.main(argv=[''], verbosity=2, exit=False)

test_length (__main__.Test_evaluate_arrays) ... ok
test_type_error (__main__.Test_evaluate_arrays) ... 

proceed


ok

----------------------------------------------------------------------
Ran 2 tests in 0.008s

OK


<unittest.main.TestProgram at 0x7fb1defaf7c0>