In [1]:
from recommender import SendTimeRecommender
import pandas as pd
import numpy as np
import time

#import warnings
#warnings.filterwarnings("ignore")

from sklearn.metrics import f1_score

In [2]:
df_full_data = pd.read_csv('../datasets/sto_challenge.csv')

In [4]:
recommender = SendTimeRecommender('../datasets/sto_challenge.csv')

In [5]:
#Including additional columns necessary to the train and test phases
df_full_data = recommender.set_additional_columns(df_full_data)

# Model Evaluation

Recommendation systems usually have to deal with "cold start" problem which is to recommend something to a new user or item before having data about this user/item. To evaluate this model we only used data from users that is within the training set (data from Jan to Oct) and also in the test set (Only November data). 

For customers that have no data available, the model will suggest the general most frequent open time

In [6]:
#Customers that opened emails in November 2018
test_ids = df_full_data['id'][(df_full_data['timestamp'] >= '2018-11-01') 
                              & (df_full_data['flg_open'] == 1)]

df_test_data = df_full_data[df_full_data['id'].isin(test_ids) & 
                            (df_full_data['flg_open'] == 1)]

In [7]:
#Customers that opened emails in November and also before Novemeber to prevent Cold Start problems
train_ids = df_full_data['id'][(df_full_data['id'].isin(test_ids)) & 
                    (df_full_data['timestamp'] < '2018-11-01') & 
                    (df_full_data['flg_open'] == 1)]

df_train_data = df_full_data[df_full_data['id'].isin(train_ids)]
df_train_data = df_train_data[df_train_data['timestamp'] < '2018-11-01']

In [8]:
#Saving data to create recommender
df_train_data.to_csv('train_sto.csv')

In [9]:
recommender = SendTimeRecommender('train_sto.csv')

In [10]:
recommender.train(df_train_data)

Time to train: 26.63185405731201


In [11]:
customer_test = '4591b11ba8cca67079c1a43be2992a8f89fce422'
weekday_test = 4
target_date_test = '2019-05-03'

In [12]:
suggested_hours = recommender.get_customer_suggestions(customer_test, weekday_test, 0.05)

In [13]:
suggested_hours

array(['22-23', '18-19', '22-23'], dtype=object)

In [14]:
recommender.recommend_send_time(customer_test, target_date_test, 0.05)

(9, '18-19')

In [15]:
teste = df_test_data.sample(1000)

start = time.time()

model_pred = []

for i in range(len(teste)):
    
    suggested_hours = recommender.get_customer_suggestions(teste['id'].iloc[i], 
                                                           teste['weekday'].iloc[i], 
                                                           learning_rate=0.05)
        
    if teste['hour_range'].iloc[i] in suggested_hours:
        predicted_sendtime = teste['hour_range'].iloc[i]
    else:
        predicted_sendtime = suggested_hours[0]

    
    model_pred.append(predicted_sendtime) 
    

end = time.time()

print('Time to predict: {0}'.format(end-start))

Time to predict: 66.23400545120239


In [16]:
f1_score(teste['hour_range'], model_pred, average='micro')

0.554

In [17]:
recommender.multiclass_roc_auc_score(teste['hour_range'], model_pred, 'micro')

0.7567272727272728