In [None]:
!pip install turicreate

In [None]:
import os
import pandas as pd
import numpy as np
from zipfile import ZipFile
import turicreate

# Load Dataset

In [None]:
# Directory containing the zipped competition CSVs; the unzip cell below reads
# every archive relative to this path.
ds_dir = '../input/coupon-purchase-prediction'

In [None]:
# Unpack each zipped competition CSV from `ds_dir` into the current working
# directory, in the order listed.
archive_names = (
    "coupon_detail_train.csv.zip",
    "coupon_list_test.csv.zip",
    "coupon_list_train.csv.zip",
    "coupon_visit_train.csv.zip",
    "sample_submission.csv.zip",
    "user_list.csv.zip",
)
for archive_name in archive_names:
    with ZipFile(os.path.join(ds_dir, archive_name), 'r') as archive:
        archive.extractall()

In [None]:
# Read the extracted CSVs this notebook works with into DataFrames.
# Not loaded here: coupon_visit_train.csv (extracted above) and
# prefecture_locations.csv (under ds_dir); their read_csv calls were disabled.
cd_train, cl_test, cl_train, sample_sub, user_list = (
    pd.read_csv(csv_name)
    for csv_name in (
        'coupon_detail_train.csv',
        'coupon_list_test.csv',
        'coupon_list_train.csv',
        'sample_submission.csv',
        'user_list.csv',
    )
)

# Preprocess and Convert Data to SFrame

In [None]:
# Collapse the purchase log to one row per (user, coupon) pair; afterwards the
# PURCHASEID_hash column holds the number of purchases for that pair.
# NOTE: this rebinds cd_train, so re-running the cell on its own output turns
# every count into 1 -- reload the CSVs before re-running.
purchase_keys = ['USER_ID_hash', 'COUPON_ID_hash']
purchase_counts = cd_train.groupby(purchase_keys)['PURCHASEID_hash'].count()
cd_train = purchase_counts.reset_index()

In [None]:
def _clean_coupon_list(coupons):
    """Return a cleaned copy of a coupon-list DataFrame.

    Steps, in order:
      1. Drop the VALIDFROM / VALIDEND columns (skipped if already absent, so
         the cell can be re-run without a KeyError).
      2. Fill missing VALIDPERIOD values with 180.
      3. Fill every remaining missing value with 1.

    The input frame is not modified. Filling goes through DataFrame.fillna
    rather than an in-place fillna on the `coupons.VALIDPERIOD` accessor,
    because that chained in-place form is deprecated and does not update the
    parent frame when pandas Copy-on-Write is active.
    """
    return (
        coupons
        .drop(columns=['VALIDFROM', 'VALIDEND'], errors='ignore')
        .fillna({'VALIDPERIOD': 180})
        .fillna(1)
    )


cl_train = _clean_coupon_list(cl_train)
cl_test = _clean_coupon_list(cl_test)

In [None]:
# SFrames passed to the recommender `create` calls in the Fitting section.
# Observations: the aggregated (user, coupon, purchase count) table.
observation_data = turicreate.SFrame(cd_train)

# Item side data: train and test coupon attributes stacked into one table.
all_coupons = pd.concat([cl_train, cl_test])
item_data = turicreate.SFrame(all_coupons)

In [None]:
# SFrames passed to `recommend`: the users to score and the candidate items.
user_ids = user_list[['USER_ID_hash']]
test_coupon_ids = cl_test[['COUPON_ID_hash']]

users = turicreate.SFrame(user_ids)
# Only coupons from the test list are offered as recommendation candidates.
items = turicreate.SFrame(test_coupon_ids)

# Fitting

In [None]:
def _fit_item_content_model(metric):
    """Create an item-content recommender using `metric` as `similarity_metrics`.

    Every other argument is shared by all models: coupon attributes as item
    data, aggregated purchases as observations, and the purchase count column
    (PURCHASEID_hash) as the target.
    """
    # NOTE(review): confirm against the Turi Create docs that
    # `similarity_metrics` accepts a bare metric name like this.
    return turicreate.recommender.item_content_recommender.create(
        item_data=item_data,
        item_id='COUPON_ID_hash',
        observation_data=observation_data,
        user_id='USER_ID_hash',
        target='PURCHASEID_hash',
        similarity_metrics=metric,
        item_data_transform='auto',
        verbose=True,
    )


cosine_model = _fit_item_content_model('cosine')
jaccard_model = _fit_item_content_model('jaccard')
pearson_model = _fit_item_content_model('pearson')

# Recommend

In [None]:
def _recommend_top10(model):
    """Return `model`'s 10 recommendations per user as a pandas DataFrame.

    Recommendations are requested for `users` and restricted to `items`.
    """
    ranked = model.recommend(users, k=10, items=items)
    return ranked.to_dataframe()


cosine_res = _recommend_top10(cosine_model)
jaccard_res = _recommend_top10(jaccard_model)
pearson_res = _recommend_top10(pearson_model)

# Convert to Submission Format

In [None]:
def clean_prediction(row):
    """Format one row's recommended coupons as a space-separated string.

    Parameters
    ----------
    row : object exposing a ``PURCHASED_COUPONS`` attribute
        Typically a DataFrame row passed by ``DataFrame.apply(..., axis=1)``,
        where ``PURCHASED_COUPONS`` is a list of coupon ids.

    Returns
    -------
    str
        The ids joined by single spaces; an empty string for an empty list or
        a missing value.
    """
    coupons = row.PURCHASED_COUPONS
    # Already formatted (e.g. the formatting cell was re-run): return it
    # unchanged instead of mangling it a second time.
    if isinstance(coupons, str):
        return coupons
    # Missing value (None or a float NaN placeholder): nothing to recommend.
    if coupons is None or isinstance(coupons, float):
        return ""
    # Join the ids directly rather than slicing the list's repr, which breaks
    # on ids containing quotes and on non-string ids.
    return " ".join(str(coupon) for coupon in coupons)

In [None]:
def _coupons_per_user(recommendations):
    """Gather each user's recommended coupon ids into a single list.

    Returns a DataFrame with one row per USER_ID_hash and a PURCHASED_COUPONS
    column holding that user's list of COUPON_ID_hash values.
    """
    per_user = recommendations.groupby('USER_ID_hash')['COUPON_ID_hash'].apply(list)
    return per_user.reset_index(name='PURCHASED_COUPONS')


cosine_res = _coupons_per_user(cosine_res)
jaccard_res = _coupons_per_user(jaccard_res)
pearson_res = _coupons_per_user(pearson_res)

In [None]:
# Replace each PURCHASED_COUPONS list with the string produced by
# clean_prediction, row by row, in all three result tables.
for result_table in (cosine_res, jaccard_res, pearson_res):
    result_table['PURCHASED_COUPONS'] = result_table.apply(clean_prediction, axis=1)

In [None]:
# Write one CSV per model into the working directory, without the index column.
submission_files = {
    'sub_cpp_turi_item_content_cosine.csv': cosine_res,
    'sub_cpp_turi_item_content_jaccard.csv': jaccard_res,
    'sub_cpp_turi_item_content_pearson.csv': pearson_res,
}
for csv_path, submission in submission_files.items():
    submission.to_csv(csv_path, index=False)

In [None]:
# Display the cosine model's submission table as this cell's output.
cosine_res