### 1) Importing Libraries

In [None]:
import sys
import os
import json
sys.path.append(os.path.abspath("../"))

#from recsys import *
#from generic_preprocessing import *
#from IPython.display import HTML
#import pandas as pd

## Importing required libraries
import pandas as pd ## For DataFrame operation
import numpy as np ## Numerical python for matrix operations
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler ## Preprocessing function
#import pandas_profiling ## For easy profiling of pandas DataFrame
#import missingno as msno ## Missing value co-occurance analysis


In [None]:
#import pandas as pd
#import numpy as np
from scipy import sparse
from lightfm import LightFM
from sklearn.metrics.pairwise import cosine_similarity

def create_interaction_matrix(df,user_col, item_col, rating_col, norm= False, threshold = None):
    '''
    Function to create an interaction matrix dataframe from transactional type interactions
    Required Input -
        - df = Pandas DataFrame containing user-item interactions
        - user_col = column name containing user's identifier
        - item_col = column name containing item's identifier
        - rating col = column name containing user feedback on interaction with a given item
        - norm (optional) = True if the summed ratings should be binarised to 0/1
        - threshold (required if norm = True, ignored otherwise) = value above which
          the summed rating counts as favorable (strictly greater than)
    Expected output - 
        - Pandas dataframe indexed by user, one column per item, holding the summed
          rating per (user, item) pair (0 where there was no interaction), or 0/1
          flags when norm = True
    Raises -
        - ValueError if norm = True and no threshold is given
    '''
    # Fail early with a clear message instead of an opaque "'>' not supported"
    # TypeError raised from deep inside the comparison.
    if norm and threshold is None:
        raise ValueError("threshold must be provided when norm=True")
    interactions = df.groupby([user_col, item_col])[rating_col] \
            .sum().unstack().reset_index(). \
            fillna(0).set_index(user_col)
    if norm:
        # Vectorised comparison; avoids the element-wise applymap, which is
        # deprecated in recent pandas releases.
        interactions = (interactions > threshold).astype(int)
    return interactions

In [None]:
def create_user_dict(interactions):
    '''
    Function to map every user id to its row position in the interaction dataset
    Required Input - 
        interactions - dataset create by create_interaction_matrix (users on the index)
    Expected Output -
        user_dict - Dictionary type output containing user_id as key and the
                    0-based row position of that user in interactions as value
    '''
    return {user_id: position for position, user_id in enumerate(interactions.index)}
    
def create_item_dict(df,id_col,name_col):
    '''
    Function to create an item dictionary based on their item_id and item name
    Required Input - 
        - df = Pandas dataframe with Item information
        - id_col = Column name containing unique identifier for an item
        - name_col = Column name containing name of the item
    Expected Output -
        item_dict = Dictionary type output containing item_id as key and item_name as value
                    (if an id occurs more than once, the last row wins)
    '''
    # Pair the two columns positionally so the result does not depend on the
    # frame having a default 0..n-1 index (label lookups with df.loc[i, ...]
    # break on filtered or re-indexed frames).
    return dict(zip(df[id_col], df[name_col]))

def runMF(interactions, n_components=30, loss='warp', k=15, epoch=30,n_jobs = 4):
    '''
    Function to train a LightFM matrix-factorization model on an interaction matrix
    Required Input -
        - interactions = dataset create by create_interaction_matrix
        - n_components = number of embedding dimensions (passed to LightFM as no_components)
        - loss = loss function name, passed straight through to LightFM
        - k = passed straight through to LightFM's k argument
        - epoch = number of training epochs
        - n_jobs = number of threads used while fitting
    Expected Output  -
        Model - the LightFM instance after fit() has been called on it
    '''
    # LightFM is fitted on a sparse matrix built from the DataFrame's raw values.
    sparse_interactions = sparse.csr_matrix(interactions.values)
    trained = LightFM(no_components=n_components, loss=loss, k=k)
    trained.fit(sparse_interactions, epochs=epoch, num_threads=n_jobs)
    return trained

def sample_recommendation_user(model, interactions, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 10, show = True):
    '''
    Function to produce user recommendations
    Required Input - 
        - model = Trained matrix factorization model (must expose predict(user, item_ids))
        - interactions = dataset used for training the model
        - user_id = user ID for which we need to generate recommendation
        - user_dict = Dictionary type input containing user_id as key and the user's
                      row position in interactions as value
        - item_dict = Dictionary type input containing item_id as key and item_name as value
                      (only consulted when show is truthy)
        - threshold = value above which an existing interaction marks the item as
                      already known to the user; known items are never recommended
        - nrec_items = Number of output recommendation needed
        - show = when truthy, print the recommendations
    Expected Output - 
        - Prints a dictionary {rank: item name} of the recommended items when show is truthy
        - Returns the list of recommended item ids, best score first
    Raises -
        - KeyError if user_id is not in user_dict / interactions, or (when showing)
          a recommended item id is missing from item_dict
    '''
    n_items = interactions.shape[1]
    user_x = user_dict[user_id]

    # Score every item column for this user and label the scores with item ids.
    scores = pd.Series(model.predict(user_x,np.arange(n_items)))
    scores.index = interactions.columns
    ranked_items = scores.sort_values(ascending=False).index.tolist()

    # Items the user already interacted with above the threshold. A set keeps
    # the exclusion filter below linear in the number of items.
    user_row = interactions.loc[user_id,:]
    known_items = set(user_row[user_row > threshold].index)

    return_score_list = [item for item in ranked_items
                         if item not in known_items][:nrec_items]

    if show:
        # Names are resolved only for what is actually printed, so an item
        # missing from item_dict cannot break a silent (show=False) call.
        item_dic = {rank: str(item_dict[item])
                    for rank, item in enumerate(return_score_list, start=1)}
        print(item_dic)
    return return_score_list
    

def sample_recommendation_item(model,interactions,item_id,user_dict,item_dict,number_of_user):
    '''
    Function to produce a list of top N interested users for a given item
    Required Input -
        - model = Trained matrix factorization model (must expose predict(user_ids, item_ids))
        - interactions = dataset used for training the model
        - item_id = item ID for which we need to generate recommended users
        - user_dict = not used by this function; kept for interface compatibility
        - item_dict = not used by this function; kept for interface compatibility
        - number_of_user = Number of users needed as an output
    Expected Output -
        - user_list = List of recommended user ids, highest predicted score first
    Raises -
        - KeyError if item_id is not a column of interactions
    '''
    n_users = interactions.shape[0]
    # Exact label lookup: unlike searchsorted it does not require sorted columns
    # and raises for an unknown item instead of silently scoring a neighbouring
    # column. Assumes column labels are unique (true for a pivoted matrix).
    item_x = interactions.columns.get_loc(item_id)
    scores = pd.Series(model.predict(np.arange(n_users), np.repeat(item_x, n_users)))
    # scores carries 0..n_users-1 positions as its index, so the index of the
    # top rows can be used directly to pick user ids.
    top_rows = scores.sort_values(ascending=False).head(number_of_user).index
    user_list = list(interactions.index[top_rows])
    return user_list 


def create_item_emdedding_distance_matrix(model,interactions):
    '''
    Function to build the item-item matrix from the model's item embeddings
    Required Input -
        - model = Trained matrix factorization model (its item_embeddings attribute is read)
        - interactions = dataset used for training the model (supplies the item labels)
    Expected Output -
        - item_emdedding_distance_matrix = Pandas dataframe holding the result of
          cosine_similarity between item embeddings, with the columns of
          interactions as both row and column labels
    '''
    item_vectors = sparse.csr_matrix(model.item_embeddings)
    item_labels = interactions.columns
    return pd.DataFrame(cosine_similarity(item_vectors),
                        index=item_labels,
                        columns=item_labels)

def item_item_recommendation(item_emdedding_distance_matrix, item_id, 
                             item_dict, n_items = 10, show = True):
    '''
    Function to create item-item recommendation
    Required Input - 
        - item_emdedding_distance_matrix = Pandas dataframe of item-item scores,
          labelled by item id on both axes (higher score = more similar)
        - item_id  = item ID for which we need to generate recommended items
        - item_dict = Dictionary type input containing item_id as key and item_name as value
                      (only consulted when show is truthy)
        - n_items = Number of items needed as an output
        - show = when truthy, print the recommendations
    Expected Output -
        - Prints a dictionary {rank: item name} when show is truthy
        - recommended_items = List of recommended item ids, most similar first;
          never contains item_id itself
    Raises -
        - KeyError if item_id is not in the matrix, or (when showing) a recommended
          item id is missing from item_dict
    '''
    similarities = item_emdedding_distance_matrix.loc[item_id,:]
    # Remove the query item by label rather than assuming it sorts first: with
    # tied scores another item can take the top slot, and slicing off position 0
    # would then drop a real neighbour and keep the query item.
    recommended_items = list(similarities.drop(labels=item_id)
                             .sort_values(ascending = False)
                             .head(n_items).index)
    if show:
        product_dic = {rank: item_dict[item]
                       for rank, item in enumerate(recommended_items, start=1)}
        print(product_dic)
    return recommended_items

def random_item(num =10):
    '''
    Function to print a random selection of product names from a fixed list
    Required Input -
        - num (optional) = number of distinct products to draw; must not exceed
          the number of distinct names in the list below (19)
    Expected Output -
        - Prints a dictionary with the 1-based draw position as key and the
          product name as value
        - Returns None
    Raises -
        - ValueError (from random.sample) if num is negative or larger than the
          number of distinct names
    '''
    # Imported here because the file only imports random in a much later cell;
    # without this the function fails with NameError when run in file order.
    import random

    s=['REVIVEREVIVE','CORDYCEPCORDYCEP','S.O.M.TIMECAPSULE','S.O.M.CMAX','REALELIXIRABALONECOLLAGEN','MAGIQUEYOUTHFULRADIANCE','S.O.M.CORDYTIBET&BHUTAN','LEKCAPPLEKCAPP','MAGIQUENOBLEWHITE','S.O.M.LINGZHISUN','S.O.M.I-KARE','PURECOLLAGENPURECOLLAGEN','S.O.M.TIMECAPSULE','MAGIQUEGRAVITAS','ULTIMATECOLLAGENULTIMATECOLLAGEN','BONBACKBONBACK','S.O.M.S-BALANCE','REVIVEREVIVEBLACKSHINE','MYCARDIMYCARDI','DERAEYDERAEY']
    # 'S.O.M.TIMECAPSULE' appears twice above; de-duplicate (keeping order) so a
    # single draw can never list the same product twice.
    candidates = list(dict.fromkeys(s))
    sampled_list = random.sample(candidates, num)

    random_dic = {position: name for position, name in enumerate(sampled_list, start=1)}
    print(random_dic)  

### 2) Importing Data

In [None]:
# Load the purchase history. sep is passed by keyword: recent pandas releases
# accept only the file path as a positional argument of read_csv.
ratings = pd.read_csv('D:/Dataset/DSRecommendation/V2/PurchaseHistory.csv', sep=',')
ratings.head()

In [None]:
# Normalise BrandCollection into a key: drop '|', upper-case, drop spaces.
# regex=False makes both replacements literal regardless of the pandas version
# ('|' is a regex metacharacter and the default of `regex` has changed over time).
ratings['BrandCollection'] = (
    ratings['BrandCollection']
    .str.replace('|', '', regex=False)
    .str.upper()
    .str.replace(' ', '', regex=False)
)

In [None]:
ratings

In [None]:
# Load the product master table (the variable is called `movies`, but it holds
# products). sep is passed by keyword: recent pandas releases accept only the
# file path as a positional argument of read_csv.
movies = pd.read_csv('D:/Dataset/DSRecommendation/V2/ProductMasters.csv', sep=',')
movies.head()

In [None]:
movies['BrandFamily'] = movies['BrandCollection']

In [None]:
movies

In [None]:
# Normalise BrandCollection exactly as for `ratings` (drop '|', upper-case,
# drop spaces) so both tables share the same item key.
# regex=False makes both replacements literal regardless of the pandas version
# ('|' is a regex metacharacter and the default of `regex` has changed over time).
movies['BrandCollection'] = (
    movies['BrandCollection']
    .str.replace('|', '', regex=False)
    .str.upper()
    .str.replace(' ', '', regex=False)
)

### 3) Preprocessing

#### 3.1) Create interaction matrix

In [None]:
# Pivot purchases into a user x item matrix of summed Quantity.
# threshold only takes effect when norm=True (not set here); it is given as a
# number rather than the string '3' so that enabling norm later does not fail
# on a str/float comparison.
interactions = create_interaction_matrix(df = ratings,
                                         user_col = 'Customer_offline',
                                         item_col = 'BrandCollection',
                                         rating_col = 'Quantity',
                                         threshold = 3)
interactions.shape

In [None]:
interactions.head()

#### 3.2) Create User Dict

In [None]:
# Map each user id (the index of `interactions`) to its row position; the
# recommenders use this to translate a user id into the model's user index.
user_dict = create_user_dict(interactions=interactions)

#### 3.3) Create Item dict

In [None]:
# Map the normalised BrandCollection key to the original text kept in
# BrandFamily, so recommendations can be printed with readable names.
product_dict = create_item_dict(df = movies,
                               id_col = 'BrandCollection',
                               name_col = 'BrandFamily')

In [None]:
product_dict

In [None]:
interactions.values

In [None]:
# Convert the interaction DataFrame's values to a SciPy CSR matrix; this is the
# object passed to model.fit and auc_score in the following cells.
x = sparse.csr_matrix(interactions.values)


In [None]:
# Configure (but do not yet train) the LightFM model: 10 components,
# loss='warp', k=10. Training happens in the next cell.
model = LightFM(no_components= 10, loss='warp',k=10)

In [None]:
# Train for 5 epochs on 4 threads. mf_model is whatever fit() returns
# (presumably the fitted model itself — confirm against the LightFM docs).
mf_model = model.fit(x,epochs=5,num_threads =4,verbose=2)


In [None]:
from lightfm.evaluation import auc_score

# Evaluation settings. Only NUM_THREADS is used below; NUM_COMPONENTS,
# NUM_EPOCHS and ITEM_ALPHA are not referenced anywhere in the visible notebook.
NUM_THREADS = 2
NUM_COMPONENTS = 30
NUM_EPOCHS = 3
ITEM_ALPHA = 1e-6

# Compute and print the mean AUC score.
# NOTE: x is the same matrix the model was fitted on, so this is a
# training-set score, not a held-out estimate.
train_auc = auc_score(mf_model, x, num_threads=NUM_THREADS).mean()
print('Collaborative filtering train AUC: %s' % train_auc)

In [None]:
import pickle

In [None]:
# Persist the trained model. The context manager closes the file handle (and
# flushes it) even if pickling fails; `filename` is reused by the load cell below.
filename = 'd:/finalized_model_V2.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(mf_model, model_file)





### 4) Building Matrix Factorization model

In [None]:
#mf_model = runMF(interactions = interactions,
 #                n_components = 30,
#                 loss = 'warp',
#                 k = '15',
#                 epoch = 30)

In [None]:
# Reload the model from the path currently held in `filename`.
# NOTE: pickle.load executes arbitrary code from the file — only load files
# written by this notebook.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
user_dict

### 5) User Recommender

In [None]:
# Top-10 recommendations for one customer. With threshold = 4, items whose
# summed Quantity for this customer is above 4 are treated as already known and
# are excluded from the result.
rec_list = sample_recommendation_user(model = loaded_model, 
                                      interactions = interactions, 
                                      user_id = 'CE0001178', 
                                      user_dict = user_dict,
                                      item_dict = product_dict, 
                                      threshold = 4,
                                      nrec_items = 10)

### 6) Item-User Recommender

In [None]:
# The 15 users with the highest predicted score for one item. item_id must be a
# normalised BrandCollection key, i.e. a column label of `interactions`.
sample_recommendation_item(model = loaded_model,
                           interactions = interactions,
                           item_id = 'ACTIVISACTIVIS',
                           user_dict = user_dict,
                           item_dict = product_dict,
                           number_of_user = 15)

### 7) Item - Item Recommender

In [None]:
# Item-item cosine-similarity matrix computed from the model's item embeddings,
# labelled with the columns of `interactions` on both axes.
item_item_dist = create_item_emdedding_distance_matrix(model = loaded_model,
                                                       interactions = interactions)

In [None]:
# Persist the item-item matrix; the context manager guarantees the file is closed.
filename = 'd:/ItemMatrix_V2.sav'
with open(filename, 'wb') as matrix_file:
    pickle.dump(item_item_dist, matrix_file)

In [None]:
# Reload the item-item matrix from disk.
# NOTE: pickle.load executes arbitrary code from the file — only load files
# written by this notebook.
with open('d:/ItemMatrix_V2.sav', 'rb') as matrix_file:
    itemMatrix = pickle.load(matrix_file)

In [None]:
# Persist the item-id -> item-name dictionary; the context manager guarantees
# the file is closed.
filename = 'd:/ProductDict_V2.sav'
with open(filename, 'wb') as dict_file:
    pickle.dump(product_dict, dict_file)

In [None]:
# Persist the user-id -> row-position dictionary; the context manager
# guarantees the file is closed.
filename = 'd:/UserDict_V2.sav'
with open(filename, 'wb') as dict_file:
    pickle.dump(user_dict, dict_file)

In [None]:
# Persist the interaction matrix; the context manager guarantees the file is closed.
filename = 'd:/InterActions_V2.sav'
with open(filename, 'wb') as interactions_file:
    pickle.dump(interactions, interactions_file)

In [None]:
product_dict

In [None]:
# The 20 items most similar to one product, using the matrix reloaded from disk.
# item_id must be a row/column label of itemMatrix (a normalised BrandCollection key).
rec_list = item_item_recommendation(item_emdedding_distance_matrix = itemMatrix,
                                    item_id = 'DDNEEDDNEE',
                                    item_dict = product_dict,
                                    n_items = 20)

In [None]:
product_dict

In [None]:
# Manual version of item_item_recommendation: sort one row of the similarity
# matrix in descending order and keep positions 1..10 (position 0 is assumed to
# be the item itself).
# NOTE(review): 801001026 is an integer id, but itemMatrix is labelled with the
# columns of `interactions`, which are BrandCollection strings — this lookup
# presumably raises KeyError with the current data; confirm or replace the id.
recommended_items = list(pd.Series(itemMatrix.loc[801001026,:]. \
                                  sort_values(ascending = False).head(10+1). \
                                  index[1:10+1]))

In [None]:
recommended_items

In [None]:
# Product names in their raw 'Brand|Collection' form (before the upper-case /
# strip normalisation applied to BrandCollection). The same list is assigned to
# `s` again in a later cell.
s = ['Revive|Revive', 'Cordycep|Cordycep','S.O.M.|Time Capsule','S.O.M.|C Max','Real Elixir|Abalone Collagen','Magique|Youthful Radiance','S.O.M.|Cordy Tibet & Bhutan',
'Lekcapp|Lekcapp','Magique|Noble White','S.O.M.|Lingzhi Sun','S.O.M.|I-Kare','Pure Collagen|Pure Collagen','S.O.M.|TIME CAPSULE','Magique|Gravitas','Ultimate Collagen|Ultimate Collagen',
'BONBACK|BONBACK','S.O.M.|S-Balance','Revive|Revive Black Shine','My Cardi|My Cardi','DERAEY|DERAEY']


In [None]:
 import random


In [None]:

# Product names in their raw 'Brand|Collection' form.
s = ['Revive|Revive', 'Cordycep|Cordycep','S.O.M.|Time Capsule','S.O.M.|C Max','Real Elixir|Abalone Collagen','Magique|Youthful Radiance','S.O.M.|Cordy Tibet & Bhutan',
'Lekcapp|Lekcapp','Magique|Noble White','S.O.M.|Lingzhi Sun','S.O.M.|I-Kare','Pure Collagen|Pure Collagen','S.O.M.|TIME CAPSULE','Magique|Gravitas','Ultimate Collagen|Ultimate Collagen',
'BONBACK|BONBACK','S.O.M.|S-Balance','Revive|Revive Black Shine','My Cardi|My Cardi','DERAEY|DERAEY']
# Draw 5 distinct entries of s at random.
sampled_list = list(random.sample(s,5))

# Key each sampled name by its 0-based draw position. The dictionary is built
# from the sample itself: looping over range(len(s)) (20 entries) while indexing
# the 5-element sample raised IndexError.
dic = dict(enumerate(sampled_list))

print(dic)

   


In [None]:
dic