In [1]:
import pandas as pd
import numpy as np
from scipy import sparse
from lightfm import LightFM
from sklearn.metrics.pairwise import cosine_similarity



In [2]:
def create_interaction_matrix(df,user_col, item_col, rating_col):
    '''
    Pivot transactional user-item feedback into a wide user x item matrix.
    Required Input -
        - df = Pandas DataFrame with one row per interaction
        - user_col = name of the column holding the user identifier
        - item_col = name of the column holding the item identifier
        - rating_col = name of the column holding the feedback value
    Expected output -
        - Pandas DataFrame indexed by user_col with one column per distinct
          item_col value. Each cell is the SUM of rating_col over all rows for
          that (user, item) pair; pairs that never occur are filled with 0.
    '''
    # Total feedback per (user, item) pair.
    pair_totals = df.groupby([user_col, item_col])[rating_col].sum()
    # Items become columns; pairs that were never observed show up as NaN.
    wide = pair_totals.unstack()
    wide = wide.reset_index()
    wide = wide.fillna(0)
    return wide.set_index(user_col)

In [3]:
def create_user_dict(interactions):
    '''
    Map every user id in the interaction matrix to its row position.
    Required Input - 
        interactions - dataset created by create_interaction_matrix (user ids on the index)
    Expected Output -
        user_dict - Dictionary with user_id as key and the 0-based row position of
                    that user in `interactions` as value
    '''
    # enumerate yields (position, user id) in index order.
    return {uid: position for position, uid in enumerate(interactions.index)}

In [4]:
def create_item_dict(df,id_col,name_col):
    '''
    Function to create an item dictionary based on their item_id and item name
    Required Input - 
        - df = Pandas dataframe with Item information
        - id_col = Column name containing unique identifier for an item
        - name_col = Column name containing name of the item
    Expected Output -
        item_dict = Dictionary type output containing item_id as key and item_name as value.
                    If an id occurs more than once, the last row wins.
    '''
    # Pair the two columns positionally. The previous df.loc[i, ...] lookup with
    # i in range(n) only worked when df carried a default 0..n-1 index and broke
    # on filtered, sorted or re-indexed frames.
    # Iterating .values keeps the numpy scalar type of the ids, so callers that
    # rely on e.g. key.tolist() keep working.
    return dict(zip(df[id_col].values, df[name_col].values))

In [5]:
def runMF(interactions, n_components=30, loss='warp', k=15, epoch=30,n_jobs = 4, random_state=None):
    '''
    Function to run matrix-factorization algorithm
    Required Input -
        - interactions = dataset created by create_interaction_matrix
        - n_components = number of embeddings you want to create to define Item and user
        - loss = loss function forwarded to LightFM; other options are logistic, bpr
        - k = forwarded unchanged to LightFM's `k` argument
              (NOTE(review): per the LightFM docs this is only used by k-OS WARP training - confirm)
        - epoch = number of epochs to run 
        - n_jobs = number of threads used for fitting (passed as num_threads)
        - random_state = optional seed forwarded to LightFM so a run can be repeated;
                         None (default) keeps the previous unseeded behaviour
    Expected Output  -
        Model - Trained model
    '''
    # LightFM consumes a scipy sparse matrix; rows are users, columns are items.
    x = sparse.csr_matrix(interactions.values)
    model = LightFM(no_components=n_components, loss=loss, k=k, random_state=random_state)
    model.fit(x, epochs=epoch, num_threads=n_jobs)
    return model

In [6]:
def recommend_merchant_to_user(model,interactions_trained ,interactions_current, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 10, show = True):
    '''
    Function to produce user recommendations
    Required Input - 
        - model = Trained matrix factorization model (must expose predict(user_index, item_indices))
        - interactions_trained = dataset used for training the model; its columns label the items
        - interactions_current = Interaction matrix created by latest set of transactions
        - user_id = user ID for which we need to generate recommendation
        - user_dict = Dictionary type input containing user_id as key and the user's
                      row position in the training matrix as value
        - item_dict = Dictionary type input containing item_id as key and item_name as value
        - threshold = value above which the rating is favorable in new interaction matrix
        - nrec_items = Number of output recommendation needed
        - show = unused; kept so existing callers do not break
    Return Output - 
        - return_score_list - List of recommended merchant IDs (labels of interactions_trained), best first
        - merchants_recommended - List of recommended merchants
        - known_merchants - Merchants for which customer has already shopped
    Raises -
        - KeyError if user_id is not in user_dict, or a merchant id is missing from item_dict
    '''
    n_items = interactions_trained.shape[1]
    user_x = user_dict[user_id]

    # Label the scores with the columns of the matrix that was passed in.
    # The previous code read the notebook-level `interactions_cc` here, which
    # ignored the argument and failed outside the notebook.
    scores = pd.Series(model.predict(user_x, np.arange(n_items)),
                       index=interactions_trained.columns)
    ranked_ids = list(scores.sort_values(ascending=False).index)

    # Merchants the user already rated above the threshold, highest id first.
    # A user with no current transactions simply has no known merchants.
    if user_id in interactions_current.index:
        current_row = interactions_current.loc[user_id, :]
        known_ids = sorted(current_row[current_row > threshold].index, reverse=True)
    else:
        known_ids = []

    # Compare as ints on both sides: a CSV-loaded matrix has string labels
    # while a freshly built one has integer labels.
    known_lookup = {int(m) for m in known_ids}
    ranked_ids = [m for m in ranked_ids if int(m) not in known_lookup]

    return_score_list = ranked_ids[0:nrec_items]
    known_merchants = [item_dict[int(m)] for m in known_ids]
    merchants_recommended = [item_dict[int(m)] for m in return_score_list]
    return return_score_list,merchants_recommended,known_merchants

In [7]:
def create_merchant_emdedding_distance_matrix(model,interactions):
    '''
    Build the item-by-item cosine similarity table from the model's item embeddings.
    Required Input -
        - model = Trained matrix factorization model (its item_embeddings attribute is read)
        - interactions = dataset used for training the model; its columns label the items
    Return Output -
        - Pandas dataframe whose rows and columns are both labelled with
          interactions.columns. Despite "distance" in the function name the
          cells hold cosine SIMILARITY, so larger values mean more similar items.
    '''
    item_labels = interactions.columns
    embedding_csr = sparse.csr_matrix(model.item_embeddings)
    pairwise = cosine_similarity(embedding_csr)
    return pd.DataFrame(pairwise, index=item_labels, columns=item_labels)

In [8]:
def recommend_merchant_to_merchant(item_emdedding_distance_matrix, item_id, 
                             item_dict, n_items = 10):
    '''
    Function to create item-item recommendation
    Required Input - 
        - item_emdedding_distance_matrix = Pandas dataframe containing the cosine similarity
                                           matrix b/w items (same labels on rows and columns)
        - item_id  = item ID for which we need to generate recommended items
        - item_dict = Dictionary type input containing item_id as key and item_name as value
        - n_items = Number of items needed as an output
    Return Output -
        - recommended_merchantid = List of recommended Ids (matrix labels), most similar first
        - final_merchants = List of recommended merchants
    Raises -
        - KeyError if item_id is not a label of the matrix or a recommended id is missing from item_dict
    '''
    labels = item_emdedding_distance_matrix.index
    # A matrix reloaded from CSV carries string labels, one built in memory
    # carries the original ids. Prefer the string form and fall back to the raw id.
    label = str(item_id)
    if label not in labels and item_id in labels:
        label = item_id

    similarities = item_emdedding_distance_matrix.loc[label, :]
    # Remove the queried item by label instead of assuming it sorts first;
    # a tie at the top could otherwise return the item itself.
    ranked = similarities.drop(labels=[label], errors='ignore').sort_values(ascending=False)

    recommended_merchantid = list(ranked.head(n_items).index)
    final_merchants = [item_dict[int(i)] for i in recommended_merchantid]
    return recommended_merchantid,final_merchants

In [9]:
######################## load the transactional data set for training ##########

# NOTE(review): both paths are absolute and machine-specific; they must be changed
# before this cell can run anywhere else.
# Rated transactions used to train the recommender.
df_cc_trans=pd.read_csv(r'C:\Users\sinnsr\OneDrive - SAS\Projects and POCs\CC Hyper Personalization\Recommender\transaction_with_ratings.csv')
# Merchant master; later cells read its 'merchant' and 'merchantid' columns.
df_merchant=pd.read_csv(r'C:\Users\sinnsr\OneDrive - SAS\Projects and POCs\CC Hyper Personalization\Recommender\merchant_master.csv')

In [10]:
# Attach the merchant master columns to every transaction by merchant name.
# pd.merge defaults to an inner join, so rows without a matching 'merchant' are dropped.
df_3 = pd.merge(df_cc_trans,df_merchant,on='merchant')

In [11]:
# Display the merged transactions frame.
df_3

Unnamed: 0,custID,merchant,amount,transaction_date,transaction_unit,job,ratings,merchantid
0,50001,dominos,477,2/27/2020,1,housewife,3,1
1,50001,dominos,472,2/10/2020,1,housewife,2,1
2,50001,dominos,454,3/18/2020,1,housewife,2,1
3,59800,dominos,577,3/14/2020,1,retired,5,1
4,59800,dominos,355,2/16/2020,1,retired,5,1
...,...,...,...,...,...,...,...,...
185168,55292,Taj Hotels,3651,3/17/2020,1,admin.,4,13
185169,55292,Taj Hotels,4538,3/16/2020,1,admin.,3,13
185170,55292,Taj Hotels,4792,2/29/2020,1,admin.,5,13
185171,55292,Taj Hotels,7836,3/31/2020,1,admin.,4,13


In [12]:
################################## Model Training #################################
# Customer x merchant matrix: rows are custID, columns are merchantid,
# cells are the summed ratings (0 where the customer never used the merchant).
interactions_cc = create_interaction_matrix(df = df_3,
                                         user_col = 'custID',
                                         item_col = 'merchantid',
                                         rating_col = 'ratings'
                                         )

In [13]:
# Display the training interaction matrix.
interactions_cc

merchantid,1,2,3,4,5,6,7,8,9,10,11,12,13
custID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
50001,16.0,17.0,26.0,13.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50002,2.0,15.0,2.0,1.0,5.0,0.0,0.0,9.0,10.0,0.0,4.0,8.0,0.0
50003,4.0,0.0,9.0,5.0,7.0,0.0,0.0,0.0,19.0,0.0,0.0,14.0,0.0
50004,0.0,0.0,0.0,0.0,3.0,13.0,0.0,5.0,0.0,12.0,0.0,4.0,13.0
50005,10.0,10.0,0.0,15.0,10.0,0.0,0.0,17.0,10.0,0.0,0.0,14.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
60277,10.0,5.0,0.0,5.0,12.0,0.0,0.0,12.0,4.0,0.0,5.0,0.0,0.0
60278,3.0,11.0,0.0,4.0,23.0,0.0,0.0,8.0,0.0,0.0,0.0,10.0,0.0
60279,0.0,34.0,5.0,22.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
60280,15.0,9.0,21.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# custID -> row position in interactions_cc (the user index the model is trained on).
user_dict = create_user_dict(interactions=interactions_cc)

In [15]:
# Display the custID -> row position mapping (one entry per customer, so the output is long).
user_dict

{50001: 0,
 50002: 1,
 50003: 2,
 50004: 3,
 50005: 4,
 50006: 5,
 50007: 6,
 50008: 7,
 50009: 8,
 50010: 9,
 50011: 10,
 50012: 11,
 50013: 12,
 50014: 13,
 50015: 14,
 50016: 15,
 50017: 16,
 50018: 17,
 50019: 18,
 50020: 19,
 50021: 20,
 50022: 21,
 50023: 22,
 50024: 23,
 50025: 24,
 50026: 25,
 50027: 26,
 50028: 27,
 50029: 28,
 50030: 29,
 50031: 30,
 50032: 31,
 50033: 32,
 50034: 33,
 50035: 34,
 50036: 35,
 50037: 36,
 50038: 37,
 50039: 38,
 50040: 39,
 50041: 40,
 50042: 41,
 50043: 42,
 50044: 43,
 50045: 44,
 50046: 45,
 50047: 46,
 50048: 47,
 50049: 48,
 50050: 49,
 50051: 50,
 50052: 51,
 50053: 52,
 50054: 53,
 50055: 54,
 50056: 55,
 50057: 56,
 50058: 57,
 50059: 58,
 50060: 59,
 50061: 60,
 50062: 61,
 50063: 62,
 50064: 63,
 50065: 64,
 50066: 65,
 50067: 66,
 50068: 67,
 50069: 68,
 50070: 69,
 50071: 70,
 50072: 71,
 50073: 72,
 50074: 73,
 50075: 74,
 50076: 75,
 50077: 76,
 50078: 77,
 50079: 78,
 50080: 79,
 50081: 80,
 50082: 81,
 50083: 82,
 50084: 83,
 5

In [16]:
# merchantid -> merchant name lookup built from the merchant master.
merchant_dict = create_item_dict(df = df_merchant,
                               id_col = 'merchantid',
                               name_col = 'merchant')

In [17]:
# Display the merchantid -> merchant name lookup.
merchant_dict

{1: 'dominos',
 2: 'Naturals',
 3: 'bookmyshow',
 4: 'airtel',
 5: 'DMART',
 6: 'Shoppersstop',
 7: 'Marks and Spencer',
 8: 'BigBazaar',
 9: 'Punjabibynature',
 10: 'LG',
 11: 'Starbucks',
 12: 'PVR',
 13: 'Taj Hotels'}

In [20]:
# Train the matrix-factorization model on the customer x merchant matrix.
# All values below equal runMF's defaults; they are spelled out for visibility.
mf_model_cc = runMF(interactions = interactions_cc,
                 n_components = 30,
                 loss = 'warp',
                 k = 15,
                 epoch = 30,
                 n_jobs = 4)

In [30]:
################ Saving the trained Model and Interaction Matrix  ######################
import pickle

# NOTE(review): the saved output of this cell shows a NameError for mf_model_cc;
# the training cell must be run first in the same kernel.
# The context manager guarantees the pickle file is flushed and closed even if dump fails.
with open('factmodel.pkl', 'wb') as model_file:
    pickle.dump(mf_model_cc, model_file)
interactions_cc.to_csv('interactions.csv')
############################### End of Model Saving ##########################

NameError: name 'mf_model_cc' is not defined

### To be run in ID

In [31]:
# For ID the runMF function is not needed: this section only scores with the saved model.
#################################################################################

# NOTE(review): absolute, machine-specific paths; this cell also rebinds
# df_cc_trans / df_merchant from the training section.
# Sample of user transactions used as the "current" interactions when scoring.
df_cc_trans=pd.read_csv(r'C:\Users\sinnsr\OneDrive - SAS\Projects and POCs\CC Hyper Personalization\Recommender\Sample_User_transaction.csv')
df_merchant=pd.read_csv(r'C:\Users\sinnsr\OneDrive - SAS\Projects and POCs\CC Hyper Personalization\Recommender\merchant_master.csv')

In [32]:
# Same merge as in training, now on the sample transactions (rebinds df_3).
# pd.merge defaults to an inner join on 'merchant'.
df_3 = pd.merge(df_cc_trans,df_merchant,on='merchant')

In [33]:
# Current customer x merchant matrix from the sample transactions; passed later as
# interactions_current to find the merchants a customer already uses.
interactions = create_interaction_matrix(df = df_3,
                                         user_col = 'custID',
                                         item_col = 'merchantid',
                                         rating_col = 'ratings'
                                         )

In [34]:
############################## open the model & Interaction Matrix ##############
import pickle  # imported here too so this section also runs in a fresh kernel

# NOTE: pickle.load can execute arbitrary code from the file; only load
# factmodel.pkl when it comes from a trusted source.
with open('factmodel.pkl', 'rb') as model_file:
    mf_model_cc_rec = pickle.load(model_file)
# After the CSV round-trip the merchantid column labels are strings ('1', '2', ...).
interactions_cc=pd.read_csv('interactions.csv',index_col='custID')
#################################################################################

In [35]:
######################## Get the user and merchant list##########################
# Rebuilt from the reloaded training matrix so positions match the rows the model was fit on.
user_dict = create_user_dict(interactions=interactions_cc)

In [36]:
# merchantid -> merchant name lookup used to turn recommended ids into names.
merchant_dict = create_item_dict(df = df_merchant,
                               id_col = 'merchantid',
                               name_col = 'merchant')

In [37]:
############ Generate recommendation of merchants for a customer ##############
# Top 6 merchants for customer 50001, excluding merchants whose summed rating in the
# current interaction matrix is above 1 (threshold).
return_score_list,merchants_recommended,known_merchants = \
                                      recommend_merchant_to_user(model = mf_model_cc_rec, 
                                      interactions_trained=interactions_cc,
                                      interactions_current=interactions, 
                                      user_id = 50001, 
                                      user_dict = user_dict,
                                      item_dict = merchant_dict, 
                                      threshold = 1,
                                      nrec_items = 6)

In [38]:
# Recommended merchant ids, best first (strings because the training matrix was reloaded from CSV).
return_score_list

['4', '5', '11', '12', '9', '6']

In [39]:
# Names of the recommended merchants, in the same order as return_score_list.
merchants_recommended

['airtel', 'DMART', 'Starbucks', 'PVR', 'Punjabibynature', 'Shoppersstop']

In [40]:
# Merchants the customer already uses (excluded from the recommendations).
known_merchants

['bookmyshow', 'Naturals', 'dominos']

In [41]:
############### Compute the cosine similarity matrix #########################

# Merchant x merchant cosine similarity of the model's item embeddings
# (higher = more similar, 1.0 on the diagonal).
merchant_merchant_dist = create_merchant_emdedding_distance_matrix(model = mf_model_cc_rec,
                                                       interactions = interactions_cc)

In [42]:
# Display the merchant x merchant similarity matrix.
merchant_merchant_dist

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13
1,1.0,0.267217,0.180325,0.225461,-0.25776,-0.273508,-0.180985,-0.117667,-0.152872,-0.258668,-0.045406,-0.242891,-0.25538
2,0.267217,1.0,0.234878,0.181363,-0.146225,-0.151643,-0.129645,-0.245403,-0.194819,-0.122291,-0.119343,-0.133178,-0.611853
3,0.180325,0.234878,1.0,0.254323,-0.141323,-0.234104,-0.13651,-0.127698,0.005116,-0.604441,-0.205487,-0.225152,-0.20133
4,0.225461,0.181363,0.254323,1.0,-0.193783,-0.088226,-0.320261,-0.177008,-0.260498,-0.301212,-0.196403,-0.108415,-0.176425
5,-0.25776,-0.146225,-0.141323,-0.193783,1.0,-0.15656,-0.194223,-0.004376,-0.030356,0.041261,-0.009616,0.025692,-0.068191
6,-0.273508,-0.151643,-0.234104,-0.088226,-0.15656,1.0,0.2834,-0.112908,-0.194066,0.269194,-0.187892,-0.209839,0.264368
7,-0.180985,-0.129645,-0.13651,-0.320261,-0.194223,0.2834,1.0,-0.175172,-0.228916,0.329344,-0.284531,-0.073286,0.369692
8,-0.117667,-0.245403,-0.127698,-0.177008,-0.004376,-0.112908,-0.175172,1.0,-0.020356,0.02018,-0.015992,0.026024,-0.061817
9,-0.152872,-0.194819,0.005116,-0.260498,-0.030356,-0.194066,-0.228916,-0.020356,1.0,-0.006111,0.034313,0.055335,-0.033832
10,-0.258668,-0.122291,-0.604441,-0.301212,0.041261,0.269194,0.329344,0.02018,-0.006111,1.0,-0.048645,-0.192614,0.270562


In [94]:
############ Generate recommendation of merchant for spot offer ##############

# Single most similar merchant to merchant id 3 according to the embedding similarities.
recommended_merchantid,final_merchants = \
                                    recommend_merchant_to_merchant(item_emdedding_distance_matrix = merchant_merchant_dist,
                                    item_id = 3,
                                    item_dict = merchant_dict,
                                    n_items = 1)

##############################################################################

In [95]:
# Name(s) of the recommended merchant(s).
final_merchants

['airtel']

In [96]:
# Collapse the list into one string; with n_items = 1 this is just the single merchant name.
''.join(final_merchants)

'airtel'

In [57]:
# Recommended merchant id(s).
# NOTE(review): this cell's execution count (57) is lower than that of the call that sets the
# variable (94), so the stored output may come from an earlier run.
recommended_merchantid

['1']

In [20]:
# merchant_dict maps merchantid -> merchant name, so merchant_dict.get("dominos") looked the
# name up as a KEY and always gave None. Search the values instead; None is still the
# result when the name is absent.
k = next((merchant_id for merchant_id, merchant_name in merchant_dict.items()
          if merchant_name == "dominos"), None)

In [23]:
# The keys are the merchant ids, not the names.
merchant_dict.keys()

dict_keys([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])

In [64]:
# Reverse lookup: id of the merchant named 'Punjabibynature' (IndexError if there is none).
a = [merchant_id for merchant_id, merchant_name in merchant_dict.items()
     if merchant_name == 'Punjabibynature'][0]

In [69]:
# Id found by the reverse lookup.
a

9

In [70]:
# The dictionary keys are numpy integers, not built-in ints.
type(a)

numpy.int64

In [74]:
# .tolist() on a numpy scalar returns the equivalent built-in Python value.
b = a.tolist()

In [75]:
# Same id, now as a built-in value.
b

9

In [76]:
# Confirms the conversion to a built-in int.
type(b)

int

In [78]:
# The values are the merchant names.
merchant_dict.values()

dict_values(['dominos', 'Naturals', 'bookmyshow', 'airtel', 'DMART', 'Shoppersstop', 'Marks and Spencer', 'BigBazaar', 'Punjabibynature', 'LG', 'Starbucks', 'PVR', 'Taj Hotels'])

In [82]:
# Placeholder; stays 0 if the lookup loop in a later cell finds no matching merchant.
item_id = 0

In [91]:
# Reverse lookup of the merchant id by name, converted to a built-in int.
# k.tolist() relies on the keys being numpy scalars; a plain int key has no .tolist().
# The loop does not break, so if several merchants shared the name the last one would win.
# Note that k and v stay bound at notebook level after the loop.
for k,v in merchant_dict.items():
    if v == 'Punjabibynature':
        item_id = k.tolist()
        

In [92]:
# Check that item_id is a built-in int.
type(item_id)

int