ITEM-BASED COLLABORATIVE FILTERING (IBCF)

In [1]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from jproperties import Properties


In [2]:
class ProdReco:
    """Item-based collaborative filtering (IBCF) product recommender.

    ``recommend_building`` turns a purchase CSV into a person x product score
    table and persists it as JSON; ``recommended_execution`` reads that table
    back and returns the best-scoring products for one customer.
    """

    # File the score table is written to and read back from. Kept in a single
    # place so both methods always agree; assign a different value on an
    # instance (or subclass) to redirect the output.
    RECOMMENDATION_FILE = "recommendation1.JSON"

    # Score stored for products a person has already bought, so that they
    # sort below every genuine recommendation score.
    BOUGHT_SCORE = -999999

    @staticmethod
    def _calculate_similarity(data_items):
        """Return the column-wise (item-item) cosine similarity of *data_items*.

        The result is a square DataFrame whose index and columns are both the
        columns of *data_items*.
        """
        # Transpose so that each row handed to cosine_similarity is one item.
        item_vectors = sparse.csr_matrix(data_items).transpose()
        similarities = cosine_similarity(item_vectors)
        return pd.DataFrame(
            data=similarities,
            index=data_items.columns,
            columns=data_items.columns,
        )

    def recommend_building(self, csv_file_path):
        """Build the recommendation score table and save it as JSON.

        Args:
            csv_file_path: Path of a CSV file that has ``PERSON_ID`` and
                ``PRICEITEM_CD`` columns.

        The table has one row per person: a ``PERSON_ID`` column (as str)
        followed by one column per product holding the recommendation score,
        or ``BOUGHT_SCORE`` for products the person already has. It is written
        to ``RECOMMENDATION_FILE`` with ``orient="columns"``.
        """
        # Read the csv file
        df = pd.read_csv(csv_file_path, na_values='')

        # A matrix representation of Person x Products (occurrence counts)
        df_cross_tabbed = (
            pd.crosstab(df.PERSON_ID, df.PRICEITEM_CD)
            .reset_index()
            .rename_axis('', axis='columns')
        )
        # `columns=` instead of a positional axis: pandas 2.0 made every
        # argument of DataFrame.drop except `labels` keyword-only.
        data_items = df_cross_tabbed.drop(columns='PERSON_ID')

        # Normalize every person's row to unit length
        magnitude = np.sqrt(np.square(data_items).sum(axis=1))
        normalized = data_items.divide(magnitude, axis='index')

        # Item-Item similarity matrix
        similarity = self._calculate_similarity(normalized)

        # Score = similarity-weighted sum of the person's (normalized)
        # purchases, divided by each item's total similarity.
        score = similarity.dot(normalized.T).T
        score = score.div(similarity.sum(axis=1))

        # Keep the score only for products the person has not bought yet.
        recommd_df = score.where(normalized == 0, self.BOUGHT_SCORE)
        recommd_df.insert(
            0, 'PERSON_ID', df_cross_tabbed['PERSON_ID'].astype(str), True
        )

        # recommd_df - have to save in a table/datastore
        recommd_df.to_json(self.RECOMMENDATION_FILE, orient="columns")

    # Recommendation Function
    def recommended_execution(self, customer_no, no_products):
        """Return the top *no_products* recommendations for one customer.

        Args:
            customer_no: Customer identifier; compared as a string against the
                stored ``PERSON_ID`` values, so ``72231957`` and
                ``'72231957'`` are treated alike.
            no_products: Number of products to return.

        Returns:
            A Series of scores indexed by product code, largest first. If
            fewer than *no_products* unbought products exist, the remaining
            entries carry ``BOUGHT_SCORE``.

        Raises:
            IndexError: If *customer_no* is not present in the saved table.
        """
        recommd_df = pd.read_json(self.RECOMMENDATION_FILE)
        # NOTE(review): read_json infers dtypes by default, so numeric-looking
        # IDs are presumably parsed as numbers before being cast back to str
        # (leading zeros would be lost) - confirm against the real ID format.
        person_ids = recommd_df['PERSON_ID'].astype(str)

        matches = recommd_df.index[person_ids == str(customer_no)]
        if matches.empty:
            raise IndexError(
                f"customer {customer_no!r} not found in "
                f"{self.RECOMMENDATION_FILE}"
            )

        # Drop the ID column first so the selected row holds scores only.
        rating = recommd_df.drop(columns='PERSON_ID').loc[matches[0]]
        return rating.nlargest(no_products)

# Driver cell: build the recommendation table from the configured CSV, then
# print the top recommendations for one customer.

# Create the recommender object (class ProdReco).
product_reco_object = ProdReco()
# Load the configuration for the input dataset from config.properties; the
# file is opened in binary mode and parsed by Properties.load().
configs = Properties()
with open('config.properties', 'rb') as config_file:
    configs.load(config_file)
# "filePath" is the property naming the input CSV; .data is presumably the
# property's value - confirm against the jproperties documentation.
csv_file_path = configs["filePath"].data
# Build the recommendation table from that CSV.
product_reco_object.recommend_building(csv_file_path)
# Recommendation for the given customer number '72231957' with 5 products to
# recommend. The ID is passed as a string because recommended_execution
# compares it against PERSON_ID values cast to str.
recommed_products= product_reco_object.recommended_execution('72231957',5)
print(recommed_products)

SIV123                            0.140681
DM_P2                             0.117106
AB_CCARDS                         0.076253
AB_CWN                            0.052254
AB_AMF                            0.048927
Name: 0, dtype: float64
