In [6]:
# ITEM BASED COLLABORATIVE FILTERING - IBCF

import pandas as pd
import numpy as np
from scipy import sparse
from jproperties import Properties
from sklearn.metrics.pairwise import cosine_similarity


def calculate_similarity(data_items):
    """Calculate the column-wise cosine similarity for a sparse
    matrix. Return a new dataframe matrix with similarities.
    :param :data_items
    """

    data_sparse = sparse.csr_matrix(data_items)
    similarities = cosine_similarity(data_sparse.transpose())
    similarity_data = pd.DataFrame(data=similarities, index=data_items.columns, columns=data_items.columns)
    return similarity_data


class ProdReco:
    def __init__(self, csv_path):
        self.dataset = pd.read_csv(csv_path, na_values='')

    def recommendation_building(self):
        """Calculate the recommendation for each customer
        . Saving the result dataframe in json file.
             """
        data = self.dataset
        dfcross_tabed = pd.crosstab(data.PERSON_ID, data.PRICEITEM_CD).reset_index().rename_axis('', axis='columns')
        dfcross_tabed1 = dfcross_tabed.drop('PERSON_ID', 1)
        item_product_data=data.groupby(['PRICEITEM_CD', 'PRODUCT_CD']).size().reset_index().rename(columns={0:'ITEM_OCCURANCE'})
        
        print(item_product_data.shape)
        item_product_data=item_product_data.dropna()
        print(item_product_data.shape)
        print(item_product_data)
        
        # Normalizing the values
        magnitude = np.sqrt(np.square(dfcross_tabed1).sum(axis=1))
        dfcross_tabed2 = dfcross_tabed1.divide(magnitude, axis='index')
        # Calling the similarity Function for Item-Item Matrix
        data_matrix = calculate_similarity(dfcross_tabed2)
        # Changing the datatype of Person_Id to str
        dfcross_tabed['PERSON_ID'] = dfcross_tabed.PERSON_ID.astype(str)
        data_items = dfcross_tabed.drop('PERSON_ID', 1)
        data_items2 = data_items.divide(magnitude, axis='index')
        scoredata = data_matrix.dot(data_items2.T)
        scoredata = scoredata.T
        scoredata = scoredata.div(data_matrix.sum(axis=1))
        bought_df = data_items2.where(data_items2 == 0, -999999)
        recommend_df = bought_df.where(bought_df != 0, scoredata)
        new_col = dfcross_tabed['PERSON_ID']
        recommend_df.insert(0, 'PERSON_ID', new_col, True)
        #recommend_df=recommend_df.append(item_product_data)
        recommend_df = pd.concat([recommend_df, item_product_data], axis=1, ignore_index=True).reset_index(drop=True, level=1)
        print(recommend_df)
        #path = "~/recommendation1.json"
        #full_path = os.path.expanduser(path)
        #print(full_path)
        #item_product_data.to_json("recommendation1.json", orient="columns")
        # recommd_df -have to save in a table/datastore
        recommend_df.to_json("recommendation1.json", orient="columns")
        print(recommend_df)
    def evaluation(self):
        """Finding the Evaluation Parameters for the recommendation system
        """
        # Read the csv File
        df = self.dataset
        dfprocessed = df.groupby(['PERSON_ID', 'PRICEITEM_CD']).size().reset_index().rename(columns={0: 'Score'})
        train = dfprocessed.sample(frac=0.70, random_state=123)  # random state is a seed value
        test_actual = dfprocessed.drop(train.index)
        test = test_actual.copy()
        # Convert already bought product count/score to 0 for testing purpose
        test.loc[:, 'Score'] = 0
        testdataset = pd.concat([train, test], ignore_index=True)
        custitemtable = testdataset.pivot(index='PERSON_ID', columns='PRICEITEM_CD', values='Score')
        custitemtable = custitemtable.replace(np.nan, 0)
        crosstableold = pd.crosstab(df.PERSON_ID, df.PRICEITEM_CD).reset_index().rename_axis('', axis='columns')
        custitemtable = custitemtable.merge(crosstableold["PERSON_ID"], on='PERSON_ID', how='left')
        df_cross_tabed1 = custitemtable.drop('PERSON_ID', 1)
        magnitude = np.sqrt(np.square(df_cross_tabed1).sum(axis=1))
        df_cross_tabed2 = df_cross_tabed1.divide(magnitude, axis='index')
        df_cross_tabed2 = df_cross_tabed2.replace(np.nan, 0)
        data_matrix = calculate_similarity(df_cross_tabed2)
        data_matrix = data_matrix.replace(np.nan, 0)
        scoredf = data_matrix.dot(df_cross_tabed2.T)
        scoredf = scoredf.T
        scoredf = scoredf.div(data_matrix.sum(axis=1))
        scoredf = scoredf.replace(np.nan, 0)
        bought_df = df_cross_tabed2.where(df_cross_tabed2 == 0, -999999)
        recommd_df = bought_df.where(bought_df != 0, scoredf)
        evalconstant = 2
        evaluation_metrices = {}
        df_final = pd.DataFrame({n: recommd_df.T[col].nlargest(evalconstant).index.tolist()
                                 for n, col in enumerate(recommd_df.T)}).T

        new_col = custitemtable['PERSON_ID']
        df_final.insert(0, 'PERSON_ID', new_col, True)
        test_actual_cross = pd.crosstab(test_actual.PERSON_ID, test_actual.PRICEITEM_CD).reset_index().rename_axis('',
                                                                                                                   axis='columns')
        test_person_id = test_actual_cross['PERSON_ID']
        test_actual_cross = test_actual_cross.drop(['PERSON_ID'], axis=1)
        test_actual_cross_df = test_actual_cross.where(test_actual_cross == 0, -999999)
        actual_bought_list = (test_actual_cross_df == -999999).sum(axis=1)
        test_final = pd.DataFrame({n: test_actual_cross.T[col].nlargest(evalconstant).index.tolist()
                                   for n, col in enumerate(test_actual_cross.T)}).T
        test_final.insert(0, 'PERSON_ID', test_person_id, True)
        mered_df = test_final.merge(df_final, on="PERSON_ID", how='inner')
        mered_df_selected = mered_df.loc[:, mered_df.columns != 'PERSON_ID']
        # Calculating the Precision
        mered_df['num_uniq'] = [len(set(v[pd.notna(v)].tolist())) for v in mered_df_selected.values]
        mered_df['num_uniq'] = (evalconstant * 2) - mered_df['num_uniq']
        mered_df['precision'] = mered_df['num_uniq'] / evalconstant
        meanprecision = mered_df['precision'].mean()
        evaluation_metrices["Precision"] = meanprecision
        # Calculating the Recall
        row_count = len(test_final.index)
        mered_df['ActualBought'] = actual_bought_list
        actal_bought_common = []
        for loopvar in range(0, row_count):
            elements = []
            actual_bought = actual_bought_list[loopvar]
            startindex = evalconstant + 1
            end_index = evalconstant * 2 + 1
            elements.extend(test_final.iloc[loopvar, 1:actual_bought + 1])
            elements.extend(mered_df.iloc[loopvar, startindex:end_index])
            a_set = set(elements)
            number_unique = len(a_set)
            actal_common = (actual_bought + evalconstant) - number_unique
            actal_bought_common.append(actal_common)
        mered_df['ActualBoughtPredicted'] = actal_bought_common
        mered_df['ActualBought'] = mered_df['ActualBought'].replace(np.nan, 0)
        mered_df.loc[mered_df.ActualBought > evalconstant, 'ActualBought'] = evalconstant
        mered_df['Recall'] = mered_df['ActualBoughtPredicted'] / mered_df['ActualBought']
        mered_df['Recall'] = mered_df['Recall'].replace([np.inf, -np.inf], 0).fillna(0)
        meanrecall = mered_df['Recall'].mean()
        evaluation_metrices["Recall"] = meanrecall
        mean_f1score = 2 * (meanrecall * meanprecision) / (meanrecall + meanprecision)
        evaluation_metrices["F1-Score"] = mean_f1score
        print(evaluation_metrices)
        return evaluation_metrices

    @staticmethod
    def recommended_execution(customer_no, no_products):
        """Give the product recommendation for a customer based on the input .
                     """
        recommd_df = pd.read_json(r'recommendation1.json')
        item_product_df['PRICEITEM_CD']=recommd_df.PRICEITEM_CD
        item_product_df['PRODUCT_CD']= recommd_df.PRODUCT_CD
        recommd_df=recommd_df.drop(['PRICEITEM_CD','PRODUCT_CD','COUNT'],axis =1)
        #item_product_df = pd.read_json(r'item_product_mapping.json')
        recommd_df['PERSON_ID'] = recommd_df.PERSON_ID.astype(str)
        data_items = recommd_df.drop('PERSON_ID', 1)
        user_index = recommd_df[recommd_df.PERSON_ID == customer_no].index.tolist()[0]
        rating = data_items.loc[user_index]
        print("----dataitems-------")
        print(data_items)
        print(type(rating))
        print(rating)
        sorted_series = rating.sort_values(ascending=False)
        print(sorted_series)
        selectd_rows = sorted_series[sorted_series>0]
        print("-----greater than zeres--------")
        print(selectd_rows)
        recommended_item_list =selectd_rows.index
        item_product_df =item_product_df.loc[item_product_df['PRICEITEM_CD'].isin(recommended_item_list)]
        print(item_product_df)
        recommend_products_df =pd.DataFrame()
        recommend_products_df['PRICEITEM_CD']=list(selectd_rows.index)
        recommend_products_df['SCORE']=list(selectd_rows.values)
        merged_data = recommend_products_df.merge(item_product_df, on="PRICEITEM_CD", how = 'left')
        print(merged_data)
        
        
        merged_data=merged_data.dropna()
        print(merged_data)
        print(type(merged_data))
        
        
        
        product_series = merged_data['PRODUCT_CD']
        sorted_product=product_series.value_counts(normalize=False)
        print(sorted_product)
        print(type(sorted_product))
        d = dict(zip(sorted_product.index,sorted_product.values))
        merged_data['COUNT'] = merged_data['PRODUCT_CD'].map(d)
        #merged_productcount = merged_data.merge(sorted_product, on="PRODUCT_CD", how = 'left')
        print(merged_data)
        merged_data['SCORE_UPDATED'] = merged_data['SCORE']*merged_data['COUNT']
        print(merged_data)
        recommended_products_scoredf = merged_data.sort_values(by = 'SCORE_UPDATED',ascending = False) 
        print(recommended_products_scoredf)
        recommended_products_score_df = recommended_products_scoredf.drop_duplicates(subset=['PRODUCT_CD'], keep='first') 
        product_data = recommended_products_score_df.drop(['PRICEITEM_CD','SCORE','COUNT'],axis =1)
        #print(product_data)
        
        if len(product_data)+1 <= no_products:
            print("reduce recommendation input")
            recommend_products_series=None
        else:
            recommend_products_series =product_data.head(no_products)
            print(recommend_products_series)
            
        #recommend_products_series = rating.nlargest(no_products)
        #print(recommend_products_series)
        #recommended_item_list =recommend_products_series.index
        #item_product_df =item_product_df.loc[item_product_df['PRICEITEM_CD'].isin(recommended_item_list)]
        #print(item_product_df)
        #recommend_products_df =pd.DataFrame()
        #recommend_products_df['PRICEITEM_CD']=list(recommend_products_series.index)
        #recommend_products_df['SCORE']=list(recommend_products_series.values)
        #merged_data = recommend_products_df.merge(item_product_df, on="PRICEITEM_CD", how = 'left')
        #print(merged_data)
        #product_series = item_product_df['PRODUCT_CD']
        #sorted_product=product_series.value_counts(normalize=False)
        #sorted_product =.value_counts(product_series, normalize=False, sort=True, ascending=False, bins=None, dropna=True)
        #recommended_products_scoredf = merged_data.groupby('PRODUCT_CD')['SCORE'].max().reset_index()
        #recommended_products_scoredf = recommended_products_scoredf.sort_values(by = 'SCORE',ascending = False) 
        #print(recommended_products_scoredf)
        #print(recommend_products_series.iloc[0])
        #print(sorted_product)
        return recommend_products_series


configs = Properties()
with open('config.properties', 'rb') as config_file:
    configs.load(config_file)
csvfilepath = configs["filePath"].data
objectProductReco = ProdReco(csvfilepath)
print(objectProductReco.dataset.shape)
objectProductReco.recommendation_building()
objectProductReco.evaluation()
# Recommendation for given customer_no=72231957, and the number of products to recommend as no_products=5
recommend_products = objectProductReco.recommended_execution('72231957', 6)
#print(recommend_products)


(384, 25)
(27, 3)
(27, 3)
                      PRICEITEM_CD                      PRODUCT_CD  \
0   AB_AMF                          APPTEST2                         
1   AB_AMF                          BTA                              
2   AB_AMF                          BTA1                             
3   AB_AOF                          APPTEST2                         
4   AB_AOF                          BTA                              
5   AB_AOF                          BTA1                             
6   AB_CCARDS                       BTA                              
7   AB_CCARDS                       BTA1                             
8   AB_CWN                          APPTEST2                         
9   AB_CWN                          BTA                              
10  AB_CWN                          BTA1                             
11  ACHCR                           ABC_BNK_STD_PROD                 
12  ACHDR                           ABC_BNK_STD_PROD            

IndexError: Too many levels: Index has only 1 level, not 2

In [None]:
ABC_BNK_STD_PROD  7
