In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, apriori, association_rules
import json
import pickle
import openpyxl
from CommonFunctions1 import *

In [2]:
def user_cat_list_generator(df):
    '''Returns a list of categories for each user in which maximum
       amount is spent by the user'''
    
    df_cat = df.drop(['User_Id', 'job', 'credit_score','card_issue_date',
                      'card_type', 'best_card', 'rewards'], axis=1)
    cat_val = df_cat.values                               # Extracts only category wise spend of every user
    min_max_scaler = MinMaxScaler()                       # Scale the values along y axis in a range[0,1]
    cat_scaled = min_max_scaler.fit_transform(cat_val)
    cat_list = list(df_cat.columns)                       # Find out column names, i.e., category names
    user_cat_list = []
    
    # Iterate over every user, considering one user's list at a time
    for user in cat_scaled:
        user_cat = sorted(zip(user,cat_list),reverse=True)[:3]    # Sort user list and pick max 3 categories
        user_list = [cat[1] for cat in user_cat]                  # Extract cat no.
        user_cat_list.append(user_list)
        
    # Preparing DataFrame for FP Algo, kind of one hot encoder
    te = TransactionEncoder()
    te_ary = te.fit(user_cat_list).transform(user_cat_list)
    df_final = pd.DataFrame(te_ary, columns=te.columns_)
    return df_final, user_cat_list

In [3]:
def frequent_pattern_growth_algo(df, min_sup, metric = 'lift', threshold = 3.5):
    '''Returns the association rules by Frequent Pattern Growth Algo'''
    
    rules_fp = fpgrowth(df, min_support = min_sup, use_colnames = True)
    rules_fp = association_rules(rules_fp, metric = metric, min_threshold = threshold)
    return rules_fp

In [4]:
def dict_generator(rules):
    '''Compiles all the rules in a dictionary'''
    
    rules_left = list(np.array(rules[['antecedents']])[:, 0])
    rules_right = list(np.array(rules[['consequents']])[:, 0])
    new_dict = {}
    for i in range(len(rules_left)):
        new_dict[tuple(rules_left[i])] = tuple(rules_right[i])
        
    return new_dict

In [5]:
def association_generator(user_cat_list,cat_dict):
    '''Generates the list of association categories for every user'''
    
    user_association_list = []
    
    # Iterating on every user
    for user in user_cat_list:
        user_list = user
        
        # Find out all pairs of categories that may be present (max size -> 3)
        check_list = [ tuple([user[0]]), tuple([user[1]]), tuple([user[2]]),
                       tuple([user[0],user[1]]), tuple([user[0],user[2]]) ,tuple([user[1],user[2]]),
                       tuple([user[0],user[1],user[2]])
                     ]
        
        # Check if a group of cats is present in cat_dict which contains association categories, then add them too
        for check in check_list:
            if check in cat_dict.keys():
                user_list += cat_dict[check]
        user_list = list(set(user_list))         # Extracting only unique categories
        user_association_list.append(user_list)
    return user_association_list 

In [6]:
def find_suggestions(user_association_list, df, df_prod, factor = 1.5):
    '''Given the product database and interest categories of user, returns
    the list of 3 most favorable products to recommend to the user'''
    
    suggestion_list = []
    for i, user_list in enumerate(user_association_list):
        reward_points = df.iloc[i]['rewards'] * factor       # Find the reward point threshold
        
        # Find products in user's categories and which have reward points less then threshold
        df_user = df_prod.loc[ (df_prod['product_category'].isin(user_association_list[i])) & (df_prod['reward_points_required'] <= reward_points)]
        
        # Sort and find out top 3 products wrt to reward points
        df_user = df_user.sort_values('reward_points_required', ascending=False)
        val = df_user.values
        products = list(val[:3,0])
        suggestion_list.append((i,products))
    return suggestion_list

In [7]:
def main():
    df = pd.read_csv("Supervised database with rewards.csv")
    df_prod = pd.read_excel('Product_Database.xlsx')
    df_final, user_cat_list = user_cat_list_generator(df)    # Find user interested categories list
    rules_fp = frequent_pattern_growth_algo(df_final, 0.03)  # Association Rules
    fp_dict = dict_generator(rules_fp)                       # Dictionary containing rules
    user_association_list = association_generator(user_cat_list,fp_dict)       # Add association categories to user_list
    suggestion_list = find_suggestions(user_association_list, df, df_prod)     # Find suggestions
    return suggestion_list, user_association_list, user_cat_list, rules_fp

In [8]:
suggestion_list, b, c, d = main()

In [9]:
suggestion_list

[(0, [23, 30, 21]),
 (1, [55, 56, 54]),
 (2, [33, 41, 46]),
 (3, [52, 41, 51]),
 (4, [50, 55, 49]),
 (5, [52, 41, 51]),
 (6, [23, 30, 21]),
 (7, [34, 36, 16]),
 (8, [16, 52, 15]),
 (9, [8, 7, 34]),
 (10, [50, 49, 34]),
 (11, [50, 55, 49]),
 (12, [50, 49, 16]),
 (13, [50, 49, 34]),
 (14, [34, 36, 33]),
 (15, [34, 36, 33]),
 (16, [55, 56, 54]),
 (17, [34, 36, 16]),
 (18, [34, 36, 16]),
 (19, [50, 49, 16]),
 (20, [50, 49, 16]),
 (21, [49, 34, 36]),
 (22, [8, 7, 6]),
 (23, [50, 49, 34]),
 (24, [34, 36, 33]),
 (25, [34, 36, 16]),
 (26, [50, 49, 34]),
 (27, [50, 49, 52]),
 (28, [54, 34, 36]),
 (29, [34, 36, 16]),
 (30, [53, 52, 51]),
 (31, [34, 36, 33]),
 (32, [52, 33, 35]),
 (33, [8, 7, 34]),
 (34, [34, 36, 33]),
 (35, [55, 56, 54]),
 (36, [50, 23, 30]),
 (37, [50, 49, 52]),
 (38, [50, 49, 34]),
 (39, [16, 12, 15]),
 (40, [34, 36, 33]),
 (41, [28, 24, 27]),
 (42, [55, 56, 54]),
 (43, [49, 34, 36]),
 (44, [16, 52, 15]),
 (45, [50, 49, 34]),
 (46, [34, 36, 33]),
 (47, [53, 41, 44]),
 (48, [53