In [315]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import association_rules, apriori

In [None]:
sales_data = pd.read_csv("datasets/sales_data.csv")
sales_data.head()

In [None]:
product = pd.read_csv("datasets/products.csv")
product.head()

# Data Wrangling

# Merge Data

In [None]:
sales_receipts_data = sales_data[["transaction_id", "transaction_date", "quantity", "customer_id", "product_id", "sales_outlet_id"]]
products_data = product[["product_id", "product_category", "product"]]

merged_dataset = pd.merge(sales_receipts_data, products_data, on="product_id", how="left")
merged_dataset.head()

In [None]:
##Remove sizes

merged_dataset[merged_dataset["product"].str.contains("Dark chocolate")]['product'].unique()

In [None]:
merged_dataset["product"].nunique() 


In [321]:
merged_dataset["product"] = merged_dataset["product"].str.replace("Rg", "")
merged_dataset["product"] = merged_dataset["product"].str.replace("Sm", "")
merged_dataset["product"] = merged_dataset["product"].str.replace("Lg", "")


In [None]:
print(merged_dataset["product"].unique())

In [None]:
merged_dataset["product"].nunique()

# Choose Product Subset

In [324]:
products_to_take = ['Cappuccino', 'Latte', 'Espresso shot',  \
                     'Dark chocolate','Sugar Free Vanilla syrup', 'Chocolate syrup',\
                    'Carmel syrup', 'Hazelnut syrup', 'Ginger Scone',  \
                    'Chocolate Croissant', 'Jumbo Savory Scone', 'Cranberry Scone', 'Hazelnut Biscotti',\
                    'Croissant', 'Almond Croissant', 'Oatmeal Scone', 'Chocolate Chip Biscotti',\
                    'Ginger Biscotti',\
                   ]

In [325]:
merged_dataset = merged_dataset[merged_dataset['product'].isin(products_to_take)]

In [None]:
merged_dataset["product"].nunique()

In [None]:
merged_dataset[['product','product_category']].drop_duplicates().reset_index(drop=True)

# Clean Transactions


In [328]:
merged_dataset["transaction"] = merged_dataset["transaction_id"].astype(str) +"_"+ merged_dataset['customer_id'].astype(str)
##Pushing this firebase and rag db



In [329]:
num_of_items_for_each_transaction = merged_dataset['transaction'].value_counts().reset_index() # grab the number of transaction for the lift calculation
num_of_items_for_each_transaction.head()
valid_transactions = num_of_items_for_each_transaction[(num_of_items_for_each_transaction['count']>1)]['transaction'].tolist()
##Exclude items with 1 item only because we can't calculate the role 
merged_dataset = merged_dataset[merged_dataset['transaction'].isin(valid_transactions)]

In [None]:
merged_dataset.shape

In [None]:
merged_dataset.head()

# Product Trends

In [None]:
merged_dataset["product_category"].value_counts()
merged_dataset["product"].value_counts()



# Popularity Recommendation Engine

In [333]:
product_recommendation = merged_dataset.groupby(["product", "product_category"]).count().reset_index()


In [334]:
product_recommendation = product_recommendation[["product", "product_category", "transaction_id"]]
product_recommendation = product_recommendation.rename(columns={"transaction_id": "number_of_transactions"})

In [None]:
product_recommendation.to_csv('api/recommendation_objects/')
product_recommendation.head()

In [336]:
product_recommendation.to_csv('api/recommendation_objects/popularity_recommendation.csv', index=False) #Export to CSV


# Apriori Recommendation Engine - Lifts

In [None]:
train_shopping_basket = (merged_dataset.groupby(['transaction', 'product'])['product'].count().reset_index(name='Count')) #groupby by transaction and by product -> count and reset the index 
#count how many times product shows in a transaction
train_shopping_basket.head()


In [None]:
#Long format in data wrangling

my_basket = train_shopping_basket.pivot_table(index="transaction",  columns="product", values="Count").fillna(0) # 1 0 0 0 1
my_basket.head()

In [None]:
def encode_units_to_longform(x):
    if x <=0:
        return 0
    
    if x >0 :
        return 1
    
my_basket_sets = my_basket.applymap(encode_units_to_longform) # apply to all cels
my_basket_sets.head(20)

In [None]:
frequent_items = apriori(my_basket_sets, min_support=0.05, use_colnames=True)
frequent_items.head() # Support min 0.05, 
#Association rule 

In [None]:
print(frequent_items)
rules_basket = association_rules(frequent_items, metric="lift", min_threshold=1, num_itemsets=len(frequent_items.index))
##Antecendets and Consequents
rules_basket.head()

In [343]:
# Save Rules Basket
rules_basket.to_pickle('rules_basket.pkl')

In [344]:
rules_basket[rules_basket['antecedents']=={"Latte"}].sort_values('confidence', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
26,(Latte),(Sugar Free Vanilla syrup),0.281346,0.231906,0.085627,0.304348,1.312375,1.0,0.020381,1.104135,0.331206,0.200238,0.094313,0.336789
10,(Latte),(Carmel syrup),0.281346,0.219674,0.082569,0.293478,1.335973,1.0,0.020765,1.104462,0.349934,0.19732,0.094582,0.334674
17,(Latte),(Chocolate syrup),0.281346,0.223242,0.074924,0.266304,1.192898,1.0,0.012116,1.058693,0.225011,0.174377,0.055439,0.30096
24,(Latte),(Hazelnut syrup),0.281346,0.199796,0.073904,0.262681,1.314746,1.0,0.017692,1.085289,0.333118,0.181477,0.078586,0.31629
13,(Latte),(Chocolate Croissant),0.281346,0.165138,0.051988,0.184783,1.118961,1.0,0.005527,1.024098,0.147935,0.131783,0.023531,0.249799
18,(Latte),(Croissant),0.281346,0.141182,0.050459,0.179348,1.270326,1.0,0.010738,1.046506,0.29611,0.135616,0.044439,0.268374


# JSON Format

In [None]:
product_categories = merged_dataset[['product','product_category']].drop_duplicates().set_index('product').to_dict()['product_category']

In [None]:
recommendations_json = {}

antecedents = rules_basket['antecedents'].unique()
for antecedent in antecedents:
    df_rec = rules_basket[rules_basket['antecedents']==antecedent]
    df_rec = df_rec.sort_values('confidence',ascending=False)
    key = "_".join(antecedent)
    recommendations_json[key] = []
    for _, row in df_rec.iterrows():
        rec_objects =row['consequents']
        for rec_object in rec_objects:
            already_exists = False
            for current_rec_object in recommendations_json[key]:
                if rec_object == current_rec_object['product']:
                    already_exists=True
            if already_exists:
                continuex
            rec = {'product':rec_object, 
                   'product_category':product_categories[rec_object],
                   'confidence': row['confidence']
                  }
            recommendations_json[key].append(rec)

In [None]:

import json
with open('api/recommendation_objects/apriori_recommendations.json', 'w') as json_file:
    json.dump(recommendations_json, json_file)