In [1]:
import pandas as pd
import pickle

# Load the feature-engineered sales transactions into a DataFrame.
# The path is relative to the notebook's working directory.
sales_df = pd.read_csv(filepath_or_buffer='Sales-Transactions-Edited.csv')

# Build the Product-Product Correlation Matrix from Product-Customer purchase quantities (Item-Item based recommendation)

In [2]:
# Total quantity bought per (Product, Party) pair. The group keys are moved
# out of the index so that Product and Party become ordinary columns.
prod_cust_qty_df = (
    sales_df
    .groupby(['Product', 'Party'])
    .agg({'Qty': 'sum'})
    .reset_index()
)

# Number of distinct customers that bought each product. The aggregated
# 'Party' column is relabelled 'No_of_Customers', and Product (the only
# group key here) is moved out of the index into a column.
prod_cust_count_df = (
    sales_df
    .groupby(['Product'])
    .agg({'Party': 'nunique'})
    .rename(columns={'Party': 'No_of_Customers'})
    .reset_index()
)

# Attach the per-product customer count to every (Product, Party) quantity row.
prod_cust_df = prod_cust_qty_df.merge(
    prod_cust_count_df,
    how='inner',
    on='Product',
)

# Customer x Product matrix of purchased quantities: one row per Party, one
# column per Product. Pairs with no purchase are filled with 0.
prod_cust_pivot_df = (
    prod_cust_df
    .pivot(index='Party', columns='Product', values='Qty')
    .fillna(0)
)

# Pairwise correlation between product columns, giving a Product x Product matrix.
# Spearman is used: Pearson was not providing better results and Kendall was
# taking a long time to execute.
prod_correlation_df = prod_cust_pivot_df.corr(
    method='spearman',
    min_periods=5,
)
prod_correlation_df

Product,1.25 COOLDRINKS,"10"" CLASSIFOAM-1200","10"" ESSFOAM LOOSE","10"" GREEN","10"" SILVER HEAVY","10"" THERMOCOL PRINT",10*10 CITIZEN,10*10 DHAVAT,10*10 JANATHA,10*10 MORE,...,WATER DISPENSERS,WATER GLASS,WATER GLASS(300),WELCOME GLASS,WINE GLASS,ZEN-D CHEAP,ZEN-REALPACK,ZEND-1ST,ZEND-CLASSIC,ZEND-PREMIUM
Product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.25 COOLDRINKS,1.000000,-0.003998,-0.001990,-0.001406,-0.001406,-0.001990,0.375827,-0.002440,-0.001406,0.151802,...,-0.001406,0.133458,-0.003998,-0.002440,0.194644,-0.001990,-0.002819,-0.007444,-0.004243,-0.008265
"10"" CLASSIFOAM-1200",-0.003998,1.000000,-0.005658,-0.003998,-0.003998,-0.005658,-0.010622,-0.006934,-0.003998,-0.027995,...,-0.003998,0.076844,-0.011363,0.200803,0.120882,-0.005658,-0.008012,0.050261,-0.012061,0.039953
"10"" ESSFOAM LOOSE",-0.001990,-0.005658,1.000000,-0.001990,0.707604,-0.002817,0.266317,0.407961,-0.001990,0.100911,...,-0.001990,0.096486,0.247523,-0.003452,0.133641,-0.002817,-0.003989,0.132077,0.233867,-0.011696
"10"" GREEN",-0.001406,-0.003998,-0.001990,1.000000,-0.001406,-0.001990,-0.003737,-0.002440,-0.001406,-0.009849,...,-0.001406,-0.010298,-0.003998,0.574910,-0.007444,-0.001990,-0.002819,-0.007444,-0.004243,-0.008265
"10"" SILVER HEAVY",-0.001406,-0.003998,0.707604,-0.001406,1.000000,-0.001990,0.379563,0.578163,-0.001406,0.152230,...,-0.001406,0.146433,0.353301,-0.002440,0.196022,-0.001990,-0.002819,0.193815,0.334273,-0.008265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZEN-D CHEAP,-0.001990,-0.005658,-0.002817,-0.001990,-0.001990,-0.002817,-0.005289,-0.003452,-0.001990,0.097260,...,-0.001990,-0.014573,-0.005658,-0.003452,-0.010535,1.000000,-0.003989,0.126021,-0.006005,-0.011696
ZEN-REALPACK,-0.002819,-0.008012,-0.003989,-0.002819,-0.002819,-0.003989,-0.007490,-0.004889,-0.002819,-0.019739,...,-0.002819,-0.020639,0.169763,-0.004889,-0.014919,-0.003989,1.000000,0.085375,0.326244,0.075047
ZEND-1ST,-0.007444,0.050261,0.132077,-0.007444,0.193815,0.132663,0.130609,0.324392,-0.007444,0.038800,...,-0.007444,0.153291,0.123043,-0.012911,0.114505,0.126021,0.085375,1.000000,0.378314,0.276525
ZEND-CLASSIC,-0.004243,-0.012061,0.233867,-0.004243,0.334273,-0.006005,0.118297,0.188907,-0.004243,0.128772,...,-0.004243,0.124808,0.108720,-0.007360,0.113411,-0.006005,0.326244,0.378314,1.000000,0.099177


# Write the Product to Product Correlation Matrix to a .csv file

In [3]:
# Persist the Product x Product correlation matrix as CSV (row index included).
prod_correlation_df.to_csv(path_or_buf='Product-Product-Correlation-Matrix.csv')

# Create a Pickle (.pkl) file with the Correlation Matrix dataframe

In [4]:
# Serialize the correlation matrix DataFrame to a pickle file.
# The context manager guarantees the file handle is flushed and closed even if
# pickling raises; passing a bare open(...) left the handle open until it
# happened to be garbage-collected.
with open('prod_correlation_model.pkl', 'wb') as model_file:
    pickle.dump(prod_correlation_df, model_file)