In [1]:
import pandas as pd
import pickle

# Load the feature-engineered sales transactions into a DataFrame
sales_df = pd.read_csv(filepath_or_buffer='Sales-Transactions-Edited.csv')

# Build the Customer-Customer Correlation Matrix from Customer-Product purchase quantities (User-User based recommendation)

In [2]:
# Total quantity each customer (Party) purchased of each product.
# The Product/Party group keys are turned back into ordinary columns.
prod_cust_qty_df = (
    sales_df
    .groupby(['Product', 'Party'])
    .agg({'Qty': 'sum'})
    .reset_index()
)


# Number of distinct customers that purchased each product.
# Only Product is a group key here, so it is the only index level
# converted back into a column.
prod_cust_count_df = (
    sales_df
    .groupby(['Product'])
    .agg({'Party': 'nunique'})
    .rename(columns={'Party': 'No_of_Customers'})
    .reset_index()
)


# Attach the per-product customer count to every (Product, Party) quantity row
prod_cust_df = prod_cust_qty_df.merge(prod_cust_count_df, how='inner', on='Product')


# Pivot to a Products (rows) x Customers (columns) table of quantities;
# customer/product pairs with no purchase are filled with 0
prod_cust_pivot_df = (
    prod_cust_df
    .pivot(index='Product', columns='Party', values='Qty')
    .fillna(0)
)

# Pairwise correlation between customer columns, giving a customer-customer matrix.
# Spearman was chosen by the authors: Pearson did not give better results and
# Kendall took too long to run.
# NOTE(review): the pivot is zero-filled above, so it contains no NaN and
# min_periods=5 only has an effect when there are fewer than 5 products.
cust_correlation_df = prod_cust_pivot_df.corr(method='spearman', min_periods=5)
cust_correlation_df

Party,A.BHASKER-VGIRI,A.SRINIVASULU,A.SRINIVASULU-SPT,A.VENKATESWARLU-VB,A1 PLASTICS-SPT,ABC,ACK,ACR-PDKR,ADHI CHIKENSTALL-GD,AFOOZO PRIVATE LIMI,...,VVS-UGIRI,WHITE HORSE,WORTH INDUSTRIES,Y.MADHAVI-SYDHAPURA,Y.REDDY-NLR,Y.SIDHYYA-TALUPURU,YUVARAJ PLASTICS-AN,YVSR-ALURU,YVSR-VGIRI,YVT-PMR
Party,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A.BHASKER-VGIRI,1.000000,0.179402,0.290165,0.189122,0.215235,0.224988,0.164702,0.129353,-0.005939,0.165958,...,0.227788,0.015459,0.192573,0.094780,-0.011144,0.161556,0.320169,0.176028,0.284983,0.516661
A.SRINIVASULU,0.179402,1.000000,0.220971,0.099051,0.304534,0.192544,0.146889,-0.014816,-0.015741,0.004057,...,0.310457,0.087533,0.071593,0.064712,0.100155,0.317790,0.308290,0.212027,0.108637,0.227009
A.SRINIVASULU-SPT,0.290165,0.220971,1.000000,0.105688,0.357023,0.230247,0.161258,0.145594,-0.015009,0.045443,...,0.316875,0.056028,-0.015009,0.114559,-0.028160,0.411191,0.286657,0.178868,0.172154,0.229613
A.VENKATESWARLU-VB,0.189122,0.099051,0.105688,1.000000,0.209447,0.085305,0.283365,-0.012819,-0.004646,-0.010436,...,0.117794,-0.022596,-0.004646,0.260401,-0.008717,0.287065,0.265021,0.229515,-0.012377,-0.008717
A1 PLASTICS-SPT,0.215235,0.304534,0.357023,0.209447,1.000000,0.308351,0.227192,0.064483,-0.013692,0.011590,...,0.264070,0.034154,-0.013692,0.127360,0.020731,0.322809,0.397787,0.197634,0.064035,0.165050
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Y.SIDHYYA-TALUPURU,0.161556,0.317790,0.411191,0.287065,0.322809,0.252772,0.216142,0.015058,-0.010017,-0.022503,...,0.350972,0.085573,-0.010017,0.177190,-0.018794,1.000000,0.381788,0.384400,0.196172,0.231520
YUVARAJ PLASTICS-AN,0.320169,0.308290,0.286657,0.265021,0.397787,0.247608,0.404240,0.212832,-0.010993,0.125943,...,0.316673,0.045382,0.100853,0.219322,0.035781,0.381788,1.000000,0.243718,0.094173,0.212437
YVSR-ALURU,0.176028,0.212027,0.178868,0.229515,0.197634,0.323492,0.153540,-0.013605,-0.004930,0.095990,...,0.201395,0.081056,0.232276,0.246091,-0.009251,0.384400,0.243718,1.000000,-0.013136,-0.009251
YVSR-VGIRI,0.284983,0.108637,0.172154,-0.012377,0.064035,-0.031711,-0.027071,0.053074,-0.006167,-0.013854,...,0.174291,0.010337,-0.006167,-0.011571,-0.011571,0.196172,0.094173,-0.013136,1.000000,0.500745


# Write the Customer to Customer Correlation Matrix to a .csv file

In [3]:
# Persist the customer-customer correlation matrix (index included) as CSV
cust_correlation_df.to_csv(path_or_buf='Customer-Customer-Correlation-Matrix.csv')

# Create a Pickle (.pkl) file with the Correlation Matrix dataframe

In [4]:
# Serialise the customer-customer correlation DataFrame to a pickle file.
# The `with` block closes the file handle deterministically (flushing the
# data to disk) even if pickle.dump raises.
with open('cust_correlation_model.pkl', 'wb') as model_file:
    pickle.dump(cust_correlation_df, model_file)