In [1]:
import pandas as pd

# Load the raw transaction export into a DataFrame and echo it as a quick
# sanity check of the row/column counts.
data = pd.read_csv(filepath_or_buffer='/content/data.csv')
print(data)

             Date  Voucher  ...  Qty    Rate
0      2017-04-01        1  ...    2  1690.0
1      2017-04-01        1  ...    6  1620.0
2      2017-04-01        2  ...  500    23.0
3      2017-04-01        2  ...    6  1620.0
4      2017-04-01        2  ...    5  1690.0
...           ...      ...  ...  ...     ...
95557  2019-10-10     4935  ...  140    26.0
95558  2019-10-10     4935  ...  600     8.4
95559  2019-10-10     4935  ...  320    16.0
95560  2019-10-10     4935  ...  800     8.5
95561  2019-10-10     4935  ...  400    16.0

[95562 rows x 6 columns]


# Using User-User Based Recommendation
## Building the correlation matrix for customer-product relations

In [5]:
# Total quantity of each product purchased by each customer.
# The (Product, Party) group keys are moved back out of the index so the
# result is a flat frame with Product, Party and Qty columns.
prod_cust_qty = (
    data.groupby(['Product', 'Party'])
    .agg({'Qty': 'sum'})
    .reset_index()
)

**Finding the number of unique customers who purchased each product**

In [6]:
# Number of distinct customers (Party) that bought each product.
# The aggregated 'Party' column is renamed to 'No_of_Customers', and the
# Product group key is moved from the index back into a regular column.
prod_cust_count = (
    data.groupby(['Product'])
    .agg({'Party': 'nunique'})
    .rename(columns={'Party': 'No_of_Customers'})
    .reset_index()
)

**Merge the unique customer count with the quantity purchased of each product**

In [9]:
# Attach the per-product customer count to every (Product, Party, Qty) row
# by joining on the shared 'Product' column.
product_customer = prod_cust_qty.merge(
    prod_cust_count,
    how='inner',
    on='Product',
)
print(product_customer)

                   Product                Party  Qty  No_of_Customers
0          1.25 COOLDRINKS   BRHAMADEVI SUDHEER    2                1
1      10" CLASSIFOAM-1200      BALAJI PLASTICS    1                8
2      10" CLASSIFOAM-1200  JAGADHAMBA PLASTICS    1                8
3      10" CLASSIFOAM-1200                  KNC   12                8
4      10" CLASSIFOAM-1200                  SMS   10                8
...                    ...                  ...  ...              ...
19722         ZEND-PREMIUM           SVP-BUCHHI   25               34
19723         ZEND-PREMIUM  SWATHI PROVISIONS-B   10               34
19724         ZEND-PREMIUM  T.SRINIVASULU(TST)-   35               34
19725         ZEND-PREMIUM         TVT-CHITWELI   10               34
19726         ZEND-PREMIUM  VENKATESWARA PROVIS    0               34

[19727 rows x 4 columns]


**Creating a pivot table**

In [10]:
# Reshape to a Product x Party matrix of purchased quantities.
# Product/customer pairs with no purchase have no row in product_customer,
# so they come out of the pivot as NaN and are replaced with 0.
prod_cust_pivot = (
    product_customer
    .pivot(index='Product', columns='Party', values='Qty')
    .fillna(0)
)
print(prod_cust_pivot)

Party                A.BHASKER-VGIRI  A.SRINIVASULU  ...  YVSR-VGIRI  YVT-PMR
Product                                              ...                     
1.25 COOLDRINKS                  0.0            0.0  ...         0.0      0.0
10" CLASSIFOAM-1200              0.0            0.0  ...         0.0      0.0
10" ESSFOAM LOOSE                0.0            0.0  ...         0.0      0.0
10" GREEN                        0.0            0.0  ...         0.0      0.0
10" SILVER HEAVY                 0.0            0.0  ...         0.0      0.0
...                              ...            ...  ...         ...      ...
ZEN-D CHEAP                      0.0            0.0  ...         0.0      0.0
ZEN-REALPACK                     0.0            0.0  ...         0.0      0.0
ZEND-1ST                         0.0          100.0  ...         0.0      0.0
ZEND-CLASSIC                     0.0            0.0  ...         0.0      0.0
ZEND-PREMIUM                     0.0           25.0  ...        

## Finding the correlation between every pair of customers and building a correlation matrix using the `corr` method

In [11]:
# Pairwise Spearman rank correlation between the columns of the pivot
# (one column per customer), giving a customer x customer similarity matrix.
# NOTE(review): min_periods=5 only matters when a column pair has fewer than
# five non-missing observations; if the pivot was zero-filled beforehand it
# is presumably always satisfied - confirm this is intended.
cust_correlation = prod_cust_pivot.corr(
    method='spearman',
    min_periods=5,
)

# Preview the first ten rows of the matrix.
print(cust_correlation.head(10))

Party                A.BHASKER-VGIRI  A.SRINIVASULU  ...  YVSR-VGIRI   YVT-PMR
Party                                                ...                      
A.BHASKER-VGIRI             1.000000       0.179402  ...    0.284983  0.516661
A.SRINIVASULU               0.179402       1.000000  ...    0.108637  0.227009
A.SRINIVASULU-SPT           0.290165       0.220971  ...    0.172154  0.229613
A.VENKATESWARLU-VB          0.189122       0.099051  ...   -0.012377 -0.008717
A1 PLASTICS-SPT             0.215235       0.304534  ...    0.064035  0.165050
ABC                         0.224988       0.192544  ...   -0.031711  0.035299
ACK                         0.164702       0.146889  ...   -0.027071  0.043792
ACR-PDKR                    0.129353      -0.014816  ...    0.053074  0.086059
ADHI CHIKENSTALL-GD        -0.005939      -0.015741  ...   -0.006167 -0.004343
AFOOZO PRIVATE LIMI         0.165958       0.004057  ...   -0.013854 -0.009757

[10 rows x 712 columns]


**Store to CSV and Pickle file**

In [13]:
import pickle

# Persist the customer-customer correlation matrix in two formats:
# a CSV for inspection and a pickle for reloading the DataFrame as-is.
cust_correlation.to_csv('Customer-Customer-Correlation-Matrix.csv')

# Use a context manager so the file handle is flushed and closed
# deterministically; passing a bare open(...) to pickle.dump leaves the
# handle open until garbage collection.
# NOTE: pickle files should only ever be loaded from trusted sources.
with open('cust_correlation_model.pkl', 'wb') as pkl_file:
    pickle.dump(cust_correlation, pkl_file)


# Using Item-Item Based Recommendation
### Building the correlation matrix for product-customer relations

In [14]:
# Total quantity of each product purchased by each customer, as a flat frame
# with Product, Party and Qty columns.
prod_cust_qty = (
    data.groupby(['Product', 'Party'])
    .agg({'Qty': 'sum'})
    .reset_index()
)

# Number of distinct customers (Party) that bought each product, with the
# Product group key moved back into a regular column.
prod_cust_count = (
    data.groupby(['Product'])
    .agg({'Party': 'nunique'})
    .rename(columns={'Party': 'No_of_Customers'})
    .reset_index()
)

# Attach the per-product customer count to every (Product, Party, Qty) row.
prod_cust = prod_cust_qty.merge(prod_cust_count, how='inner', on='Product')
print(prod_cust)

# Pivot with customers on rows and products on columns, Qty as the values.
# Customer/product pairs with no purchase become NaN and are replaced with 0.
# Note: this rebinds prod_cust_pivot with Party as the index.
prod_cust_pivot = (
    prod_cust
    .pivot(index='Party', columns='Product', values='Qty')
    .fillna(0)
)
print(prod_cust_pivot)

                   Product                Party  Qty  No_of_Customers
0          1.25 COOLDRINKS   BRHAMADEVI SUDHEER    2                1
1      10" CLASSIFOAM-1200      BALAJI PLASTICS    1                8
2      10" CLASSIFOAM-1200  JAGADHAMBA PLASTICS    1                8
3      10" CLASSIFOAM-1200                  KNC   12                8
4      10" CLASSIFOAM-1200                  SMS   10                8
...                    ...                  ...  ...              ...
19722         ZEND-PREMIUM           SVP-BUCHHI   25               34
19723         ZEND-PREMIUM  SWATHI PROVISIONS-B   10               34
19724         ZEND-PREMIUM  T.SRINIVASULU(TST)-   35               34
19725         ZEND-PREMIUM         TVT-CHITWELI   10               34
19726         ZEND-PREMIUM  VENKATESWARA PROVIS    0               34

[19727 rows x 4 columns]
Product              1.25 COOLDRINKS  ...  ZEND-PREMIUM
Party                                 ...              
A.BHASKER-VGIRI       

## Finding the correlation between every pair of products and building a correlation matrix using the `corr` method

In [15]:
# Pairwise Spearman rank correlation between the columns of the pivot
# (one column per product), giving a product x product similarity matrix.
# NOTE(review): min_periods=5 only matters when a column pair has fewer than
# five non-missing observations; if the pivot was zero-filled beforehand it
# is presumably always satisfied - confirm this is intended.
prod_correlation = prod_cust_pivot.corr(
    method='spearman',
    min_periods=5,
)

# Preview the first ten rows of the matrix.
print(prod_correlation.head(10))

Product              1.25 COOLDRINKS  ...  ZEND-PREMIUM
Product                               ...              
1.25 COOLDRINKS             1.000000  ...     -0.008265
10" CLASSIFOAM-1200        -0.003998  ...      0.039953
10" ESSFOAM LOOSE          -0.001990  ...     -0.011696
10" GREEN                  -0.001406  ...     -0.008265
10" SILVER HEAVY           -0.001406  ...     -0.008265
10" THERMOCOL PRINT        -0.001990  ...      0.114659
10*10 CITIZEN               0.375827  ...      0.046676
10*10 DHAVAT               -0.002440  ...      0.089773
10*10 JANATHA              -0.001406  ...     -0.008265
10*10 MORE                  0.151802  ...      0.080424

[10 rows x 866 columns]


**Store to CSV and Pickle file**

In [16]:
# Persist the product-product correlation matrix as a CSV for inspection.
prod_correlation.to_csv('Product-Product-Correlation-Matrix.csv')

# Also persist it as a pickle so the DataFrame can be reloaded as-is.
# A context manager guarantees the file handle is flushed and closed;
# passing a bare open(...) to pickle.dump leaves it open until garbage
# collection.
# NOTE: pickle files should only ever be loaded from trusted sources.
with open('prod_correlation_model.pkl', 'wb') as pkl_file:
    pickle.dump(prod_correlation, pkl_file)
