# Market Basket Analysis of Store Data

## Data Description

    1. Products purchase data over 7500 transactions at a retail store.
    2. We are using library(apyori) to calculate the association rule using Apriori.

## importing library

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from apyori import apriori

## Reading and displaying data

In [2]:
data=pd.read_csv("Downloads/store_data.csv",header=None)

In [3]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,butter,light mayo,fresh bread,,,,,,,,,,,,,,,,,
7497,burgers,frozen vegetables,eggs,french fries,magazines,green tea,,,,,,,,,,,,,,
7498,chicken,,,,,,,,,,,,,,,,,,,
7499,escalope,green tea,,,,,,,,,,,,,,,,,,


## Preprocessing on data

    As Apriori takes data in the form of list of lists

In [4]:
records=[]
for i in range(0,7501):
    records.append([str(data.values[i,j]) for j in range(0,20)])

In [5]:
for i in range(0,5):
    print(records[i])

['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes', 'whole weat flour', 'yams', 'cottage cheese', 'energy drink', 'tomato juice', 'low fat yogurt', 'green tea', 'honey', 'salad', 'mineral water', 'salmon', 'antioxydant juice', 'frozen smoothie', 'spinach', 'olive oil']
['burgers', 'meatballs', 'eggs', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan']
['chutney', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan']
['turkey', 'avocado', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan']
['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan']


## Apriori Algorithm

    Now its time to use the algorithm on our preprocessed data.
    we have to provide min_support,min_confidence,min_lift and min_length of sample set to find rules.

In [6]:
association_rules=apriori(records,min_support=0.005,min_confidence=0.2,min_lift=3,min_length=2)
association_results=list(association_rules)

## number of relations derived

In [7]:
print("Total number of derived relation are : {}".format(len(association_results)))

Total number of derived relation are : 38


## Association rules derived for items


In [8]:
for i in range(len(association_results)):
    print(association_results[i][0])

frozenset({'mushroom cream sauce', 'escalope'})
frozenset({'pasta', 'escalope'})
frozenset({'herb & pepper', 'ground beef'})
frozenset({'tomato sauce', 'ground beef'})
frozenset({'olive oil', 'whole wheat pasta'})
frozenset({'shrimp', 'pasta'})
frozenset({'shrimp', 'chocolate', 'frozen vegetables'})
frozenset({'mushroom cream sauce', 'nan', 'escalope'})
frozenset({'nan', 'pasta', 'escalope'})
frozenset({'frozen vegetables', 'ground beef', 'spaghetti'})
frozenset({'shrimp', 'mineral water', 'frozen vegetables'})
frozenset({'olive oil', 'frozen vegetables', 'spaghetti'})
frozenset({'shrimp', 'frozen vegetables', 'spaghetti'})
frozenset({'frozen vegetables', 'tomatoes', 'spaghetti'})
frozenset({'ground beef', 'grated cheese', 'spaghetti'})
frozenset({'mineral water', 'herb & pepper', 'ground beef'})
frozenset({'nan', 'herb & pepper', 'ground beef'})
frozenset({'herb & pepper', 'ground beef', 'spaghetti'})
frozenset({'nan', 'tomato sauce', 'ground beef'})
frozenset({'shrimp', 'ground beef'

## Rules Generated

In [9]:
for item in association_results:
    # index 0 of the item
    # Contains base item and add item
    
    print("Rule: " + list(item[0])[0] + " -> " + list(item[0])[1])

    # index 1 of the item contains support
    print("Support: " + str(item[1]))

    # index 2 of the item contains confidence and lift at sub-index [0][2] and [0][3] respectively.

    print("Confidence: " + str(item[2][0][2]))
    print("Lift: " + str(item[2][0][3]))
    print("=====================================")

Rule: mushroom cream sauce -> escalope
Support: 0.005732568990801226
Confidence: 0.3006993006993007
Lift: 3.790832696715049
Rule: pasta -> escalope
Support: 0.005865884548726837
Confidence: 0.3728813559322034
Lift: 4.700811850163794
Rule: herb & pepper -> ground beef
Support: 0.015997866951073192
Confidence: 0.3234501347708895
Lift: 3.2919938411349285
Rule: tomato sauce -> ground beef
Support: 0.005332622317024397
Confidence: 0.3773584905660377
Lift: 3.840659481324083
Rule: olive oil -> whole wheat pasta
Support: 0.007998933475536596
Confidence: 0.2714932126696833
Lift: 4.122410097642296
Rule: shrimp -> pasta
Support: 0.005065991201173177
Confidence: 0.3220338983050847
Lift: 4.506672147735896
Rule: shrimp -> chocolate
Support: 0.005332622317024397
Confidence: 0.23255813953488375
Lift: 3.2545123221103784
Rule: mushroom cream sauce -> nan
Support: 0.005732568990801226
Confidence: 0.3006993006993007
Lift: 3.790832696715049
Rule: nan -> pasta
Support: 0.005865884548726837
Confidence: 0.372