In [None]:
pip install calplot

In [None]:
pip install apyori

### Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import calplot
import warnings
warnings.filterwarnings('ignore')

### Importing the dataset

In [None]:
# Load the raw grocery records from the CSV file into a DataFrame
dataset_path = '../input/groceries-dataset/Groceries_dataset.csv'
grocery_dataset = pd.read_csv(dataset_path)

In [None]:
# Preview the raw dataset as loaded from the CSV
grocery_dataset

In [None]:
# Column names, dtypes and non-null counts of the raw dataset.
# DataFrame.info() prints the summary itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" line.
grocery_dataset.info()

In [None]:
# Display the dataset again (it has not been modified since the preview above)
grocery_dataset

In [None]:
# Daily sales volume for 2015: one row per 'Date' value with the number of
# records on that date in the 'count' column.
# The rows are ordered by the raw 'Date' values (the column is not converted here).
sold_in_2015 = pd.DatetimeIndex(grocery_dataset['Date']).year == 2015
df2015 = (
    grocery_dataset[sold_in_2015]
    .groupby(['Date'])
    .size()
    .reset_index(name='count')
    .sort_values('Date', ascending=True)
)

In [None]:
# Per-date record counts for 2015
df2015

In [None]:
# Calendar heatmap of the number of grocery items sold per day in 2015.
#
# The date-indexed series is built without mutating df2015, so this cell can be
# re-run safely. The previous in-place set_index() consumed the 'Date' column,
# which made a second run fail with a KeyError.
# NOTE(review): the dates are parsed with pandas' default inference. If the CSV
# stores day-first strings (DD-MM-YYYY), pass dayfirst=True or an explicit format.
# TODO confirm against the data.
events = df2015.set_index(pd.DatetimeIndex(df2015['Date']))['count']

# calplot creates its own figure, so the size and the title are handed to it
# directly. A preceding plt.figure() only produced an extra empty figure, and
# plt.title() targeted whichever axes happened to be current.
calplot.calplot(
    events,
    cmap='YlGn',
    figsize=(15, 10),
    suptitle='Distribution of no. of items sold in the year 2015',
    suptitle_kws={'fontsize': 15},
)
plt.show()

In [None]:
# Daily sales volume for 2014: one row per 'Date' value with the number of
# records on that date in the 'count' column.
# The rows are ordered by the raw 'Date' values (the column is not converted here).
sold_in_2014 = pd.DatetimeIndex(grocery_dataset['Date']).year == 2014
df2014 = (
    grocery_dataset[sold_in_2014]
    .groupby(['Date'])
    .size()
    .reset_index(name='count')
    .sort_values('Date', ascending=True)
)

In [None]:
# Per-date record counts for 2014
df2014

In [None]:
# Calendar heatmap of the number of grocery items sold per day in 2014.
#
# The date-indexed series is built without mutating df2014, so this cell can be
# re-run safely. The previous in-place set_index() consumed the 'Date' column,
# which made a second run fail with a KeyError.
# NOTE(review): the dates are parsed with pandas' default inference. If the CSV
# stores day-first strings (DD-MM-YYYY), pass dayfirst=True or an explicit format.
# TODO confirm against the data.
events = df2014.set_index(pd.DatetimeIndex(df2014['Date']))['count']

# calplot creates its own figure, so the size and the title are handed to it
# directly. A preceding plt.figure() only produced an extra empty figure, and
# plt.title() targeted whichever axes happened to be current.
calplot.calplot(
    events,
    cmap='YlGn',
    figsize=(15, 10),
    suptitle='Distribution of no. of items sold in the year 2014',
    suptitle_kws={'fontsize': 15},
)
plt.show()

In [None]:
# Number of records per item description: one row per item, tally in 'count'
records_per_item = grocery_dataset.groupby(['itemDescription']).size()
df = records_per_item.reset_index(name='count')

In [None]:
# Pie chart of the share of each item sold during 2014-15.
# Items with fewer than 500 records are relabelled (in place, on df) as a
# single 'Other items' category before plotting.
is_rare_item = df['count'] < 500
df.loc[is_rare_item, 'itemDescription'] = 'Other items'
fig = px.pie(
    df,
    names='itemDescription',
    values='count',
    title='Distribution of items sold during 2014-15',
)
fig.show()

In [None]:
# One row per 'Date': every item description recorded on that date,
# concatenated into a single comma-separated string.
records_by_date = grocery_dataset.groupby('Date')
grocery_df = records_by_date.agg({'itemDescription': ','.join})

In [None]:
# Items grouped per date, indexed by 'Date'
grocery_df

In [None]:
# Turn each comma-joined string of items into a list of item names.
# Values that are not strings (for example lists produced by an earlier run of this
# cell) are left untouched, so re-running the cell is harmless. Applying
# `.str.split(',')` a second time would no longer yield item lists.
grocery_df['itemDescription'] = grocery_df['itemDescription'].map(
    lambda items: items.split(',') if isinstance(items, str) else items
)

### Data preprocessing

In [None]:
# One transaction (list of item names) per row of grocery_df, in row order.
# grocery_df is indexed by 'Date', so looking rows up with an integer key
# (`series[i]`) relied on pandas' deprecated positional fallback for label
# indexes. Converting the column directly is positional by construction.
transactions = grocery_df['itemDescription'].tolist()

### Training the apriori model on the grocery items dataset

<div class="alert alert-block alert-info">
<b>Assumption:</b> The rules below contain items that have been sold a minimum of 3 times in a day. Also, the minimum and maximum number of items in each rule are both set to 3. These values are subject to change based on business requirements.
</div>

In [None]:
from apyori import apriori

# Smallest number of items a rule's item set must contain. apyori has no
# `min_length` option (an unknown keyword is silently ignored), so the lower
# bound is enforced explicitly below; `max_length` is honoured by apyori itself.
MIN_RULE_LENGTH = 3

# apriori() yields its records lazily and can only be consumed once. They are
# materialised into a list here so that `rules` can be displayed and converted
# again by later cells without coming back empty on a re-run.
rules = [
    rule
    for rule in apriori(transactions=transactions, min_support=0.0042,
                        min_confidence=0.8, min_lift=3, max_length=3)
    if len(rule.items) >= MIN_RULE_LENGTH
]

In [None]:
# Show the object produced by the apriori() call above
rules

In [None]:
# Collect the rules into a list so they can be counted and indexed below
results = list(rules)

In [None]:
# Number of rule records collected
len(results)

In [None]:
def inspect(results):
    """Flatten apriori rule records into plain rows for a DataFrame.

    Every record is read positionally as ``(items, support, ordered_statistics)``
    and every ordered statistic as ``(items_base, items_add, confidence, lift)``.
    Only the first ordered statistic of a record is used, so the output contains
    exactly one row per record.

    Args:
        results: Iterable of rule records, e.g. ``list(apriori(...))``.

    Returns:
        list[tuple]: One ``(base_items, additional_items, support, confidence,
        lift)`` tuple per record. ``base_items`` is a sorted tuple of the base
        item names. ``additional_items`` is the added item itself when the rule
        adds a single item, otherwise all added items joined with ``', '``.
    """
    rows = []
    for record in results:
        support = record[1]
        items_base, items_add, confidence, lift = record[2][0][:4]

        # apyori hands the item collections over as frozensets; sorting makes the
        # output independent of hash order, which can differ between kernel runs.
        base_items = tuple(sorted(items_base))
        added_items = sorted(items_add)

        # Keep every added item instead of an arbitrary "first" one.
        if len(added_items) == 1:
            additional_items = added_items[0]
        else:
            additional_items = ', '.join(map(str, added_items))

        rows.append((base_items, additional_items, support, confidence, lift))
    return rows
# Tabulate the extracted rule fields, one row per rule record
rule_columns = ['BaseItems', 'AdditionalItems', 'Supports', 'Confidences', 'Lifts']
resultsinDataFrame = pd.DataFrame(data=inspect(results), columns=rule_columns)

In [None]:
# Table of the extracted rules (base items, additional item, support, confidence, lift)
resultsinDataFrame

In [None]:
# Bar chart of how often each add-on item occurs across the extracted rules
countplot_fig, ax = plt.subplots(figsize=(20, 12))
sns.countplot(data=resultsinDataFrame, x='AdditionalItems', ax=ax)
ax.set_title('Distribution of additional items', fontsize=25)
ax.set_xlabel('Additional Items associated with base products', fontsize=20)
ax.set_ylabel('Count of additional items', fontsize=20)
# Item names are long, so the x tick labels are rotated to stay readable
plt.xticks(rotation=90, fontsize=20)
plt.yticks(fontsize=20)
plt.show()