#### Getting the Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#### Importing the Dataset

In [None]:
# Load the purchase records CSV into a DataFrame; every later cell works on `groceries`.
groceries = pd.read_csv('../input/groceries-dataset/Groceries_dataset.csv')

#### Dataset Description

The dataset has 38,765 rows and 3 columns describing customers' purchases at grocery stores.

* Member_number - ID of the customer
* Date - Purchase Order Date
* itemDescription - Description of the purchased item

In [None]:
# Preview the first rows to check the column names and raw value formats.
groceries.head()

In [None]:
# Print the column dtypes and non-null counts.
groceries.info()

In [None]:
# Visual null check: draw the boolean missing-value mask as a heatmap.
sns.heatmap(groceries.isnull())

There are no null values in the dataset.

### Data Pre Processing

Deriving additional variables from the original columns

In [None]:
# Parse the raw date strings into real timestamps.
# NOTE(review): the format is stated explicitly as day-month-year (e.g. "21-07-2015").
# Without it, pandas may read ambiguous values such as "05-01-2015" month-first,
# silently mixing up days and months. An explicit format also fails loudly if the
# column is not in the expected layout - confirm against the CSV.
groceries['DateTime'] = pd.to_datetime(groceries['Date'], format='%d-%m-%Y')

In [None]:
#date variables

# Break the parsed timestamp into the calendar parts used by the plots further down.
groceries['Month'] = groceries['DateTime'].dt.month
groceries['Year'] = groceries['DateTime'].dt.year
groceries['Day'] = groceries['DateTime'].dt.day
# day_name() yields 'Monday'..'Sunday' directly, so no integer -> name lookup is needed.
groceries['WeekDay'] = groceries['DateTime'].dt.day_name()

In [None]:
#transaction id

# All items bought by the same member on the same date form one transaction.
# ngroup() numbers each (Member_number, Date) group and returns that number per row.
# It is the public replacement for the internal `.grouper.group_info[0]` accessor,
# which pandas has deprecated.
groceries['Transaction'] = groceries.groupby(['Member_number', 'Date']).ngroup()

In [None]:
# Preview the frame again to check the derived date and Transaction columns.
groceries.head()

### Data Visualization

#### Top 25 sold items

In [None]:
# Count purchases per item once and keep the 25 most frequent.
values = groceries['itemDescription'].value_counts().head(25)
names = values.index
colors = sns.color_palette("YlOrRd", 25)
fig = plt.figure(figsize=(15, 5))
sns.barplot(x=names, y=values, palette=colors)
# Tilt the tick labels so the item names do not overlap.
plt.xticks(rotation=60)

#### Transactions By Month

In [None]:
# Number of distinct transactions per calendar month.
# nunique() is used rather than count(): a transaction occupies one row per item,
# so count() would report items sold instead of transactions.
countByMonth = groceries.groupby('Month')['Transaction'].nunique().reset_index()
countByMonth.sort_values('Month',inplace=True)

In [None]:
# Size the palette to the number of bars actually drawn, as the other plots do.
# The previous fixed size of 18 exceeded the number of months, so part of the
# gradient was never used.
colors = sns.color_palette("YlOrRd", len(countByMonth))
fig = plt.figure(figsize=(12,5))
sns.barplot(x=countByMonth['Month'], y=countByMonth['Transaction'], palette = colors)

#### Transactions by Weekday

In [None]:
# Number of distinct transactions per weekday.
# nunique() is used rather than count(): a transaction occupies one row per item,
# so count() would report items sold instead of transactions.
countByWeekday = groceries.groupby('WeekDay')['Transaction'].nunique().reset_index()
# Rank the days Monday..Sunday by name. A positional list would depend on the
# alphabetical order groupby produces and would break if a weekday were absent.
weekdayRank = {'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3,
               'Friday': 4, 'Saturday': 5, 'Sunday': 6}
countByWeekday["orderOfDays"] = countByWeekday['WeekDay'].map(weekdayRank)
countByWeekday.sort_values("orderOfDays",inplace=True)

In [None]:
# Bar chart of the per-weekday figures, one palette colour per weekday.
fig = plt.figure(figsize=(12, 5))
colors = sns.color_palette("YlOrRd", 7)
sns.barplot(data=countByWeekday, x='WeekDay', y='Transaction', palette=colors)

#### Transactions by Day of the Month

In [None]:
# Number of distinct transactions per day of the month.
# nunique() is used rather than count(): a transaction occupies one row per item,
# so count() would report items sold instead of transactions.
countByDay = groceries.groupby('Day')['Transaction'].nunique().reset_index()
countByDay.sort_values('Day',inplace=True)

In [None]:
# Bar chart of the per-day figures, with a 31-colour palette (one per possible day).
fig = plt.figure(figsize=(12, 5))
colors = sns.color_palette("YlOrRd", 31)
sns.barplot(data=countByDay, x='Day', y='Transaction', palette=colors)

### Apriori Algorithm

In [None]:
# Long format: how many times each item appears in each transaction.
transactions = (
    groceries
    .groupby(['Transaction', 'itemDescription'])['itemDescription']
    .count()
    .reset_index(name='Number of Items')
)
# Wide format: one row per transaction, one column per item.
# Items absent from a transaction become 0.
table = (
    transactions
    .pivot_table(index='Transaction', columns='itemDescription',
                 values='Number of Items', aggfunc='sum')
    .fillna(0)
)

In [None]:
# Preview the transaction x item count matrix.
table.head()

In [None]:
def hot_encode(x):
    """Return True when an item count is positive, False otherwise.

    Converts one cell of the transaction x item count table into a
    boolean "item present" flag.

    Args:
        x: Numeric item count for one (transaction, item) cell.

    Returns:
        bool: True if ``x`` is greater than zero; False for zero,
        negative and NaN values.
    """
    # The two separate `if` branches used to fall through and return None for
    # negative or NaN input; a single comparison always yields a real bool.
    return bool(x > 0)

In [None]:
# Boolean presence matrix: True where the item occurs in the transaction.
# A vectorised comparison gives the same result as mapping a "count > 0" check
# over every cell, without calling Python once per cell and without
# DataFrame.applymap, which recent pandas releases deprecate.
final_table = table > 0

In [None]:
# Preview the encoded table.
final_table.head()

In [None]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [None]:
# Mine frequent itemsets with a minimum support of 0.0005, labelled by item
# name rather than column index.
frequence = apriori(
    final_table,
    min_support=0.0005,
    use_colnames=True,
)
# Derive association rules from those itemsets, filtered on lift with threshold 1.
rules = association_rules(
    frequence,
    metric="lift",
    min_threshold=1,
)

In [None]:
# Preview the generated rules before sorting.
rules.head()

In [None]:
# Rank the rules from most to least confident (in place), then show the top 20.
rules.sort_values(by='confidence', ascending=False, inplace=True)
rules.head(n=20)