In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns



import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)



# Cleaning

In [None]:
# Read the Bread Basket CSV into a DataFrame and preview its first rows.
bread_basket_csv = "/kaggle/input/the-bread-basket/bread basket.csv"
data = pd.read_csv(bread_basket_csv)
data.head()

In [None]:
# Print the column names, dtypes and non-null counts to check for missing values.
data.info()

There is no missing data

In [None]:
# Convert the raw `date_time` strings to datetime values so the `.dt` accessor can be used.
# NOTE(review): no `format`/`dayfirst` is passed, so pandas infers the layout. If the CSV
# stores day-first dates, ambiguous rows (e.g. 01-11-2016) may be read month-first — confirm
# against the raw file and pass an explicit `format=` if so.
parsed_timestamps = pd.to_datetime(data['date_time'])
data['date_time'] = parsed_timestamps

In [None]:
# Derive calendar features from the parsed `date_time` column, then drop the raw timestamp.
timestamps = data['date_time']

# Calendar date and clock time.
data['date'] = timestamps.dt.date
data['time'] = timestamps.dt.time

# Lower-case English month name ('january', 'february', ...). Using the accessor
# replaces the hand-typed lookup table, which misspelled 'february'.
data['months'] = timestamps.dt.month_name().str.lower()

# Hour bucket label of the form '<h>-<h+1>' (e.g. 9 -> '9-10'). The label is built
# from the hour itself so every hour of the day gets one; a fixed lookup table would
# leave any hour it does not list as a raw integer in an otherwise string column.
data['hours'] = timestamps.dt.hour.map(
    lambda hour: '{}-{}'.format(int(hour), int(hour) + 1),
    na_action='ignore',
)

# Lower-case English weekday name ('monday' ... 'sunday').
data['weekdays'] = timestamps.dt.day_name().str.lower()

# The raw timestamp is no longer needed once the features above exist.
data = data.drop(columns='date_time')

In [None]:
# Normalize item names in one pass: trim surrounding whitespace, then lower-case.
data['Item'] = data['Item'].str.strip().str.lower()
data.head()

# Data Visualization

In [None]:
# Top items purchased by customers
# Count rows per item once and keep the ten most frequent, instead of recomputing
# the same value_counts() for the x and y arguments.
top_items = data['Item'].value_counts().head(10)

plt.figure(figsize = (15,5))
sns.barplot(x = top_items.index, y = top_items.values, palette = 'gnuplot')
plt.xlabel('Items', size = 15)
plt.ylabel('Count', size = 15)
plt.title("Top 10 Items purchased by customers", color = 'green', size = 20)
plt.show()

Coffee has the highest number of transactions.

Brownie is the 10th most purchased product.

In [None]:
# Bar chart of the number of rows recorded for each month, in value_counts() order
# (most frequent month first).
monthly_counts = data['months'].value_counts()

fig, ax = plt.subplots(figsize=(15, 5))
sns.barplot(x=monthly_counts.index, y=monthly_counts.values, palette='mako', ax=ax)
ax.set_xlabel('Months', size=15)
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_ylabel('Counts', size=15)
ax.set_title("No. of Orders received each month")
plt.show()


The most orders were received in March, November, January, February, and December.

In [None]:
# No. of orders received each day
# NOTE(review): .count() counts rows per weekday. If one transaction id spans several
# item rows, this is the number of items sold rather than distinct orders — confirm
# whether .nunique() is the intended measure.
weekday_trans = data.groupby('weekdays')['Transaction'].count().reset_index()

# Rank weekdays Monday..Sunday by name. Looking the rank up per label keeps the order
# correct even if a weekday is absent from the data, which a fixed positional list
# (tied to the alphabetical groupby order of all seven days) would not.
weekday_rank = {
    name: position
    for position, name in enumerate(
        ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday')
    )
}
weekday_trans['weekorder'] = weekday_trans['weekdays'].map(weekday_rank)
weekday_trans = weekday_trans.sort_values('weekorder')


plt.figure(figsize = (15,5))
sns.barplot(data = weekday_trans, x = 'weekdays' , y = 'Transaction', palette = 'rocket')
plt.xlabel('Weekdays', size = 15)
plt.xticks(rotation = 90)
plt.ylabel('Counts', size = 15)
plt.title("No. of Orders received each day")
plt.show()


In [None]:
# No. of order received each hour
# NOTE(review): .count() counts rows per hour bucket, not distinct transaction ids —
# confirm this is the intended measure of "orders".
hourTran = data.groupby('hours')['Transaction'].count().reset_index()

# Hour labels look like '9-10'; sort them by their numeric starting hour. Deriving the
# sort key from the label itself avoids a hand-written positional list that only works
# for one exact set of labels in lexical groupby order.
hourTran['hourorder'] = (
    hourTran['hours'].astype(str).str.split('-').str[0].astype(int)
)
hourTran = hourTran.sort_values('hourorder')

plt.figure(figsize = (12,5))
sns.barplot(data = hourTran, x = 'Transaction', y = 'hours')
plt.ylabel('Hours', size = 15)
plt.xlabel('Orders each hour', size = 15)
plt.title('Count of orders received each hour', color = 'green', size = 20)
plt.show()

Most of the orders are placed between 12 and 5 in the afternoon.

In [None]:
# Count rows for each value of `period_day` and draw them as horizontal bars.
# `period_day` is not derived in this notebook, so it is presumably a column of the CSV.
rows_per_period = data.groupby('period_day')['Transaction'].count()
dayTran = rows_per_period.reset_index()

fig, ax = plt.subplots(figsize=(15, 5))
sns.barplot(data=dayTran, x='Transaction', y='period_day', ax=ax)
ax.set_ylabel('Period', size=15)
ax.set_xlabel('Orders each period of a day', size=15)
ax.set_title('Count of orders received each period of a day', color='green', size=20)
plt.show()

People prefer to order in the morning and afternoon.

In [None]:
# Row counts per (period_day, Item), sorted descending so that the first rows of each
# period are its most frequent items.
df = (
    data.groupby(['period_day', 'Item'])['Transaction']
    .count()
    .reset_index()
    .sort_values(['period_day', 'Transaction'], ascending=False)
)
day = ['morning', 'afternoon', 'evening', 'night']

# One panel per period in a 2x2 grid, each showing that period's ten most frequent items.
plt.figure(figsize=(15, 8))
for panel_number, period in enumerate(day, start=1):
    plt.subplot(2, 2, panel_number)
    top_for_period = df.loc[df['period_day'] == period].head(10)
    sns.barplot(
        data=top_for_period,
        y=top_for_period['Item'],
        x=top_for_period['Transaction'],
        color='lightblue',
    )
    plt.xlabel('')
    plt.ylabel('')
    plt.title('Top 10 items people like to order in "{}"'.format(period), size=15)
plt.show()

In [None]:
from mlxtend.frequent_patterns import association_rules, apriori

In [None]:
# One row per (Transaction, Item) pair, with `Count` holding how many rows that pair has.
pair_counts = data.groupby(['Transaction', 'Item'])['Item'].count()
trans_item_cnt = pair_counts.reset_index(name='Count')
trans_item_cnt

In [None]:
# Pivot to a Transaction x Item matrix of counts; pairs that never occur become 0.
basket_counts = trans_item_cnt.pivot_table(
    index='Transaction',
    columns='Item',
    values='Count',
    aggfunc='sum',
)
my_basket = basket_counts.fillna(0)
my_basket

In [None]:
def encode(x):
    """Binarize a basket count.

    Parameters
    ----------
    x : number
        Quantity of an item in a transaction.

    Returns
    -------
    int
        1 if ``x`` is greater than zero, otherwise 0. Every input yields 0 or 1;
        values between 0 and 1 and NaN no longer fall through to an implicit ``None``.
    """
    # NaN compares False against 0, so it is encoded as 0 (item absent).
    return 1 if x > 0 else 0

# Flag each (transaction, item) cell as present/absent with one vectorized comparison.
# For whole-number counts this matches applying `encode` element-wise, but avoids
# `DataFrame.applymap` (deprecated in recent pandas) and yields the boolean frame that
# mlxtend's `apriori` prefers as input.
my_basket_sets = my_basket > 0
my_basket_sets

In [None]:
# Run apriori on the encoded basket matrix with a minimum support of 0.01, labelling
# itemsets with column (item) names.
min_support = 0.01
freq_items = apriori(my_basket_sets, use_colnames=True, min_support=min_support)
freq_items

In [None]:
# Build association rules with lift >= 1 from the frequent itemsets and order them
# from highest to lowest confidence.
rules = (
    association_rules(freq_items, metric='lift', min_threshold=1)
    .sort_values('confidence', ascending=False)
)
rules

In [None]:
# Display the rules ordered by descending confidence (returns a sorted copy; `rules` is unchanged).
rules.sort_values(by='confidence', ascending=False)