# Market Basket Analysis using Apriori

In [570]:
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns

## Load the data

In [571]:
# Read the bakery data from disk and preview the first five rows.
# Transaction ids repeat across rows (one row per item in the transaction).
data = pd.read_csv(filepath_or_buffer="bread basket.csv")
data.head(n=5)

Unnamed: 0,Transaction,Item,date_time,period_day,weekday_weekend
0,1,Bread,30-10-2016 09:58,morning,weekend
1,2,Scandinavian,30-10-2016 10:05,morning,weekend
2,2,Scandinavian,30-10-2016 10:05,morning,weekend
3,3,Hot chocolate,30-10-2016 10:07,morning,weekend
4,3,Jam,30-10-2016 10:07,morning,weekend


In [572]:
# Print column dtypes and non-null counts to check for missing values
# and for columns that still need a type conversion.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20507 entries, 0 to 20506
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Transaction      20507 non-null  int64 
 1   Item             20507 non-null  object
 2   date_time        20507 non-null  object
 3   period_day       20507 non-null  object
 4   weekday_weekend  20507 non-null  object
dtypes: int64(1), object(4)
memory usage: 801.2+ KB


## Preprocessing

### Changing date to correct datatype

In [573]:
# The raw timestamps are day-first strings such as "30-10-2016 09:58".
# Spell the format out so that ambiguous dates (e.g. "01-11-2016") can never
# be read month-first, whatever the pandas version or the order of the rows.
data["date_time"] = pd.to_datetime(data["date_time"], format="%d-%m-%Y %H:%M")





In [574]:
# Number of distinct transaction ids in the data.
data.Transaction.nunique()

9465

### Creating different columns based on Date

In [575]:
# Split the timestamp into the calendar parts used by the analysis below:
# date, time of day, month name and hour.
data = data.assign(
    date=data["date_time"].dt.date,
    time=data["date_time"].dt.time,
    month=data["date_time"].dt.month_name(),
    hour=data["date_time"].dt.hour,
)

In [576]:
# Take the weekday name straight from the timestamp, mirroring the
# `month_name()` call used for the "month" column, instead of hand-mapping
# weekday numbers (0-6) to names.
data["weekday"] = data["date_time"].dt.day_name()

In [577]:
# The raw timestamp has been split into separate columns, so remove it.
data = data.drop(columns=["date_time"])

In [578]:
# Preview the frame after replacing the timestamp with the derived columns.
data.head(n=5)

Unnamed: 0,Transaction,Item,period_day,weekday_weekend,date,time,month,hour,weekday
0,1,Bread,morning,weekend,2016-10-30,09:58:00,October,9,Sunday
1,2,Scandinavian,morning,weekend,2016-10-30,10:05:00,October,10,Sunday
2,2,Scandinavian,morning,weekend,2016-10-30,10:05:00,October,10,Sunday
3,3,Hot chocolate,morning,weekend,2016-10-30,10:07:00,October,10,Sunday
4,3,Jam,morning,weekend,2016-10-30,10:07:00,October,10,Sunday


### Removing spaces and lowering cases

In [579]:
# Normalise item names: trim surrounding whitespace, then lower-case them.
data = data.assign(Item=lambda frame: frame["Item"].str.strip().str.lower())

In [580]:
# Preview the frame to confirm the item names are now lower-case.
data.head(n=5)

Unnamed: 0,Transaction,Item,period_day,weekday_weekend,date,time,month,hour,weekday
0,1,bread,morning,weekend,2016-10-30,09:58:00,October,9,Sunday
1,2,scandinavian,morning,weekend,2016-10-30,10:05:00,October,10,Sunday
2,2,scandinavian,morning,weekend,2016-10-30,10:05:00,October,10,Sunday
3,3,hot chocolate,morning,weekend,2016-10-30,10:07:00,October,10,Sunday
4,3,jam,morning,weekend,2016-10-30,10:07:00,October,10,Sunday


## Exploratory Data Analysis

### Top 20 Products

In [581]:
# Count how often each item appears (most frequent first) and keep the first 20.
top20 = data["Item"].value_counts().iloc[:20]

In [582]:
# Bar chart of the 20 most frequent items, with the count printed on each bar.
fig = px.bar(top20, title="Top 20 Products", text_auto=True)
fig.show()

Coffee is the best-selling product. <br>
Of course, it is the best!

### Number of transactions per month

In [583]:
# Row count per calendar month (the index is sorted alphabetically by groupby).
# NOTE(review): this counts rows, i.e. items sold, because a transaction with
# several items spans several rows - confirm whether `nunique` was intended.
trans_per_month = data.groupby("month")[["Transaction"]].count()
trans_per_month

Unnamed: 0_level_0,Transaction
month,Unnamed: 1_level_1
April,1157
December,3339
February,3906
January,3356
March,3944
November,4436
October,369


In [584]:
# `groupby` sorts the month names alphabetically, which scrambles the timeline.
# Put the bars in chronological order instead, using the order in which the
# months first occur in the data.
month_order = data.sort_values("date")["month"].drop_duplicates().tolist()

fig = px.bar(data_frame=trans_per_month, text_auto=True, title="Transactions per Month")
fig.update_layout(xaxis={"categoryorder": "array", "categoryarray": month_order})
fig.show()

This dataset only covers seven months, October through April; October and April have far fewer rows than the other months.

### Orders received each day

In [585]:
# Row count per weekday name.
# NOTE(review): this counts rows, i.e. items sold, because a transaction with
# several items spans several rows - confirm whether `nunique` was intended.
each_day = data.groupby("weekday")[["Transaction"]].count()
each_day

Unnamed: 0_level_0,Transaction
weekday,Unnamed: 1_level_1
Friday,3124
Monday,2324
Saturday,4605
Sunday,3095
Thursday,2646
Tuesday,2392
Wednesday,2321


In [586]:
# Bar chart of the per-weekday counts. The x axis is pinned to calendar order
# (Monday first) because the grouped index is sorted alphabetically.
fig = px.bar(each_day, text_auto=True, title="Transactions per day")
fig.update_xaxes(
    categoryorder="array",
    categoryarray=[
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ],
)
fig.show()

Most sales were on Saturday.

### Orders received each hour

In [587]:
# Row count per hour of the day.
# NOTE(review): this counts rows, i.e. items sold, because a transaction with
# several items spans several rows - confirm whether `nunique` was intended.
each_hour = data.groupby("hour")[["Transaction"]].count()

In [588]:
# Display the hourly counts computed in the previous cell.
each_hour

Unnamed: 0_level_0,Transaction
hour,Unnamed: 1_level_1
1,1
7,24
8,645
9,1966
10,2666
11,3102
12,2854
13,2617
14,2640
15,2115


In [589]:
# Bar chart of the hourly counts, drawn 800 px tall with the count on each bar.
fig = px.bar(each_hour, text_auto=True, height=800, title="Transactions per hour")
fig.show()

### Transactions during period of day

In [590]:
# Row count per period of the day (values of the `period_day` column).
# NOTE(review): this counts rows, i.e. items sold, because a transaction with
# several items spans several rows - confirm whether `nunique` was intended.
period_day = data.groupby("period_day")[["Transaction"]].count()
period_day

Unnamed: 0_level_0,Transaction
period_day,Unnamed: 1_level_1
afternoon,11569
evening,520
morning,8404
night,14


In [591]:
# Horizontal bar chart of the counts per period of day, with the count on each bar.
fig = px.bar(
    period_day,
    orientation="h",
    text_auto=True,
    title="Transactions per period of day",
)
fig.show()

People prefer to order in the morning and afternoon.