In [8]:
import pandas as pd
import numpy as np

In [9]:
df = pd.read_csv('supermarket.csv')
print(df.columns)

Index(['Customer Id', 'Date', 'Product'], dtype='object')


In [10]:
unique_products = df['Product'].nunique()

print(f'Total number of unique products: {unique_products}')

Total number of unique products: 137


In [11]:
sales_per_day = df.groupby('Date').size()
average_sales_per_day = sales_per_day.mean()

print(f'Average sales per day: {average_sales_per_day:.2f}')


Average sales per day: 44.93


In [12]:
product_counts = df['Product'].value_counts()

least_common_products = product_counts.nsmallest(4)

print("4 least common products")
print(least_common_products)

4 least common products
Product
kitchen utensil          1
preservation products    1
baby cosmetics           3
bags                     4
Name: count, dtype: int64


In [14]:
df['Date'] = pd.to_datetime(df['Date'])

df_2020 = df[df['Date'].dt.year == 2020]

customer_basket_counts = df_2020['Customer Id'].value_counts()

top_5_customers = customer_basket_counts.nlargest(5)

print("5 customers with most shop in 2020")
print(top_5_customers)

5 customers with most shop in 2020
Customer Id
0ZRF32AJ06BC    26
KACGV5OJXMJW    23
0P1TPIH74E79    21
ED8WY0DE27XG    20
NQQZGF4K1FE0    20
Name: count, dtype: int64


In [15]:
df['Date'] = pd.to_datetime(df['Date'])
df['Day_of_Week'] = df['Date'].dt.day_name()

day_of_week_sales = df['Day_of_Week'].value_counts()

most_sales_day = day_of_week_sales.idxmax()
most_sales_count = day_of_week_sales.max()

print(f"The day of the week with the highest number of products sold is: {most_sales_day} with {most_sales_count} products sold.")

The day of the week with the highest number of products sold is: Wednesday with 4846 products sold.


In [16]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [17]:
transactions = df.groupby('Customer Id')['Product'].apply(list).tolist()

te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

print(df.head())

   Instant food products  UHT-milk  abrasive cleaner  artif. sweetener  \
0                  False     False             False             False   
1                  False     False             False             False   
2                  False     False             False             False   
3                  False     False             False             False   
4                  False     False             False             False   

   baby cosmetics   bags  baking powder  bathroom cleaner   beef  berries  \
0           False  False          False             False  False    False   
1           False  False          False             False  False    False   
2           False  False           True             False  False    False   
3           False  False          False             False  False    False   
4           False  False          False             False  False    False   

   ...    tea  toilet cleaner  tropical fruit  turkey  waffles  \
0  ...  False           Fa

In [20]:
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print(rules)


Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction, zhangs_metric]
Index: []


In [22]:
rules['lift'] = pd.to_numeric(rules['lift'])
top_lift_rules = rules.nlargest(5, 'lift')
print("Top 5 rules with highest lift:")
print(top_lift_rules)

Top 5 rules with highest lift:
Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction, zhangs_metric]
Index: []


In [23]:
top_products = df.mean().nlargest(5)
print("Top 5 products with highest support:")
print(top_products)

Top 5 products with highest support:
whole milk          0.458772
other vegetables    0.377087
rolls/buns          0.350116
soda                0.313897
yogurt              0.283329
dtype: float64
