# Market Basket Analysis using Apriori Algorithm

## Step 1: Load and Preprocess Data

In [1]:

import pandas as pd

# Load dataset
file_path = 'new_retail_data.csv'
df = pd.read_csv(file_path)

# Extract relevant columns
transaction_data = df[['Transaction_ID', 'products']].dropna()

# Convert transactions into a format suitable for Market Basket Analysis
basket = transaction_data.groupby('Transaction_ID')['products'].apply(list).reset_index()

# Display sample transactions
basket.head()


Unnamed: 0,Transaction_ID,products
0,1000007.0,[Cappuccino]
1,1000043.0,[OnePlus]
2,1000073.0,[Cooking]
3,1000088.0,[Flare jeans]
4,1000154.0,[LG G]


## Step 2: Transform Data for Apriori Algorithm

In [2]:

from mlxtend.preprocessing import TransactionEncoder

# Convert transaction list into format suitable for Apriori
te = TransactionEncoder()
basket_encoded = te.fit(basket['products']).transform(basket['products'])

# Convert to DataFrame
basket_df = pd.DataFrame(basket_encoded, columns=te.columns_)
basket_df.head()


Unnamed: 0,4K TV,A-line dress,Acer Iconia Tab,Acer Swift,Action,Adventure,Affogato,Air conditioner,Alkaline water,Amazon Fire Tablet,...,White chocolate,Wide-leg jeans,Windbreaker,Window AC,Wireless headphones,Wrap dress,Wrench,Xiaomi Mi,iPad,iPhone
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


## Step 3: Apply Apriori Algorithm

In [3]:

from mlxtend.frequent_patterns import apriori

# Generate frequent itemsets
frequent_itemsets = apriori(basket_df, min_support=0.01, use_colnames=True)
frequent_itemsets.sort_values(by='support', ascending=False).head(10)


Unnamed: 0,support,itemsets


## Step 4: Generate Association Rules

In [4]:

from mlxtend.frequent_patterns import association_rules

# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
rules.sort_values(by=['lift', 'confidence'], ascending=[False, False]).head(10)


ValueError: The input DataFrame `df` containing the frequent itemsets is empty.