# Implementing market basket analysis

In [None]:
pip install mlxtend




In [None]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [None]:
myretaildata = pd.read_excel('MBA_data.xlsx')
myretaildata.head()

Unnamed: 0,Invoice No,Invoice Created,Shipping Address State,Item Type Name,Item SKU Code,MRP,Customer Id,Separator
0,I0925NC000006465,2025-01-01 10:05:00,Maharashtra,Women Regular Kurta with Churidar & With Dupatta,ADKSET157-M,6499.0,20003549,1
1,I0925NC000006475,2025-01-02 09:50:00,Maharashtra,Women Ethnic Motifs Panelled Sequinned Chander...,ADKSET261-XS,6299.0,20004807,1
2,I0925NC000006477,2025-01-02 09:50:00,Haryana,Floral Embroidered Siquinned V-Neck Flared Geo...,ADDRS180-XL,4499.0,20004884,1
3,I0925NC000006476,2025-01-02 09:49:00,Uttar Pradesh,Embroidered Round Neck Top With Palazzos,ADCRDSET163-M,5499.0,20005795,1
4,I0925NC000006480,2025-01-02 09:50:00,Karnataka,Printed Top With Trousers Co-Ords,ADCRDSET218AA-M,3999.0,20006378,1


# Data Preparation

In [None]:
#Data Cleaning
myretaildata['Item Type Name'] = myretaildata['Item Type Name'].str.strip()
myretaildata.dropna(axis=0, subset=['Invoice No'], inplace=True)
myretaildata['Invoice No'] = myretaildata['Invoice No'].astype('str')
myretaildata = myretaildata[~myretaildata['Invoice No'].str.contains('C')]
myretaildata.head()

Unnamed: 0,Invoice No,Invoice Created,Shipping Address State,Item Type Name,Item SKU Code,MRP,Customer Id,Separator
6,NIMAT25F01096,2025-01-02 09:53:00,Maharashtra,Floral Embroidered Regular Thread Work Kurta w...,ADWHT006-XS,3499.0,20007903,1
13,NIMAT25F01095,2025-01-02 09:53:00,Delhi,Floral Embroidered Bell Sleeves Kurta with Tro...,ADBLK034-M,3299.0,20013731,1
22,NIMAT25F01099,2025-01-03 09:44:00,Gujarat,Women Empire Gotta Patti Chanderi Silk Kurta w...,ADKSET165-XL,7999.0,20031252,1
31,NIMAT25F01098,2025-01-03 09:44:00,Telangana,Embroidered Empire Sequinned Silk Crepe Kurta ...,ADGRY041-S,4499.0,20037806,1
32,NIMAT25F01100,2025-01-03 09:44:00,Maharashtra,Brocade Co-Ord Set,ADCRDSET159-M,4999.0,20037808,1


In [None]:
import pandas as pd

file_name = 'MBA_data.xlsx'

sheet2_data = pd.read_excel(file_name, sheet_name=1)

print(sheet2_data)

                                      States  Orders
0                  Andaman & Nicobar Islands       5
1                             Andhra Pradesh      79
2                          Arunachal Pradesh       8
3                                      Assam      83
4                                      Bihar     273
5                                 Chandigarh      34
6                               Chhattisgarh     102
7   Dadra and Nagar Haveli and Daman and Diu       1
8                                Daman & Diu       2
9                                      Delhi     495
10                                       Goa      28
11                                   Gujarat     310
12                                   Haryana     470
13                          Himachal Pradesh      28
14                           Jammu & Kashmir      42
15                                 Jharkhand     126
16                                 Karnataka     482
17                                    Kerala  

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import requests

data = {
    "States": [
        "Andaman & Nicobar Islands", "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chandigarh", "Chhattisgarh",
        "Dadra and Nagar Haveli and Daman and Diu", "Daman & Diu", "Delhi", "Goa", "Gujarat", "Haryana", "Himachal Pradesh",
        "Jammu & Kashmir", "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
        "Mizoram", "Nagaland", "Odisha", "Puducherry", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana",
        "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal"
    ],
    "Orders": [
        5, 79, 8, 83, 273, 34, 102, 1, 2, 495, 28, 310, 470, 28, 42, 126, 482, 53, 340, 830, 3, 2, 1, 3, 112, 4, 139, 281, 7,
        108, 175, 14, 935, 70, 215
    ]
}

# Create DataFrame
df = pd.DataFrame(data)

# Load GeoJSON for India States
geojson_url = "https://raw.githubusercontent.com/geohacker/india/master/state/india_telengana.geojson"
geojson_data = requests.get(geojson_url).json()

# Generate Choropleth Map
fig = px.choropleth(
    df,
    geojson=geojson_data,
    featureidkey="properties.NAME_1",
    locations="States",
    color="Orders",
    title="Order Count by State in India",
    color_continuous_scale="Turbo"
)

# Extract state center coordinates for annotations
state_centers = {
    "Maharashtra": [19.7515, 75.7139],
    "Uttar Pradesh": [26.8467, 80.9462],
    "Haryana": [29.0588, 76.0856],
    "Delhi": [28.7041, 77.1025],
    "Karnataka": [15.3173, 75.7139],
    "Bihar": [25.0961, 85.3131],
    "Madhya Pradesh": [23.4733, 77.9470],
    "West Bengal": [22.9868, 87.8550],
    "Rajasthan": [27.0238, 74.2179],
    "Gujarat": [22.2587, 71.1924],
    "Telangana": [17.1232, 79.2089],
    "Punjab": [31.1471, 75.3412],
    "Tamil Nadu": [11.1271, 78.6569],
    "Odisha": [20.9517, 85.0985],
    "Jharkhand": [23.6102, 85.2799],
    "Chhattisgarh": [21.2787, 81.8661],
    "Assam": [26.2006, 92.9376],
    "Andhra Pradesh": [15.9129, 79.7400],
    "Uttarakhand": [30.0668, 79.0193],
    "Chandigarh": [30.7333, 76.7794],
    "Kerala": [10.8505, 76.2711],
    "Sikkim": [27.5324, 88.5122],
    "Andaman & Nicobar Islands": [11.7401, 92.6586],
    "Jammu & Kashmir": [33.7782, 76.5762],
    "Himachal Pradesh": [31.1048, 77.1734],
    "Arunachal Pradesh": [28.2180, 94.7278],
    "Goa": [15.2993, 74.1240],
    "Tripura": [23.9408, 91.9882],
    "Manipur": [24.6637, 93.9063],
    "Daman & Diu": [20.4283, 72.8397],
    "Nagaland": [26.1584, 94.5624],
    "Meghalaya": [25.4670, 91.3662],
    "Puducherry": [11.9416, 79.8083]
}

# Add labels with small text size
for state, coords in state_centers.items():
    fig.add_trace(go.Scattergeo(
        lon=[coords[1]],
        lat=[coords[0]],
        text=f"<b>{state}</b><br>{df[df['States'] == state]['Orders'].values[0]}",
        mode="text",
        textfont=dict(
            size=8,
            color="black"
        ),
        showlegend=False
    ))

# Update map layout
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(
    title_font_size=20,
    title_x=0.5,
    margin={"r":0,"t":50,"l":0,"b":0},
    paper_bgcolor="white",
    geo=dict(bgcolor="rgba(0,0,0,0)")
)

# Show the map
fig.show()


Output hidden; open in https://colab.research.google.com to view.

In [None]:
#Separating
mybasket = (myretaildata
          .groupby(['Invoice No', 'Item Type Name'])['Separator']
          .sum().unstack().reset_index().fillna(0)
          .set_index('Invoice No'))

In [None]:
#viewing transaction basket
mybasket.head()

Item Type Name,ADORNIA Self Design Round Neck Sleeveless Embellished Silk Ethnic Jumpsuit,Bandhani Printed Empire Mirror Work Pure Cotton Kurta with Churidar & With Dupatta,Bandhani Printed Panelled Kurti,Brocade Co-Ord Set,Embroidered Empire Sequinned Silk Crepe Kurta with Churidar & With Dupatta,Embroidered Notch Neck Sequinned Straight Kurta With Trousers & Dupatta,Embroidered Regular Chikankari Kurta with Palazzos,Embroidered Regular Sequinned Kurta with Sharara & With Dupatta,Embroidered Regular Sequinned Straight Kurta & Trousers,Embroidered Round Neck Top With Palazzos,...,Women Panelled Gotta Patti Chanderi Silk Kurta with Palazzos & With Dupatta,Women Panelled Gotta Patti Kurta with Palazzos,Women Regular Gotta Patti Kurti with Trousers & With Dupatta,Women Regular Kurta with Churidar & With Dupatta,Women Regular Kurta with Trousers,Woven Design Embroidered Thread Work Anarkali Kurta with Churidar & Dupatta,Woven Design Lapel Collar Jacket and Organza Palazzo,Woven-design 3-Piece Co-Ords,Yoke Design Empire Sequinned Georgette Anarkali Kurta With Churidar & Dupatta,Yoke Design Thread Work V-Neck Anarkali Kurta With Churidar & Dupatta
Invoice No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NIMAT25F00090,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NIMAT25F00091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NIMAT25F00092,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NIMAT25F00093,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
NIMAT25F00094,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
def my_encode_units(x):
    return 1 if x > 0 else 0  # Simplified condition

my_basket_sets = mybasket.map(my_encode_units)


# Training Model

In [None]:
# Ensure the basket dataset is of boolean type
my_basket_sets = my_basket_sets.astype(bool)

# Generating frequent itemsets
my_frequent_itemsets = apriori(my_basket_sets, min_support=0.0005, use_colnames=True)


In [None]:
#generating rules
my_rules = association_rules(my_frequent_itemsets, metric="lift", min_threshold=0.1)

In [None]:
#viewing top 10 rules
my_rules.head(100)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Ethnic Motifs Yoke Design Gotta Patti Chander...,(Brocade Co-Ord Set),0.028302,0.089194,0.000858,0.030303,0.339744,1.0,-0.001667,0.939269,-0.666667,0.007353,-0.064658,0.019959
1,(Brocade Co-Ord Set),(Ethnic Motifs Yoke Design Gotta Patti Chander...,0.089194,0.028302,0.000858,0.009615,0.339744,1.0,-0.001667,0.981132,-0.680889,0.007353,-0.019231,0.019959
2,(Brocade Co-Ord Set),(Printed Tunic With Palazzzos),0.089194,0.080617,0.000858,0.009615,0.119272,1.0,-0.006333,0.928309,-0.890199,0.005076,-0.077228,0.010127
3,(Printed Tunic With Palazzzos),(Brocade Co-Ord Set),0.080617,0.089194,0.000858,0.010638,0.119272,1.0,-0.006333,0.9206,-0.889279,0.005076,-0.086248,0.010127
4,(Brocade Co-Ord Set),(Woven-design 3-Piece Co-Ords),0.089194,0.018868,0.000858,0.009615,0.509615,1.0,-0.000825,0.990658,-0.513736,0.008,-0.00943,0.027535
5,(Woven-design 3-Piece Co-Ords),(Brocade Co-Ord Set),0.018868,0.089194,0.000858,0.045455,0.509615,1.0,-0.000825,0.954178,-0.495146,0.008,-0.048023,0.027535
6,(Embroidered Notch Neck Sequinned Straight Kur...,(Floral Embroidered Regular Thread Work Kurta ...,0.006003,0.023156,0.000858,0.142857,6.169312,1.0,0.000719,1.139651,0.842968,0.030303,0.122539,0.089947
7,(Floral Embroidered Regular Thread Work Kurta ...,(Embroidered Notch Neck Sequinned Straight Kur...,0.023156,0.006003,0.000858,0.037037,6.169312,1.0,0.000719,1.032227,0.85777,0.030303,0.031221,0.089947
8,(Women Floral Embroidered Regular Kurta with S...,(Embroidered Notch Neck Sequinned Straight Kur...,0.004288,0.006003,0.000858,0.2,33.314286,1.0,0.000832,1.242496,0.97416,0.090909,0.195168,0.171429
9,(Embroidered Notch Neck Sequinned Straight Kur...,(Women Floral Embroidered Regular Kurta with S...,0.006003,0.004288,0.000858,0.142857,33.314286,1.0,0.000832,1.161664,0.975841,0.090909,0.139166,0.171429


In [None]:
#Filtering rules based on condition
my_rules[ (my_rules['lift'] >= 3) &
       (my_rules['confidence'] >= 0.3) ]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
25,(Floral Embroidered Round Neck Regular A-Line ...,(Women Regular Kurta with Trousers),0.001715,0.074614,0.000858,0.5,6.701149,1.0,0.00073,1.850772,0.852234,0.011364,0.459685,0.255747
32,(Women Embroidered Regular Sequinned Kurta wit...,(Women Regular Kurta with Churidar & With Dupa...,0.002573,0.023156,0.000858,0.333333,14.395062,1.0,0.000798,1.465266,0.932932,0.034483,0.31753,0.185185
38,(Women Panelled Gotta Patti Kurta with Palazzos),(Woven Design Embroidered Thread Work Anarkali...,0.002573,0.034305,0.000858,0.333333,9.716667,1.0,0.000769,1.448542,0.899398,0.02381,0.309651,0.179167
42,(Women Floral Embroidered Regular Kurta with S...,(Floral Embroidered Regular Thread Work Kurta ...,0.000858,0.023156,0.000858,1.0,43.185185,1.0,0.000838,inf,0.977682,0.037037,1.0,0.518519
43,(Women Floral Embroidered Regular Kurta with S...,(Embroidered Notch Neck Sequinned Straight Kur...,0.000858,0.006003,0.000858,1.0,166.571429,1.0,0.000852,inf,0.99485,0.142857,1.0,0.571429
44,(Embroidered Notch Neck Sequinned Straight Kur...,(Women Floral Embroidered Regular Kurta with S...,0.000858,0.004288,0.000858,1.0,233.2,1.0,0.000854,inf,0.996567,0.2,1.0,0.6
48,(Ethnic Motifs Yoke Design Gotta Patti Chander...,(Women Regular Kurta with Churidar & With Dupa...,0.000858,0.023156,0.000858,1.0,43.185185,1.0,0.000838,inf,0.977682,0.037037,1.0,0.518519
49,(Ethnic Motifs Yoke Design Gotta Patti Chander...,(Woven-design 3-Piece Co-Ords),0.000858,0.018868,0.000858,1.0,53.0,1.0,0.000841,inf,0.981974,0.045455,1.0,0.522727
50,(Women Regular Kurta with Churidar & With Dupa...,(Ethnic Motifs Yoke Design Gotta Patti Chander...,0.000858,0.028302,0.000858,1.0,35.333333,1.0,0.000833,inf,0.972532,0.030303,1.0,0.515152


# Making re-commendations

In [None]:
# Export the top 100 Market Basket Analysis rules to CSV
my_rules.head(100).to_csv("market_basket_analysis.csv", index=False)

print("Market Basket Analysis rules exported successfully!")


Market Basket Analysis rules exported successfully!


In [None]:
my_basket_sets['Woven-design 3-Piece Co-Ords'].sum()

np.int64(22)

In [None]:
my_basket_sets['Brocade Co-Ord Set'].sum()


np.int64(104)