In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [None]:
d= data = pd.read_csv('/content/Market_Basket_Optimisation.csv',header=None)
data.head()

Checking Null values

In [None]:
data.isnull().sum()

Filling the null values

In [None]:
data1 = data.fillna(0)
print(data1.isnull().sum())

Basically it means there are total 19 columns in my dataset. column 0 has no null values, but column 1 has 1754 null values like that.

**Binary Matrix format**

In [None]:
binary_matrix = data.notna().astype(int)
binary_matrix.head()

In [None]:
transactions = []
for _, row in data.iterrows():
    items = [str(item) for item in row if pd.notna(item)]
    transactions.extend(items)
#adding the above data to a new dataframe
new_data = pd.DataFrame(transactions, columns=['Item'])
new_data

Adding a new column quantity the data.

In [None]:
new_data['Quantity']=1
new_data

Product popularity visualization

In [None]:
x = new_data['Item'].value_counts().sort_values(ascending=False)[:10]
x

In [None]:
plt.figure(figsize=(10, 6))
sns.barplot(x=x.index, y=x.values)
plt.xticks(rotation=90)
plt.title('Product Popularity')
plt.show()

Pie chart

In [None]:
plt.figure(figsize=(6, 6))
plt.pie(x.values, labels=x.index, autopct='%1.1f%%', startangle=140)
plt.title('Itemsets Distribution')
plt.axis('equal')
plt.show()

Apriori Algorithm

In [15]:
encoder = TransactionEncoder()
transaction_encoded = encoder.fit(transactions).transform(transactions)
transaction_df = pd.DataFrame(transaction_encoded, columns=encoder.columns_)
frequent_itemsets = apriori(transaction_df, min_support=0.04, use_colnames=True)
association_rules_result = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

print("\nAssociation Rules:")
print(association_rules_result)


Association Rules:
       antecedents                  consequents  antecedent support  \
0              ( )                          (a)            0.469094   
1              (a)                          ( )            0.574260   
2              ( )                          (b)            0.469094   
3              (b)                          ( )            0.172292   
4              ( )                          (d)            0.469094   
...            ...                          ...                 ...   
119665         (e)  ( , t, i, n, w, m, r, a, l)            0.795082   
119666         (m)  ( , t, i, n, w, e, r, a, l)            0.261145   
119667         (r)  ( , t, i, n, w, e, m, a, l)            0.481116   
119668         (a)  ( , t, i, n, w, e, m, r, l)            0.574260   
119669         (l)  ( , t, i, n, w, e, m, r, a)            0.348227   

        consequent support   support  confidence      lift  leverage  \
0                 0.574260  0.304635    0.649412  1.130

In [None]:
pivot_table = association_rules_result.pivot(index='antecedents', columns='consequents', values=['support', 'confidence'])

pivot_table = pivot_table.fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(pivot_table, annot=True, fmt='.2f', cmap='YlGnBu', cbar=True)
plt.title('Association Rules Strength: Support and Confidence')
plt.xlabel('Consequents')
plt.ylabel('Antecedents')
plt.show()

  and should_run_async(code)


Store Layout optimization

In [None]:
import networkx as nx
G = nx.Graph()
for _, row in high_confidence_rules.iterrows():
    G.add_edge(row['antecedents'], row['consequents'], weight=row['lift'])

pos = nx.spring_layout(G)
labels = {node: node for node in G.nodes()}
nx.draw(G, pos, labels=labels, with_labels=True, node_size=1000, font_size=10, font_color='black')
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_color='red')
plt.title('Store Layout Optimization')
plt.show()

Cross-Selling matrix

In [None]:
cross_selling_matrix = pd.pivot_table(association_rules_result, index='antecedents', columns='consequents', values='confidence')

plt.figure(figsize=(10, 8))
sns.heatmap(cross_selling_matrix, cmap='YlGnBu', annot=True, fmt='.2f', cbar=True)
plt.title('Cross-Selling Strategy Matrix')
plt.xlabel('Consequents')
plt.ylabel('Antecedents')
plt.show()

Personalized recommedations

In [None]:
def get_personalized_recommendations(basket):
    recommendations = set()
    for itemset in frequent_itemsets['itemsets']:
        if itemset.issubset(basket):
            recommendations.update(itemset)
    recommendations -= basket
    return recommendations

customer_basket = {'shrimp', 'green grapes', 'avocado'}
recommended_products = get_personalized_recommendations(customer_basket)
print("Recommended Products:", recommended_products)

Cross-selling strategy

In [None]:
average_lift_before = association_rules_df['lift'].mean()
simulated_support_increase = 1.1                          # 10% increase
frequent_itemsets_simulated = apriori(binary_matrix, min_support=0.05 * simulated_support_increase, use_colnames=True)
association_rules_simulated = association_rules(frequent_itemsets_simulated, metric="lift", min_threshold=1)
average_lift_after = association_rules_simulated['lift'].mean()

lift_improvement = (average_lift_after - average_lift_before) / average_lift_before * 100

print("Average Lift before cross-selling strategy:", average_lift_before)
print("Average Lift after simulated cross-selling strategy:", average_lift_after)
print("Lift Improvement:", lift_improvement, "%")