In [None]:
# Install dependencies if not installed
!pip install pandas numpy mlxtend networkx matplotlib seaborn

# Import required libraries
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

from mlxtend.frequent_patterns import apriori, association_rules

# Load the transaction export and clean it in a single pipeline
# (adjust the CSV path if the file lives elsewhere)
df = (
    pd.read_csv("Online_Retail.csv", encoding="ISO-8859-1")
    # Discard rows that have no customer identifier
    .dropna(subset=['CustomerID'])
    # Parse the invoice timestamp column into datetime values
    .assign(InvoiceDate=lambda frame: pd.to_datetime(frame['InvoiceDate']))
    # Exclude cancelled invoices, whose invoice number starts with 'C'
    .loc[lambda frame: ~frame['InvoiceNo'].astype(str).str.startswith('C')]
    # Retain only rows with a strictly positive quantity
    .loc[lambda frame: frame['Quantity'] > 0]
)

# Build the invoice-by-product basket matrix: one row per InvoiceNo, one column
# per Description, each cell holding the summed quantity of that product on that
# invoice. Invoice/product pairs that never occur are filled with 0.
basket = (
    df.groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack(fill_value=0)
)

# One-hot encode for Apriori with a single vectorized comparison.
# This replaces `applymap`, which is deprecated since pandas 2.1 and calls a
# Python lambda once per cell. The result is a boolean matrix, the input type
# mlxtend's `apriori` prefers (it warns on non-bool DataFrames).
basket = basket > 0

# Mine itemsets whose support is at least 0.02, labelled by column name
frequent_itemsets = apriori(basket, min_support=0.02, use_colnames=True)

# Derive rules filtered on lift (threshold 1.0), ordered from highest lift down
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.0,
).sort_values(by="lift", ascending=False)

# Show the first ten rules with their key metrics
print(
    rules.loc[
        :, ['antecedents', 'consequents', 'support', 'confidence', 'lift']
    ].head(10)
)

# Draw the mined rules as an undirected product graph
G = nx.Graph()

# Each (antecedent item, consequent item) pair of a rule becomes an edge whose
# weight is that rule's lift; a pair that shows up again overwrites the weight
rule_columns = zip(rules['antecedents'], rules['consequents'], rules['lift'])
for antecedent_items, consequent_items, rule_lift in rule_columns:
    for source_item in antecedent_items:
        for target_item in consequent_items:
            G.add_edge(source_item, target_item, weight=rule_lift)

plt.figure(figsize=(10, 6))
pos = nx.spring_layout(G, k=0.5)
nx.draw(
    G,
    pos,
    with_labels=True,
    node_size=3000,
    edge_color="gray",
    font_size=10,
)
plt.title("Product Association Network")
plt.show()

# Recommendation Engine: Given a product, suggest related items
def recommend_products(product, rules_df, num_recommendations=3):
    """Print the products most strongly associated with ``product``.

    Rules whose antecedents contain ``product`` are ranked by lift. Each
    distinct consequent set is reported once, with its highest lift, and all
    of its items are listed.

    Args:
        product: Item label to look for in the rule antecedents.
        rules_df: Rules table with ``antecedents`` and ``consequents`` columns
            holding collections of item labels, plus a numeric ``lift`` column.
        num_recommendations: Maximum number of recommendations to print.

    Returns:
        None. The recommendations (or a "no associations" message) are
        written to standard output.
    """
    # Cast to bool so an empty rules table still produces a valid boolean
    # indexer rather than an empty object-dtype Series.
    has_product = (
        rules_df['antecedents'].apply(lambda items: product in items).astype(bool)
    )
    matches = rules_df.loc[has_product, ['consequents', 'lift']]

    # Normalise consequents to frozensets so they are hashable and comparable,
    # then keep only the best-lift rule for every distinct consequent set.
    # A stable sort keeps ties in their original order.
    ranked = (
        matches.assign(consequents=matches['consequents'].map(frozenset))
        .sort_values(by='lift', ascending=False, kind='mergesort')
        .drop_duplicates(subset='consequents')
    )

    if ranked.empty:
        print(f"No strong associations found for '{product}'.")
        return

    print(f"Products frequently bought with '{product}':")
    top = ranked.head(num_recommendations)
    for consequents, lift in zip(top['consequents'], top['lift']):
        # List every item of the consequent set, sorted for stable output
        label = ", ".join(sorted(str(item) for item in consequents))
        print(f"- {label} (Lift: {lift:.2f})")

# Demo: print recommendations for one sample product from the mined rules
recommend_products(
    product="WHITE HANGING HEART T-LIGHT HOLDER",
    rules_df=rules,
)
