[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googlecolab/colab-samples/blob/main/notebooks/basic_notebook_features/text_cells.ipynb)




In [None]:
# Install required libraries
!pip install -U pip setuptools wheel scikit-learn>=1.4 -q
!pip install -U git+https://github.com/pycaret/pycaret.git@master -q
!pip install mlxtend gradio -q
print("‚úÖ All libraries installed!")

In [None]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Download the Online Retail workbook from the UCI repository into a DataFrame
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx'
data = pd.read_excel(url)

# Report the table size and column names, then preview the first rows
column_names = list(data.columns)
print(f"‚úÖ Online Retail Dataset loaded: {data.shape}")
print(f"\nColumns: {column_names}")
print("\nSample transactions:")
data.head()

In [None]:
# Clean the dataset: each step narrows `data` to the rows usable for
# market-basket analysis.
print("üßπ Cleaning data...")

# Rows without an invoice number, description or customer id are dropped
required_columns = ['InvoiceNo', 'Description', 'CustomerID']
data = data.dropna(subset=required_columns)

# Cancelled orders are the ones whose InvoiceNo starts with 'C'
is_cancelled = data['InvoiceNo'].astype(str).str.startswith('C')
data = data[~is_cancelled]

# Only rows with a positive quantity and a positive unit price are kept
has_positive_quantity = data['Quantity'] > 0
has_positive_price = data['UnitPrice'] > 0
data = data[has_positive_quantity & has_positive_price]

# Generic (non-product) descriptions are matched case-insensitively
generic_pattern = 'POSTAGE|DISCOUNT|SAMPLE|TEST'
is_generic = data['Description'].str.contains(generic_pattern, case=False, na=False)
data = data[~is_generic]

product_count = data['Description'].nunique()
transaction_count = data['InvoiceNo'].nunique()
print(f"‚úÖ Cleaned data: {data.shape}")
print(f"Unique products: {product_count}")
print(f"Unique transactions: {transaction_count}")

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules

# Create basket: transactions x products matrix.
# Rows are invoices, columns are product descriptions, and each cell holds the
# total quantity of that product on that invoice. `fill_value=0` fills the
# invoice/product pairs that never occur, so no separate fillna() or
# reset_index()/set_index() round-trip is needed.
basket = (
    data
    .groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack(fill_value=0)
)

# Convert to boolean purchase flags (True if purchased, False otherwise).
# A vectorised comparison replaces the per-cell `applymap` lambda, which is
# deprecated in recent pandas and very slow on a matrix of this size. The
# boolean dtype is also the input type mlxtend's apriori() recommends.
basket_binary = basket > 0

print(f"‚úÖ Basket matrix created: {basket_binary.shape}")
print(f"   Transactions: {basket_binary.shape[0]}")
print(f"   Products: {basket_binary.shape[1]}")
print(f"\nSample basket:")
basket_binary.head()

In [None]:
# Mine frequent itemsets from the basket matrix with the Apriori algorithm
print("‚õèÔ∏è Mining frequent itemsets...")
min_support = 0.02  # an itemset must occur in at least 2% of transactions

apriori_options = dict(min_support=min_support, use_colnames=True, max_len=3)
frequent_itemsets = apriori(basket_binary, **apriori_options)

# Record how many products each itemset contains
frequent_itemsets['length'] = frequent_itemsets['itemsets'].map(len)

size_distribution = frequent_itemsets['length'].value_counts().sort_index()
print(f"‚úÖ Found {len(frequent_itemsets)} frequent itemsets")
print("\nItemset size distribution:")
print(size_distribution)
print("\nTop 10 frequent itemsets:")
frequent_itemsets.sort_values('support', ascending=False).head(10)

In [None]:
# Derive association rules from the frequent itemsets
print("üìã Generating association rules...")

candidate_rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.3,
)

# Keep only rules with lift above 1, ordered from highest to lowest lift
has_positive_lift = candidate_rules['lift'] > 1
rules = candidate_rules[has_positive_lift].sort_values('lift', ascending=False)

print(f"‚úÖ Generated {len(rules)} association rules")
print("\nTop 10 rules by lift:")
summary_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
rules[summary_columns].head(10)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Draw two side-by-side panels summarising the mined rules
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
scatter_ax, hist_ax = axes

# Left panel: one point per rule, positioned by support/confidence and
# coloured by lift
scatter = scatter_ax.scatter(
    rules['support'],
    rules['confidence'],
    c=rules['lift'],
    cmap='viridis',
    alpha=0.6,
    s=50,
)
plt.colorbar(scatter, ax=scatter_ax, label='Lift')

# Right panel: histogram of lift values
hist_ax.hist(rules['lift'], bins=30, color='coral', edgecolor='black', alpha=0.7)

# Labels, titles and a light grid are applied the same way to both panels
panel_text = (
    (scatter_ax, 'Support', 'Confidence', 'Support vs Confidence (colored by Lift)'),
    (hist_ax, 'Lift', 'Frequency', 'Distribution of Lift Values'),
)
for ax, xlabel, ylabel, title in panel_text:
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_title(title, fontsize=14)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("üìä Visualization complete!")

In [None]:
# Analyze top rules in detail
print("üîç Top 5 Association Rules:\n")

# Number the rules by their rank (1..5). The index labels yielded by
# iterrows() are whatever labels `rules` carries, which are not a 1..5
# ranking once the frame has been filtered and re-sorted, so they must not be
# used for numbering.
for rank, (_, rule) in enumerate(rules.head(5).iterrows(), start=1):
    # Itemsets are unordered; sorting keeps the printed text stable across runs
    antecedents = ', '.join(sorted(rule['antecedents']))
    consequents = ', '.join(sorted(rule['consequents']))

    print(f"Rule {rank}:")
    print(f"  IF customer buys: {antecedents}")
    print(f"  THEN they also buy: {consequents}")
    print(f"  Support: {rule['support']:.3f} | Confidence: {rule['confidence']:.3f} | Lift: {rule['lift']:.2f}")
    print()

In [None]:
# Write the rules to a CSV file and print a short statistical summary
output_file = 'online_retail_association_rules.csv'
rules.to_csv(output_file, index=False)
print(f"üíæ Rules saved to {output_file}")

lift_values = rules['lift']
mean_confidence = rules['confidence'].mean()
summary_lines = [
    "\nüìä Summary Statistics:",
    f"   ‚Ä¢ Total Rules: {len(rules)}",
    f"   ‚Ä¢ Average Confidence: {mean_confidence:.2%}",
    f"   ‚Ä¢ Average Lift: {lift_values.mean():.2f}",
    f"   ‚Ä¢ Max Lift: {lift_values.max():.2f}",
]
print("\n".join(summary_lines))

In [None]:
import gradio as gr

# Prepare recommendation function
def recommend_products(product_name):
    """Recommend products based on association rules.

    Searches the module-level ``rules`` DataFrame for rules whose antecedents
    contain ``product_name`` (compared case-insensitively, ignoring
    surrounding whitespace) and reports the five matches with the highest
    lift.

    Args:
        product_name: Product description to look up. ``None`` or a blank
            string (for example when no value has been chosen yet) is
            accepted and answered with a prompt instead of an exception.

    Returns:
        str: A numbered list of recommended consequents with their confidence
        and lift, or a short message when no product was given or no rule
        matches.
    """
    # Guard first: calling .upper() on None would raise AttributeError.
    if not product_name or not product_name.strip():
        return "Please select a product first!"

    # Normalise the query once instead of once per rule.
    target = product_name.strip().upper()

    def _contains_target(itemset):
        return any(item.strip().upper() == target for item in itemset)

    # astype(bool) keeps the mask boolean even when `rules` has no rows.
    is_match = rules['antecedents'].apply(_contains_target).astype(bool)
    matching_rules = rules[is_match]

    if matching_rules.empty:
        return "No recommendations found for this product. Try another product!"

    # Get top 5 recommendations
    top_recommendations = matching_rules.nlargest(5, 'lift')

    parts = [f"üõí **Recommendations for: {product_name}**\n\n"]
    for position, (_, rule) in enumerate(top_recommendations.iterrows(), 1):
        # Itemsets are unordered; sort for a stable, readable listing.
        consequents = ', '.join(sorted(rule['consequents']))
        parts.append(f"{position}. **{consequents}**\n")
        parts.append(f"   Confidence: {rule['confidence']:.1%} | Lift: {rule['lift']:.2f}\n\n")

    return ''.join(parts)

# Get list of products for dropdown.
# Only products that occur in the antecedents of at least one rule can produce
# a recommendation, so those are offered. Taking the first 100 distinct
# descriptions from `data` (as before) mostly listed products with no rules.
rule_products = sorted({item for antecedent in rules['antecedents'] for item in antecedent})

# Fall back to the first 100 distinct descriptions when no rules were mined,
# so the dropdown is not left empty.
product_list = rule_products or sorted(data['Description'].unique()[:100])

# Create Gradio interface
demo1 = gr.Interface(
    fn=recommend_products,
    inputs=gr.Dropdown(choices=product_list, label="Select a Product"),
    outputs=gr.Textbox(label="Recommended Products", lines=10),
    title="üõçÔ∏è Product Recommendation System",
    description="Select a product to see what customers typically buy together!",
    # Slice instead of indexing fixed positions: fixed indices raise
    # IndexError when fewer than 21 products are available.
    examples=product_list[:3] or None
)

demo1.launch(share=True)

In [None]:
import gradio as gr

def analyze_basket(products_str):
    """Analyze a shopping basket and suggest additional items.

    Parses a comma-separated list of product names, finds rules in the
    module-level ``rules`` DataFrame whose antecedents mention any of them
    (case-insensitive, ignoring surrounding whitespace), discards rules whose
    consequents are already in the basket, and reports the five remaining
    rules with the highest lift.

    Args:
        products_str: Comma-separated product names. ``None``, an empty
            string, or a string made only of commas/whitespace is treated as
            an empty basket.

    Returns:
        str: A formatted list of suggested add-ons, or a short message when
        the basket is empty or no suggestion is available.
    """
    # Parse input products. Blank entries are dropped because
    # ''.split(',') yields [''], which would otherwise pass the emptiness
    # check below.
    products = [
        name.strip().upper()
        for name in (products_str or '').split(',')
        if name.strip()
    ]

    if not products:
        return "Please enter at least one product!"

    basket_items = set(products)

    def _overlaps_basket(itemset):
        return any(item.strip().upper() in basket_items for item in itemset)

    # Find rules where ANY of the input products are antecedents.
    # astype(bool) keeps the mask boolean even when `rules` has no rows.
    in_antecedents = rules['antecedents'].apply(_overlaps_basket).astype(bool)
    matching_rules = rules[in_antecedents]

    if matching_rules.empty:
        return "No suggestions found for these products."

    # Remove products already in basket BEFORE ranking. Filtering after
    # taking the top rows could discard every candidate even though other
    # valid suggestions exist.
    already_in_basket = matching_rules['consequents'].apply(_overlaps_basket).astype(bool)
    top_suggestions = matching_rules[~already_in_basket].nlargest(5, 'lift')

    # Avoid returning a header with nothing underneath it.
    if top_suggestions.empty:
        return "No suggestions found for these products."

    parts = [
        f"üõí **Your Basket:** {', '.join(products)}\n\n",
        f"üí° **Suggested Add-ons:**\n\n",
    ]

    for position, (_, rule) in enumerate(top_suggestions.iterrows(), 1):
        # Itemsets are unordered; sort for a stable, readable listing.
        consequents = ', '.join(sorted(rule['consequents']))
        antecedents = ', '.join(sorted(rule['antecedents']))
        parts.append(f"{position}. **{consequents}**\n")
        parts.append(f"   Based on: {antecedents}\n")
        parts.append(f"   {rule['confidence']:.1%} of customers who bought these also bought this\n")
        parts.append(f"   Lift: {rule['lift']:.2f}x more likely\n\n")

    return ''.join(parts)

# Build the Gradio UI for the basket analyzer: a free-text input for the
# basket contents and a text output for the suggestions.
basket_input = gr.Textbox(
    label="Enter products in your basket (comma-separated)",
    placeholder="e.g., WHITE HANGING HEART T-LIGHT HOLDER, REGENCY CAKESTAND 3 TIER",
    lines=3
)
suggestions_output = gr.Textbox(label="Smart Suggestions", lines=15)

# Sample baskets shown as clickable examples in the interface
sample_baskets = [
    "WHITE HANGING HEART T-LIGHT HOLDER",
    "REGENCY CAKESTAND 3 TIER, PINK REGENCY TEACUP AND SAUCER"
]

demo2 = gr.Interface(
    fn=analyze_basket,
    inputs=basket_input,
    outputs=suggestions_output,
    title="üß∫ Smart Basket Analyzer",
    description="Enter products you're buying and get intelligent cross-sell suggestions based on real purchase patterns!",
    examples=sample_baskets
)

demo2.launch(share=True)