<a href="https://colab.research.google.com/github/pratham-rajesh/recommender-system-hackathon-256/blob/main/Market_Basket_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🛒 Electronic Item Checkout Web Portal

Market Basket Recommendation System using Apriori Algorithm with Gradio

In [1]:
!pip install -q pandas gradio mlxtend

In [2]:
from google.colab import files
import os

# Prompt for an upload through the Colab widget; the result is iterated by filename below.
uploaded = files.upload()

# First uploaded file with a .csv extension (case-insensitive), or None when nothing
# suitable was uploaded. Using next() avoids the IndexError that indexing [0] raised
# when files were uploaded but none of them was a CSV.
csv_file = next(
    (name for name in (uploaded or {}) if name.lower().endswith('.csv')),
    None,
)

if csv_file:
    print(f"‚úÖ Uploaded: {csv_file}")
    CSV_FILE_PATH = csv_file
else:
    # Fall back to the default dataset filename, expected in the working directory.
    CSV_FILE_PATH = "CMPE256_Hackathon_market_basket_analysis_Release.csv"
    print(f"‚ö†Ô∏è Using: {CSV_FILE_PATH}")

Saving CMPE256_Hackathon_market_basket_analysis_Release.csv to CMPE256_Hackathon_market_basket_analysis_Release.csv
‚úÖ Uploaded: CMPE256_Hackathon_market_basket_analysis_Release.csv


In [3]:
import pandas as pd
import gradio as gr
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import warnings
warnings.filterwarnings('ignore')

# ========== CORE FUNCTIONS ==========

def load_and_preprocess_data(file_path, item_columns=None):
    """Load a basket CSV and turn it into transaction lists for Apriori.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``.
        item_columns: Names of the columns that hold the items of a basket.
            Defaults to ``item_1`` .. ``item_5``.

    Returns:
        A tuple ``(transactions, all_items)`` where ``transactions`` is a list
        of lists of stripped item names (rows without any item are dropped)
        and ``all_items`` is the sorted list of distinct item names.

    Raises:
        KeyError: If one of ``item_columns`` is missing from the CSV.
    """
    if item_columns is None:
        item_columns = ('item_1', 'item_2', 'item_3', 'item_4', 'item_5')

    df = pd.read_csv(file_path)
    transactions = []
    all_items_set = set()

    # Iterate only over the item columns; plain tuples are much cheaper than
    # the per-row Series objects that iterrows() builds.
    for row in df[list(item_columns)].itertuples(index=False, name=None):
        transaction = []
        for value in row:
            if pd.isna(value):
                continue
            label = str(value).strip()
            if label:  # skip cells that contain only whitespace
                transaction.append(label)

        if transaction:
            transactions.append(transaction)
            all_items_set.update(transaction)

    return transactions, sorted(all_items_set)

def generate_association_rules(transactions, min_support=0.005, min_confidence=0.1):
    """Mine association rules from a list of transactions with Apriori.

    The transactions are one-hot encoded, frequent itemsets are extracted with
    ``min_support`` and rules are kept when their confidence reaches
    ``min_confidence``. An empty DataFrame is returned when no itemset is
    frequent enough.
    """
    encoder = TransactionEncoder()
    encoder.fit(transactions)
    onehot = pd.DataFrame(encoder.transform(transactions), columns=encoder.columns_)

    itemsets = apriori(onehot, min_support=min_support, use_colnames=True)

    # Nothing frequent enough: hand back an empty frame instead of calling the rule miner.
    if len(itemsets) == 0:
        return pd.DataFrame()

    return association_rules(itemsets, metric="confidence", min_threshold=min_confidence)

def _no_rule_details(cart_size):
    """Build the details payload used when no association rule applies to the cart."""
    return {
        'rules_used': {},
        'fallback_used': False,
        'fallback_type': None,
        'transaction_counts': {},
        'cart_size': cart_size
    }


def _overlaps_cart(candidate_lower, cart_names_lower):
    """Return True if the candidate equals, contains, or is contained in any cart name."""
    return any(
        candidate_lower in cart_name or cart_name in candidate_lower
        for cart_name in cart_names_lower
    )


def recommend_items(cart, rules_df, transactions=None, top_n=3, return_details=False):
    """Recommend items for a cart from mined association rules.

    Only rules whose antecedent items are ALL present in the cart are used.
    For carts with three or more items, single-item antecedents are ignored.
    Each candidate item is scored with the best ``confidence * lift`` among
    the rules that suggest it.

    Args:
        cart: List of item names currently in the cart.
        rules_df: DataFrame of rules with ``antecedents``/``consequents``
            (frozensets), ``confidence`` and ``lift`` columns. ``None`` or an
            empty frame yields no recommendations.
        transactions: Unused; kept so existing callers keep working.
        top_n: Maximum number of items to return.
        return_details: When True, also return a details dict.

    Returns:
        A list of item names ordered by descending score, or a tuple
        ``(items, details)`` when ``return_details`` is True. ``details`` is
        ``{}`` when the cart or the rule set is empty; otherwise it has the
        keys ``rules_used``, ``fallback_used``, ``fallback_type``,
        ``transaction_counts`` and ``cart_size``.
    """
    # rules_df may still be None if the rules have not been generated yet.
    if len(cart) == 0 or rules_df is None or len(rules_df) == 0:
        return [] if not return_details else ([], {})

    cart_size = len(cart)
    cart_exact = {item.strip() for item in cart}          # antecedent match is case-sensitive
    cart_lower = {item.lower().strip() for item in cart}  # exclusion check is case-insensitive

    # Keep only rules whose whole antecedent is contained in the cart.
    matching_rules = []
    for _, rule in rules_df.iterrows():
        antecedents = rule['antecedents']
        if not isinstance(antecedents, frozenset):
            continue
        antecedent_names = {str(name).strip() for name in antecedents}
        if antecedent_names.issubset(cart_exact):
            matching_rules.append((rule, len(antecedent_names)))

    # Larger carts only trust rules that combine at least two cart items.
    if cart_size >= 3:
        matching_rules = [(rule, size) for rule, size in matching_rules if size >= 2]

    if not matching_rules:
        return [] if not return_details else ([], _no_rule_details(cart_size))

    # Most specific rules first, so they lead each item's detail list (stable sort).
    matching_rules.sort(key=lambda pair: pair[1], reverse=True)

    best_scores = {}
    rule_details = {}

    for rule, _ in matching_rules:
        antecedents = rule['antecedents']
        consequents = rule['consequents']
        confidence = rule['confidence']
        lift = rule['lift']

        if not isinstance(consequents, frozenset):
            continue

        score = confidence * lift

        for consequent in consequents:
            name = str(consequent).strip()

            # NOTE(review): this is a substring match in both directions, so a cart
            # item "Phone" would also suppress a candidate such as "Phone Case".
            # Behaviour kept as-is; confirm that this is intended.
            if _overlaps_cart(name.lower().strip(), cart_lower):
                continue

            if name not in best_scores:
                best_scores[name] = score
                rule_details[name] = []
            elif score > best_scores[name]:
                best_scores[name] = score

            if return_details:
                rule_details[name].append({
                    'antecedents': [str(a) for a in antecedents],
                    'confidence': confidence,
                    'lift': lift,
                    # Report the rule's own support; the previous lookup order
                    # returned the antecedent support under this key.
                    'support': rule.get('support', rule.get('antecedent support', 'N/A')),
                    'conviction': rule.get('conviction', 'N/A')
                })

    ranked = sorted(best_scores.items(), key=lambda pair: pair[1], reverse=True)
    result = [name for name, _ in ranked[:top_n]]

    if return_details:
        return result, {
            'rules_used': rule_details,
            'fallback_used': False,
            'fallback_type': None,
            'transaction_counts': {},
            'cart_size': cart_size
        }

    return result

# ========== GRADIO UI FUNCTIONS ==========

# Global state
# Module-level variables read and written by the Gradio callbacks defined below.
# NOTE(review): being module-level, this state is presumably shared by every
# browser session connected to the app — confirm that is acceptable.
cart_state = []  # items in the cart; appended to by add_item_to_cart, reset by clear_cart
all_items_list = []  # unique item names; filled in by create_gradio_app
rules_df_global = None  # association rules; set by create_gradio_app
transactions_global = None  # list of transactions; set by create_gradio_app

def format_cart_display(cart):
    """Render the cart as a numbered list, one item per line.

    An empty (or missing) cart produces a short hint message instead.
    """
    if cart:
        numbered_lines = []
        for position, entry in enumerate(cart, start=1):
            numbered_lines.append(f"üì¶ {position}. {entry}")
        return "\n".join(numbered_lines)

    return "üõí Your cart is empty. Add items to see recommendations!"

def format_recommendations(recommended_items, details, cart):
    """Build the markdown shown in the recommendations panel.

    With no recommended items, an explanation mentioning the cart contents is
    returned. Otherwise the items are listed in order and, when ``details``
    carries a ``rules_used`` mapping, the metrics of the first rule recorded
    for each item are appended.
    """
    if not recommended_items:
        cart_items_text = " and ".join(f"**{item}**" for item in cart)
        return f"""üì≠ **No further recommendations available** ‚Äî no historical transaction includes all items in the current cart.

**Your cart contains:** {cart_items_text}

**Why no recommendations:**
- The system filters association rules to only consider those where **all antecedent items** are fully contained within your cart
- With {len(cart)} items in your cart, no association rules have antecedents that are a complete subset of your cart items
- This means no historical transaction in the dataset contains all the items you currently have in your cart together

üí° **Try:** Removing an item from your cart to see recommendations based on the remaining items."""

    parts = ["üí° **Recommended Items:**\n\n"]
    for rank, item in enumerate(recommended_items, 1):
        parts.append(f"‚úÖ **{rank}.** {item}\n\n")

    # Without rule details there is nothing more to show.
    if not (details and 'rules_used' in details):
        return "".join(parts)

    parts.append("\n---\n")
    parts.append("**üìä Recommendation Validation:**\n\n")
    parts.append("‚úÖ **Recommendation Method:** Association Rules (Apriori Algorithm)\n")
    parts.append("**Matching Strategy:** Only rules where **ALL items in the antecedent** are present in your cart.\n\n")

    rules_by_item = details['rules_used']
    for item in recommended_items:
        if item not in rules_by_item or len(rules_by_item[item]) == 0:
            continue

        first_rule = rules_by_item[item][0]  # only the first recorded rule is displayed
        conf = first_rule.get('confidence', 0)
        lift = first_rule.get('lift', 0)
        support = first_rule.get('support', 'N/A')

        parts.append(f"**üéØ {item}**\n")
        parts.append(f"  - **Confidence:** {conf:.1%} | **Lift:** {lift:.2f} | **Support:** {support}\n")
        if lift != 'N/A' and isinstance(lift, (int, float)) and lift > 1:
            parts.append(f"  - ‚úì Lift > 1: This item is {lift:.2f}x more likely to appear with your cart items!\n")
        parts.append("\n")

    return "".join(parts)

def add_item_to_cart(item, current_cart_text):
    """Add the selected item to the shared cart and refresh the panels.

    Args:
        item: Value selected in the dropdown (may be empty/None).
        current_cart_text: Markdown currently shown in the cart panel; it is
            returned unchanged when nothing is added.

    Returns:
        A 3-tuple ``(cart markdown, recommendations markdown, status message)``
        matching the outputs wired to the "Add Item" button. When nothing is
        added, the recommendations slot is ``gr.update()`` so the panel keeps
        its current content instead of being blanked.
    """
    # Nothing selected: leave every panel as it is.
    if not item:
        return current_cart_text, gr.update(), ""

    # Duplicate: warn, but keep the cart and the current recommendations.
    if item in cart_state:
        return current_cart_text, gr.update(), f"‚ö†Ô∏è {item} is already in your cart!"

    cart_state.append(item)
    recs, details = recommend_items(
        cart_state, rules_df_global, transactions_global, top_n=3, return_details=True
    )

    cart_display = format_cart_display(cart_state)
    recommendations_display = format_recommendations(recs, details, cart_state)

    # The empty third value clears the status message.
    return cart_display, recommendations_display, ""

def clear_cart():
    """Empty the shared cart and return the reset text for the three output panels."""
    global cart_state
    cart_state = []  # rebind to a fresh list rather than mutating the old one

    empty_cart_text = format_cart_display([])
    placeholder = "Add items to your cart to see personalized recommendations!"
    return empty_cart_text, placeholder, ""

def create_gradio_app(csv_file_path):
    """Load the data, mine the rules and build the Gradio Blocks app.

    The app is returned, not launched; the caller is expected to call
    ``launch()`` on it.

    Side effects: overwrites the module globals ``transactions_global``,
    ``all_items_list`` and ``rules_df_global``, which the button callbacks read.

    Args:
        csv_file_path: Path of the basket CSV passed to load_and_preprocess_data.

    Returns:
        The ``gr.Blocks`` instance with the cart UI and its event handlers.
    """
    global all_items_list, rules_df_global, transactions_global

    print("üìä Loading data and generating association rules...")
    transactions_global, all_items_list = load_and_preprocess_data(csv_file_path)
    # Thresholds are fixed here: 0.5% minimum support, 10% minimum confidence.
    rules_df_global = generate_association_rules(transactions_global, min_support=0.005, min_confidence=0.1)

    print(f"‚úÖ Loaded {len(transactions_global)} transactions with {len(all_items_list)} unique items")
    print(f"‚úÖ Generated {len(rules_df_global)} association rules")

    # Create Gradio interface
    with gr.Blocks(title="Electronic Item Checkout Web Portal", theme=gr.themes.Soft()) as app:
        # Page header banner.
        gr.Markdown("""
        <div style="background-color: #003767; color: white; padding: 20px; border-radius: 10px; text-align: center; margin-bottom: 20px;">
            <h1>üõí Electronic Item Checkout Web Portal</h1>
            <p style="margin: 0; font-size: 14px;">SAN JOS√â STATE UNIVERSITY - Market Basket Recommendation System</p>
        </div>
        """)

        with gr.Row():
            # Left column: cart contents, item picker and action buttons.
            with gr.Column(scale=1):
                gr.Markdown("### üì¶ Added Items to Cart:")
                cart_display = gr.Markdown(value=format_cart_display([]))

                gr.Markdown("### ‚ûï Select Item from the List:")
                item_dropdown = gr.Dropdown(
                    choices=all_items_list,
                    label="Choose an item to add:",
                    interactive=True
                )
                add_button = gr.Button("Add Item", variant="primary", size="lg")
                status_message = gr.Markdown("")
                clear_button = gr.Button("üóëÔ∏è Clear Cart", variant="secondary")

            # Right column: recommendations and dataset statistics.
            with gr.Column(scale=2):
                gr.Markdown("### üí° Recommended Items:")
                recommendations_display = gr.Markdown(value="Add items to your cart to see personalized recommendations!")

                with gr.Accordion("üìä Dataset Statistics", open=False):
                    # Counts are computed once, when the app is built.
                    stats = gr.Markdown(f"""
                    **Total Transactions:** {len(transactions_global)}
                    **Total Unique Items:** {len(all_items_list)}
                    **Association Rules Generated:** {len(rules_df_global)}
                    """)

        # Event handlers
        # Both callbacks return three values, mapped in order onto
        # cart_display, recommendations_display and status_message.
        add_button.click(
            fn=add_item_to_cart,
            inputs=[item_dropdown, cart_display],
            outputs=[cart_display, recommendations_display, status_message]
        )

        clear_button.click(
            fn=clear_cart,
            inputs=[],
            outputs=[cart_display, recommendations_display, status_message]
        )

    return app

# Load data and create app
# Builds the app from CSV_FILE_PATH (set in the upload cell) and then starts the server.
print("üöÄ Creating Gradio app...")
app = create_gradio_app(CSV_FILE_PATH)
print("\n‚úÖ App created! Launching...")
# NOTE(review): server_name="0.0.0.0" presumably exposes the server on all network
# interfaces, and the fixed server_port=7860 may make a re-run of this cell fail
# while a previous instance still holds the port — confirm both are intended.
app.launch(share=False, inbrowser=False, server_name="0.0.0.0", server_port=7860)


üöÄ Creating Gradio app...
üìä Loading data and generating association rules...
‚úÖ Loaded 1000 transactions with 40 unique items
‚úÖ Generated 296 association rules

‚úÖ App created! Launching...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.
* To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

