<a href="https://colab.research.google.com/github/shruti-2309/Petclinic/blob/main/mlbc_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# ==============================
# STEP 1: Install required libraries
# ==============================
!pip install pandas numpy scikit-learn gradio



In [8]:
# ==============================
# STEP 2: Upload dataset CSV file
# ==============================
import os

from google.colab import files

# Only prompt for an upload when the CSV is missing. Uploading a file whose
# name already exists makes Colab store it under a renamed copy
# ("online_shoppers_intention (1).csv"), which the loading step never reads,
# and the prompt blocks "Run all" on every re-run.
# To replace the dataset, delete the existing file first and re-run this cell.
if os.path.exists("online_shoppers_intention.csv"):
    uploaded = {}  # nothing new was uploaded in this run
    print("online_shoppers_intention.csv already present - skipping upload.")
else:
    uploaded = files.upload()

Saving online_shoppers_intention.csv to online_shoppers_intention (1).csv


In [13]:
# ==============================
# STEP 3: Load dataset
# ==============================
import pandas as pd

try:
    df = pd.read_csv("online_shoppers_intention.csv")
except FileNotFoundError as err:
    # Stop here instead of printing and carrying on: without the file `df` is
    # either undefined or left over from an earlier run, and every later cell
    # would fail (or silently use stale data) far from the real cause.
    raise FileNotFoundError(
        "Error: online_shoppers_intention.csv not found. Please upload the file."
    ) from err

print("✅ Dataset loaded successfully!")
print("Shape:", df.shape)
display(df.head())

✅ Dataset loaded successfully!
Shape: (12330, 18)


Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend,Revenue
0,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,1,1,1,1,Returning_Visitor,False,False
1,0,0.0,0,0.0,2,64.0,0.0,0.1,0.0,0.0,Feb,2,2,1,2,Returning_Visitor,False,False
2,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,4,1,9,3,Returning_Visitor,False,False
3,0,0.0,0,0.0,2,2.666667,0.05,0.14,0.0,0.0,Feb,3,2,2,4,Returning_Visitor,False,False
4,0,0.0,0,0.0,10,627.5,0.02,0.05,0.0,0.0,Feb,3,3,1,4,Returning_Visitor,True,False


In [14]:
# ==============================
# STEP 4: Preprocess dataset & train model
# ==============================
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Encode on a copy so `df` keeps its raw values. Encoding `df` in place made
# this cell non-idempotent: on a second run no object columns are left, so
# `label_encoders` ends up empty and the app below would treat the categorical
# columns as plain numbers.
df_encoded = df.copy()

# Encode categorical (object dtype) columns; keep each fitted encoder so the
# same string -> integer mapping can be applied to user input later.
label_encoders = {}
for col in df_encoded.select_dtypes(include="object").columns:
    le = LabelEncoder()
    df_encoded[col] = le.fit_transform(df_encoded[col].astype(str))
    label_encoders[col] = le

# Features and target
X = df_encoded.drop("Revenue", axis=1)
y = df_encoded["Revenue"]

# Train-test split (stratify to keep class distribution)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale features. The scaler is fitted on the training split only, so no
# statistics from the held-out rows leak into preprocessing; the test split is
# transformed with the training mean/std. Copies are taken so the assignments
# below never write into `X`.
scaler = StandardScaler()
X_train = X_train.copy()
X_test = X_test.copy()
X_train[X_train.columns] = scaler.fit_transform(X_train)
X_test[X_test.columns] = scaler.transform(X_test)

# Train Random Forest with class balancing
model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
model.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = model.predict(X_test)
print("✅ Model trained successfully!")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

✅ Model trained successfully!
Accuracy: 0.9034874290348743

Classification Report:
               precision    recall  f1-score   support

       False       0.92      0.97      0.94      2084
        True       0.78      0.53      0.63       382

    accuracy                           0.90      2466
   macro avg       0.85      0.75      0.79      2466
weighted avg       0.90      0.90      0.90      2466


Confusion Matrix:
 [[2026   58]
 [ 180  202]]


In [15]:
# ==============================
# STEP 5: Gradio App
# ==============================
import gradio as gr

# Feature columns in training order; the UI builds one input per entry and the
# prediction function pairs inputs with columns positionally.
feature_names = list(X.columns)

# UI metadata, one row per model feature:
# (column name, friendly label, tooltip text, default form value)
_feature_specs = [
    ("Administrative", "Admin Pages Visited",
     "Number of administrative pages visited (>=0)", 0),
    ("Administrative_Duration", "Time on Admin Pages (s)",
     "Time spent on admin pages in seconds (>=0)", 0),
    ("Informational", "Info Pages Visited",
     "Number of informational pages visited (>=0)", 0),
    ("Informational_Duration", "Time on Info Pages (s)",
     "Time spent on informational pages in seconds (>=0)", 0),
    ("ProductRelated", "Product Pages Visited",
     "Number of product-related pages visited (>=0)", 0),
    ("ProductRelated_Duration", "Time on Product Pages (s)",
     "Time spent on product pages in seconds (>=0)", 0),
    ("BounceRates", "Avg. Bounce Rate",
     "Average bounce rate (0 to 1)", 0.0),
    ("ExitRates", "Avg. Exit Rate",
     "Average exit rate (0 to 1)", 0.0),
    ("PageValues", "Avg. Page Value",
     "Average page value (>=0)", 0.0),
    ("SpecialDay", "Closeness to Special Day",
     "Closeness to special day (0 = not special, 1 = closest)", 0.0),
    ("Month", "Month",
     "Month of the session (Jan-Dec)", "Jan"),
    ("OperatingSystems", "Operating System",
     "Operating System used by the visitor", 1),
    ("Browser", "Browser",
     "Browser used by the visitor", 1),
    ("Region", "Region",
     "Visitor region", 1),
    ("TrafficType", "Traffic Source",
     "Type of traffic source", 1),
    ("VisitorType", "Visitor Type",
     "Returning, New, or Other visitor", "Returning_Visitor"),
    ("Weekend", "Weekend",
     "Was the session on a weekend? (Yes/No)", "No"),
]

# Friendly names: column -> label shown above the input
friendly_names = {column: label for column, label, _tip, _value in _feature_specs}

# Tooltips: column -> helper text shown with the input
tooltips = {column: tip for column, _label, tip, _value in _feature_specs}

# Default values: column -> value the input is pre-filled with
default_values = {column: value for column, _label, _tip, value in _feature_specs}

# Important features to highlight (their labels get a star in the UI)
important_features = [
    "ProductRelated",
    "ProductRelated_Duration",
    "BounceRates",
    "ExitRates",
    "VisitorType",
    "Weekend",
    "Month",
    "Administrative",
]

# Prediction function
def predict_purchase(*inputs, threshold=0.15):
    """Predict whether a session ends in a purchase from raw form values.

    Relies on the notebook globals ``feature_names``, ``label_encoders``,
    ``X``, ``scaler`` and ``model``.

    Args:
        *inputs: One raw value per entry of ``feature_names``, in the same
            order. "Weekend" is expected as "Yes"/"No", label-encoded columns
            as their original string category, everything else as a number.
        threshold: Minimum predicted purchase probability at which the result
            is reported as "Will Purchase". The default of 0.15 is below 0.5
            on purpose, to catch more purchases.

    Returns:
        A ``(message, background_color)`` tuple. Invalid or missing input
        yields a warning message instead of raising, so the UI can show it.
    """
    warning_color = "#fff3cd"

    # zip() below would silently drop extras or leave columns empty.
    if len(inputs) != len(feature_names):
        return (
            f"⚠️ Expected {len(feature_names)} inputs but received {len(inputs)}.",
            warning_color,
        )

    input_data = {}
    for col, val in zip(feature_names, inputs):
        # A cleared form field arrives as None (or an empty string).
        if val is None or val == "":
            return f"⚠️ Please provide a value for '{col}'.", warning_color

        if col == "Weekend":
            val = 1 if val == "Yes" else 0
        elif col in label_encoders:
            try:
                val = label_encoders[col].transform([str(val)])[0]
            except ValueError:
                # LabelEncoder raises ValueError for categories it was not fitted on.
                return f"⚠️ '{val}' is not a known value for '{col}'.", warning_color
        else:
            try:
                val = float(val)
            except (TypeError, ValueError):
                return f"⚠️ '{col}' must be a number.", warning_color
        input_data[col] = val

    # Single-row frame with the training column order, scaled with the fitted scaler.
    df_input = pd.DataFrame([input_data], columns=X.columns)
    df_input[X.columns] = scaler.transform(df_input)

    # Probability of the positive class (second column of predict_proba).
    prob = model.predict_proba(df_input)[0][1]
    prediction = 1 if prob >= threshold else 0

    # NOTE: for a positive prediction the percentage shown is the raw purchase
    # probability, which can be as low as `threshold`.
    if prediction == 1:
        return f"✅ Will Purchase (Confidence: {prob*100:.2f}%)", "#d4edda"
    else:
        return f"❌ Will Not Purchase (Confidence: {(1-prob)*100:.2f}%)", "#f8d7da"

# Build Gradio UI: one input component per model feature, laid out three per
# row, plus a button that renders the prediction as a colored HTML box.
with gr.Blocks() as demo:
    gr.Markdown("## 🛒 Online Shopping Intention Predictor")
    gr.Markdown("⭐ Important features are highlighted. Other features are pre-filled with defaults.")

    # Components are collected in feature order because predict_purchase pairs
    # its positional inputs with feature_names.
    input_elements = []
    cols_per_row = 3

    for i in range(0, len(feature_names), cols_per_row):
        row_features = feature_names[i:i+cols_per_row]
        with gr.Row():
            for col in row_features:
                label = friendly_names.get(col, col)
                if col in important_features:
                    label = f"⭐ {label}"
                info_text = tooltips.get(col, "")
                default = default_values.get(col, None)

                # "Weekend" is checked first, mirroring predict_purchase, which
                # maps "Yes"/"No" for this column before consulting label_encoders.
                if col == "Weekend":
                    component = gr.Dropdown(
                        choices=["No", "Yes"], label=label, info=info_text, value=default
                    )
                # Dropdowns for label-encoded categorical columns
                elif col in label_encoders:
                    options = label_encoders[col].classes_.tolist()
                    # The configured default may not be a category the encoder
                    # was fitted on; fall back to the first known one so the
                    # pre-filled value can always be encoded.
                    if default not in options:
                        default = options[0] if options else None
                    component = gr.Dropdown(
                        choices=options, label=label, info=info_text, value=default
                    )
                # Numeric fields bounded to [0, 1]
                elif col in ["BounceRates", "ExitRates", "SpecialDay"]:
                    component = gr.Number(
                        label=label, info=info_text, value=default,
                        precision=2, minimum=0.0, maximum=1.0
                    )
                # Remaining numeric fields (non-negative)
                else:
                    component = gr.Number(
                        label=label, info=info_text, value=default, minimum=0.0
                    )
                input_elements.append(component)

    predict_btn = gr.Button("Predict 🛒", elem_id="predict-btn", variant="primary")
    output_box = gr.HTML(label="Prediction Result")

    def predict_from_ui(*args):
        """Run predict_purchase on the form values and wrap the result in a colored HTML box."""
        text, color = predict_purchase(*args)
        html_result = f'<div style="background-color:{color};color:black;padding:10px;border-radius:5px;font-weight:bold;">{text}</div>'
        return html_result

    predict_btn.click(fn=predict_from_ui, inputs=input_elements, outputs=output_box)

# share=True requests a temporary public URL in addition to the local server.
demo.launch(share=True)



Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://068b963e44494b5d64.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


