In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("Ecommerce_data.csv")
print(df.shape)
df.head(5)


(24000, 2)


Unnamed: 0,Text,label
0,Urban Ladder Eisner Low Back Study-Office Comp...,Household
1,"Contrast living Wooden Decorative Box,Painted ...",Household
2,IO Crest SY-PCI40010 PCI RAID Host Controller ...,Electronics
3,ISAKAA Baby Socks from Just Born to 8 Years- P...,Clothing & Accessories
4,Indira Designer Women's Art Mysore Silk Saree ...,Clothing & Accessories


In [29]:
df.label.value_counts()

label
Household                 6000
Electronics               6000
Clothing & Accessories    6000
Books                     6000
Name: count, dtype: int64

Label Encoding

In [4]:
X = df.Text
y = df.label

In [5]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

y = le.fit_transform(y)

In [6]:
print(y[:4])

[3 3 2 1]


# Dividing train test data

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42,stratify = y)

In [8]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report , accuracy_score

clf = Pipeline([
    ('tf_idf', TfidfVectorizer()),
    ('Multinomial', MultinomialNB())
])

clf.fit(X_train,y_train)

y_pred = clf.predict(X_test)

print(accuracy_score(y_pred,y_test))
print(classification_report(y_pred,y_test))

0.9623611111111111
              precision    recall  f1-score   support

           0       0.93      0.98      0.95      1711
           1       0.98      0.98      0.98      1810
           2       0.97      0.97      0.97      1797
           3       0.97      0.93      0.95      1882

    accuracy                           0.96      7200
   macro avg       0.96      0.96      0.96      7200
weighted avg       0.96      0.96      0.96      7200



In [13]:
idx = clf.predict(['Mens socks comfortable and soft'])
ans = le.inverse_transform(idx)
print(ans)

['Clothing & Accessories']


# Saving a model

In [14]:
import pickle 
  
# Save the trained model as a pickle string. 
saved_model = pickle.dumps(clf) 
  
# Load the pickled model 
model = pickle.loads(saved_model) 
  
# Use the loaded pickled model to make predictions 


array([2, 3, 2, ..., 2, 1, 2])

In [27]:
def predict_category(items):
    
    items_str = ", ".join(items.split(', '))
    idx = model.predict([items_str])
    return le.inverse_transform(idx)

In [28]:
import gradio as gr


# Create the Gradio interface
interface = gr.Interface(
    fn=predict_category,
    inputs=gr.Textbox(lines=2, placeholder="Enter items separated by commas..."),
    outputs=gr.Textbox(),
    title="Shopping Category Predictor",
    description="Predict the shopping category (e.g., electronics) based on the input items.",
    examples=[["bulb, fan"], ["laptop, mouse"], ["shirt, jeans"]]
)

# Launch the Gradio app
interface.launch()

Running on local URL:  http://127.0.0.1:7866

To create a public link, set `share=True` in `launch()`.


