In [1]:
import pandas as pd
import random


In [2]:
# Define some example categories and sample titles
categories = {
    100: ["iPhone 13", "iPhone 14", "iPhone case", "Apple phone"],
    200: ["USB-C Charger", "Wireless Charger", "Laptop charger"],
    300: ["MacBook Pro", "Dell XPS", "HP Laptop"],
    400: ["Apple Watch", "Smartwatch", "Fitness tracker"],
    500: ["Phone Case", "Samsung case", "Android cover"]
}

# Generate synthetic data
data = {"title": [], "category_id": []}

for cat_id, titles in categories.items():
    for _ in range(100):  # 100 examples per category
        phrase = random.choice(titles) + " " + random.choice(["", "Pro", "Plus", "2023", "Gen 3"])
        data["title"].append(phrase)
        data["category_id"].append(cat_id)

df = pd.DataFrame(data)

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df['title'], df['category_id'], test_size=0.2, random_state=42)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Vectorize product titles
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train_vec, y_train)

# Predict
y_pred = rf.predict(X_test_vec)


In [9]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

         100       1.00      1.00      1.00        28
         200       1.00      1.00      1.00        14
         300       1.00      1.00      1.00        10
         400       1.00      1.00      1.00        24
         500       1.00      1.00      1.00        24

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100



In [10]:
def predict_category(title):
    vec = vectorizer.transform([title])
    pred = rf.predict(vec)[0]
    return pred

print(predict_category("iPhone 14 Pro"))         # → 100
print(predict_category("Fast USB-C Charger"))    # → 200
print(predict_category("Dell Laptop"))           # → 300


100
200
300
