In [2]:
import pandas as pd
import numpy as np

In [24]:
# Read the preprocessed reviews into a DataFrame.
# NOTE(review): the rendered frame shows an `Unnamed: 0` column, which suggests the CSV
# was written together with its index; `index_col=0` would likely absorb it — confirm
# before changing.
DATASET_CSV = "preprocessed_dataset.csv"
dataset = pd.read_csv(DATASET_CSV)

In [26]:
# Bare last expression: the notebook renders the loaded frame as this cell's output.
dataset

Unnamed: 0,category,rating,label,stemmed_text
0,home_and_kitchen_5,5.0,cg,love thi well made sturdi and veri comfort i l...
1,home_and_kitchen_5,5.0,cg,love it a great upgrad from the origin ive had...
2,home_and_kitchen_5,5.0,cg,thi pillow save my back i love the look and fe...
3,home_and_kitchen_5,1.0,cg,miss inform on how to use it but it is a great...
4,home_and_kitchen_5,5.0,cg,veri nice set good qualiti we have had the set...
...,...,...,...,...
40415,clothing_shoes_and_jewelry_5,4.0,or,i had read some review say that thi bra ran sm...
40416,clothing_shoes_and_jewelry_5,5.0,cg,i wasnt sure exactli what it would be it is a ...
40417,clothing_shoes_and_jewelry_5,2.0,or,you can wear the hood by itself wear it with t...
40418,clothing_shoes_and_jewelry_5,1.0,cg,i like noth about thi dress the onli reason i ...


In [28]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pandas as pd


def _build_text_pipeline(classifier):
    """Wrap ``classifier`` behind a fresh TF-IDF vectorizer.

    Returns a two-step ``Pipeline`` whose steps are named ``'vectorizer'`` and
    ``'classifier'``, so every model receives the same text featurisation.
    """
    steps = [
        ('vectorizer', TfidfVectorizer()),
        ('classifier', classifier),
    ]
    return Pipeline(steps)


# Rows without stemmed text are discarded before anything is vectorised.
dataset = dataset.dropna(subset=['stemmed_text'])

# Features: the stemmed review text. Target: the `label` column
# (`cg` / `or` in the rows displayed earlier).
X, y = dataset['stemmed_text'], dataset['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate classifiers, keyed by the display name reused in the printed reports.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
}

# One vectorizer + classifier pipeline per candidate, in the same order as `models`.
pipelines = {name: _build_text_pipeline(model) for name, model in models.items()}

# Fit each pipeline on the training split, then report per-class metrics on the test split.
for name, pipeline in pipelines.items():
    print(f"Training and evaluating {name}...")
    # `fit` returns the pipeline itself, so prediction can be chained directly.
    y_pred = pipeline.fit(X_train, y_train).predict(X_test)

    print(f"Classification Report for {name}:\n")
    print(classification_report(y_test, y_pred))


Training and evaluating Random Forest...
Classification Report for Random Forest:

              precision    recall  f1-score   support

          cg       0.85      0.92      0.88      4050
          or       0.91      0.84      0.87      4034

    accuracy                           0.88      8084
   macro avg       0.88      0.88      0.88      8084
weighted avg       0.88      0.88      0.88      8084

Training and evaluating SVM...
Classification Report for SVM:

              precision    recall  f1-score   support

          cg       0.92      0.91      0.92      4050
          or       0.91      0.92      0.92      4034

    accuracy                           0.92      8084
   macro avg       0.92      0.92      0.92      8084
weighted avg       0.92      0.92      0.92      8084

Training and evaluating Logistic Regression...
Classification Report for Logistic Regression:

              precision    recall  f1-score   support

          cg       0.90      0.89      0.90      4

In [29]:
import joblib

# Write every pipeline in `pipelines` to its own pickle file. The filename is the
# model's display name, lower-cased with spaces turned into underscores, plus `_model.pkl`.
for name, pipeline in pipelines.items():
    file_stem = name.lower().replace(' ', '_')
    filename = f"{file_stem}_model.pkl"

    joblib.dump(pipeline, filename)
    print(f"Saved {name} model to {filename}.")

Saved Random Forest model to random_forest_model.pkl.
Saved SVM model to svm_model.pkl.
Saved Logistic Regression model to logistic_regression_model.pkl.


In [36]:
# Two illustrative sentences to run through each persisted model.
# NOTE(review): the pipelines were fit on the `stemmed_text` column, whose displayed rows
# look lower-cased and stemmed (e.g. "thi", "veri"). These samples are passed in raw, so
# they presumably need the same preprocessing first — confirm against the preprocessing step.
sample_data = [
    "This is a sample text for prediction.",
    "Another example of a sample text for the classifier."
]

# For every model name, reload its pickle from disk and print the label it assigns
# to each sample sentence.
for name in models:
    # Same naming scheme as when the files were written: lower-case, underscores for spaces.
    filename = f"{name.lower().replace(' ', '_')}_model.pkl"
    loaded_model = joblib.load(filename)

    # One predicted label per entry in `sample_data`, in the same order.
    predictions = loaded_model.predict(sample_data)

    print(f"\nPredictions from {name} model:")
    for position, text in enumerate(sample_data):
        pred = predictions[position]
        print(f"Text: {text}\nPredicted Label: {pred}\n")



Predictions from Random Forest model:
Text: This is a sample text for prediction.
Predicted Label: cg

Text: Another example of a sample text for the classifier.
Predicted Label: cg


Predictions from SVM model:
Text: This is a sample text for prediction.
Predicted Label: or

Text: Another example of a sample text for the classifier.
Predicted Label: or


Predictions from Logistic Regression model:
Text: This is a sample text for prediction.
Predicted Label: or

Text: Another example of a sample text for the classifier.
Predicted Label: or

