**Support Vector Machine**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

In [4]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Make sure to download the necessary NLTK data
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [5]:
# Preprocessing function
def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    tokens = word_tokenize(text)  # Tokenize
    tokens = [word for word in tokens if word not in stopwords.words('english')]  # Remove stopwords
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]  # Lemmatization
    return ' '.join(tokens)  # Return the processed text as a single string

# Load the dataset
data = pd.read_csv('/content/products_details_labelled_complete.csv')

# Combine and preprocess the text data
data['combined_text'] = data['product_title'].fillna('') + " " + data['description'].fillna('')
data['processed_text'] = data['combined_text'].apply(preprocess_text)

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['Label'])

# Create TF-IDF features
tfidf_vectorizer = TfidfVectorizer(max_features=5000)  # You can tune the number of features
X_tfidf = tfidf_vectorizer.fit_transform(data['processed_text']).toarray()

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.3, random_state=42)

# Create and train the SVM model
svm_model = SVC(kernel='linear')  # You can experiment with different kernels
svm_model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = svm_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.6567460317460317

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.72      0.84        18
           1       0.85      0.57      0.68        51
           2       0.65      0.93      0.76       229
           3       0.70      0.38      0.49        37
           4       0.36      0.12      0.18        41
           5       0.56      0.28      0.37        69
           6       0.62      0.68      0.65        50
           7       1.00      0.44      0.62         9

    accuracy                           0.66       504
   macro avg       0.72      0.52      0.57       504
weighted avg       0.65      0.66      0.62       504



**Polynomial kernel**

In [10]:
# Create and train the SVM model with a polynomial kernel
svm_poly_model = SVC(kernel='poly')  # Degree can be tuned
svm_poly_model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred_poly = svm_poly_model.predict(X_test)
print("Polynomial Kernel Accuracy:", accuracy_score(y_test, y_pred_poly))
print("\nPolynomial Kernel Classification Report:\n", classification_report(y_test, y_pred_poly))

Polynomial Kernel Accuracy: 0.48214285714285715

Polynomial Kernel Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.06      0.11        18
           1       1.00      0.04      0.08        51
           2       0.47      0.99      0.63       229
           3       0.33      0.03      0.05        37
           4       1.00      0.02      0.05        41
           5       0.75      0.04      0.08        69
           6       1.00      0.16      0.28        50
           7       1.00      0.11      0.20         9

    accuracy                           0.48       504
   macro avg       0.82      0.18      0.18       504
weighted avg       0.67      0.48      0.35       504



In [17]:
from sklearn.model_selection import GridSearchCV
parameter_grid = {
    'C': [0.1, 1, 10, 100],
    'degree': [2, 3, 4, 5],
    'coef0': [0.0, 1.0, 2.0]
}
grid_search = GridSearchCV(SVC(kernel='poly'), parameter_grid, cv=5)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Create and train the SVM model with the best parameters
tuned_svm_model = SVC(kernel='poly', C=best_params['C'], degree=best_params['degree'], coef0=best_params['coef0'])
tuned_svm_model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = tuned_svm_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Best Parameters: {'C': 0.1, 'coef0': 2.0, 'degree': 3}
Accuracy: 0.6547619047619048

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.61      0.76        18
           1       0.86      0.59      0.70        51
           2       0.67      0.91      0.78       229
           3       0.61      0.38      0.47        37
           4       0.44      0.17      0.25        41
           5       0.47      0.29      0.36        69
           6       0.58      0.70      0.64        50
           7       0.67      0.44      0.53         9

    accuracy                           0.65       504
   macro avg       0.66      0.51      0.56       504
weighted avg       0.64      0.65      0.63       504



**RBF Kernel**

In [11]:
# Create and train the SVM model with RBF kernel
svm_rbf_model = SVC(kernel='rbf')  # Gamma can be tuned
svm_rbf_model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred_rbf = svm_rbf_model.predict(X_test)
print("RBF Kernel Accuracy:", accuracy_score(y_test, y_pred_rbf))
print("\nRBF Kernel Classification Report:\n", classification_report(y_test, y_pred_rbf))

RBF Kernel Accuracy: 0.5892857142857143

RBF Kernel Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.33      0.50        18
           1       0.87      0.51      0.64        51
           2       0.54      0.96      0.69       229
           3       0.50      0.08      0.14        37
           4       1.00      0.05      0.09        41
           5       0.72      0.19      0.30        69
           6       0.76      0.52      0.62        50
           7       1.00      0.22      0.36         9

    accuracy                           0.59       504
   macro avg       0.80      0.36      0.42       504
weighted avg       0.68      0.59      0.52       504



In [12]:
from sklearn.model_selection import GridSearchCV

parameters = {'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001]}
grid_search = GridSearchCV(SVC(kernel='rbf'), parameters, cv=5)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)

{'C': 100, 'gamma': 0.01}


In [15]:
# Retrieve the best parameters from the grid search
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

Best Parameters: {'C': 100, 'gamma': 0.01}


In [16]:
#Create and train the SVM model with the best parameters
tuned_svm_model = SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'])
tuned_svm_model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = tuned_svm_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.6666666666666666

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.67      0.80        18
           1       0.81      0.69      0.74        51
           2       0.72      0.90      0.80       229
           3       0.50      0.38      0.43        37
           4       0.48      0.24      0.32        41
           5       0.43      0.29      0.35        69
           6       0.56      0.70      0.62        50
           7       0.71      0.56      0.63         9

    accuracy                           0.67       504
   macro avg       0.65      0.55      0.59       504
weighted avg       0.65      0.67      0.65       504



**Sigmoid kernel**

In [8]:
# Create and train the SVM model with a sigmoid kernel
svm_sigmoid_model = SVC(kernel='sigmoid')  # Hyperparameters can be tuned
svm_sigmoid_model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred_sigmoid = svm_sigmoid_model.predict(X_test)
print("Sigmoid Kernel Accuracy:", accuracy_score(y_test, y_pred_sigmoid))
print("\nSigmoid Kernel Classification Report:\n", classification_report(y_test, y_pred_sigmoid))

Sigmoid Kernel Accuracy: 0.6567460317460317

Sigmoid Kernel Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.67      0.80        18
           1       0.86      0.59      0.70        51
           2       0.63      0.94      0.75       229
           3       0.78      0.38      0.51        37
           4       0.45      0.12      0.19        41
           5       0.61      0.25      0.35        69
           6       0.62      0.68      0.65        50
           7       1.00      0.44      0.62         9

    accuracy                           0.66       504
   macro avg       0.74      0.51      0.57       504
weighted avg       0.67      0.66      0.62       504

