In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm  # Import the tqdm library
import joblib

data = pd.read_csv('datasetofsenti.csv')
data = data.drop(columns=["Unnamed: 0"], errors="ignore")

train_texts, test_texts, train_labels, test_labels = train_test_split(
    data["text"], data["label"], test_size=0.375, random_state=42
)

vectorizer = TfidfVectorizer(max_features=10000, ngram_range=(1, 2), stop_words="english")
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)


svm_model = SVC(kernel="linear", C=1.0, random_state=42)

for i in tqdm(range(1), desc="Training the SVM model"):
    svm_model.fit(X_train, train_labels)

predictions = svm_model.predict(X_test)

accuracy = accuracy_score(test_labels, predictions)
print(f"Accuracy on the Test Set: {accuracy * 100:.2f}%")

print("\nClassification Report:")
print(classification_report(test_labels, predictions))

joblib.dump(svm_model, 'svm_sentiment_model.pkl')
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')


Training the SVM model: 100%|██████████| 1/1 [45:50<00:00, 2750.08s/it]


Accuracy on the Test Set: 90.17%

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.94      0.94     45340
           1       0.92      0.93      0.92     52802
           2       0.79      0.77      0.78     12821
           3       0.92      0.91      0.91     21605
           4       0.85      0.87      0.86     18051
           5       0.75      0.70      0.72      5685

    accuracy                           0.90    156304
   macro avg       0.86      0.85      0.86    156304
weighted avg       0.90      0.90      0.90    156304



['tfidf_vectorizer.pkl']