# Support Vector Machines

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Preparing the Data

In [13]:
dataset = pd.read_csv('twitter_training.csv')
dataset = dataset.iloc[:10000, :]
dataset.head()

Unnamed: 0,2401,Borderlands,Positive,"im getting on borderlands and i will murder you all ,"
0,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
1,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
2,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
3,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
4,2401,Borderlands,Positive,im getting into borderlands and i can murder y...


In [14]:
dataset.columns = ['ID', 'Category', 'Sentiment', 'Text']
dataset = dataset.dropna(subset="Text")

In [15]:
X = dataset.loc[:, "Text"].values
y = dataset.loc[:, "Sentiment"].values

## Splitting the Data

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## Label Encoding

In [6]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

## Pipeline for Vectorization and Model

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

steps = [
    ('vectorizer', TfidfVectorizer()),
    ('svm', SVC(kernel="linear"))
]

pipeline = Pipeline(steps)
pipeline

## Training the Model

In [8]:
pipeline.fit(X_train, y_train)

## Making Predictions

In [9]:
print("Hello whats up buddie?: "  + le.inverse_transform(pipeline.predict(["Hello whats up buddie?"]))[0])
print("I am so happy today!: "  + le.inverse_transform(pipeline.predict(["I am so happy today!"]))[0])
print("I am pissed off!: "  + le.inverse_transform(pipeline.predict(["I am pissed off!"]))[0])

Hello whats up buddie?: Irrelevant
I am so happy today!: Positive
I am pissed off!: Negative


In [10]:
y_pred = pipeline.predict(X_test)

## Evaluation

In [11]:
from sklearn.metrics import classification_report, accuracy_score

accuracy = accuracy_score(y_test, y_pred)
classification_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Report: {classification_report}")

Accuracy: 0.9266936299292214
Report:               precision    recall  f1-score   support

           0       0.93      0.91      0.92       340
           1       0.94      0.92      0.93       472
           2       0.96      0.91      0.94       504
           3       0.89      0.95      0.92       662

    accuracy                           0.93      1978
   macro avg       0.93      0.92      0.93      1978
weighted avg       0.93      0.93      0.93      1978

