# Support Vector Machines (SVM) Classifier

In [1]:
import pandas as pd

### Import data

In [2]:
# Read the preprocessed training set and discard every row that contains a
# missing value in any column; the cleaned frame is displayed as the cell output.
# Disabled column selection retained from an earlier iteration:
# data = data[['text_lemmatized','sentiment']]
data = pd.read_csv('../data_cleaning/preprocessed_train_2000.csv').dropna()
data

Unnamed: 0.1,Unnamed: 0,text,polarity,subjectivity
0,0,im moving another country starting new life,0,-1
1,1,want trump amp family packing single hold fede...,0,-1
2,2,think moving another country twenties one chal...,0,-1
3,3,actually moving another country one bars windows,0,-1
4,4,trump floats idea moving another country democ...,0,-1
...,...,...,...,...
2033,2033,ungrateful pompous ass quit job move another c...,0,-1
2034,2034,people like idiot hate america much makes wond...,-1,1
2035,2035,never moving california unless democracy kille...,-1,1
2036,2036,really want study abroad next year seems scary...,0,-1


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, f1_score, accuracy_score, confusion_matrix
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.feature_extraction.text import TfidfVectorizer

import numpy as np
import nltk

## Subjectivity Detection

### Convert text to TF-IDF

In [4]:
# Features are the text column; the target is the subjectivity label
# (the rendered tables in this notebook show label values -1 and 1).
X_train, y_train = data["text"], data["subjectivity"]

In [5]:
# Fit the TF-IDF vectorizer on the training text only, then encode that same
# text into the sparse matrix the classifier is trained on. The fitted
# vectorizer is reused later to encode the evaluation text.
tfidf_vectorizer = TfidfVectorizer(use_idf=True)
tfidf_vectorizer.fit(X_train)
X_train_tfidf = tfidf_vectorizer.transform(X_train)

In [6]:
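# No train/test split is performed: the model is fitted on the whole training
# file and evaluated on the separate evaluation file loaded further down.
# Disabled split kept for reference:
# X_train, X_test, y_train, y_test = train_test_split(data["text"],data["subjectivity"])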

#### Support Vector Machines (SVM) Classifier

In [7]:
# Linear support-vector classifier used for subjectivity detection; the random
# seed is pinned to 0 so repeated runs use the same random state.
subj_svm = LinearSVC(random_state=0)

In [8]:
# Train the classifier on the TF-IDF encoded training text and its labels.
# fit() returns the estimator, which is shown as this cell's output.
subj_svm.fit(X=X_train_tfidf, y=y_train)

#### Results on the Evaluation Dataset (`preprocessed_eval_1000.csv`)

In [9]:
# Load the held-out evaluation set and display it.
# Unlike the training frame, no rows are dropped here.
test_cases = pd.read_csv(filepath_or_buffer='../data_cleaning/preprocessed_eval_1000.csv')
test_cases

Unnamed: 0.1,Unnamed: 0,text,polarity,subjectivity
0,0,move completely different location costs hundr...,0,-1
1,1,started sharing information move another count...,1,1
2,2,parents made tough decision move family anothe...,-1,1
3,3,aye tryna head wild googles get visa move anot...,0,-1
4,4,val going move another country cooking opinion...,-1,1
...,...,...,...,...
1265,1265,separation anxiety gavin bradley tracks experi...,1,1
1266,1266,crippling asian elder daughter feelings chokes...,0,-1
1267,1267,phone dad last night asking job going amp dati...,-1,1
1268,1268,may qualify take advantage two possible tax ex...,0,-1


In [10]:
# Encode the evaluation text with the vectorizer fitted on the training data
# (transform only, so the vocabulary and IDF weights are not re-learned).
test_tfidf = tfidf_vectorizer.transform(test_cases['text'])

# Positive-class score in (0, 1): the logistic sigmoid of the signed distance
# to the separating hyperplane. This uses the public decision_function API
# instead of the private LinearSVC._predict_proba_lr helper, which is not part
# of scikit-learn's supported interface and may change without notice.
# NOTE: this is a monotonic squashing of the SVM margin, not a calibrated
# probability; it is suitable for ranking/ROC-style use only.
prediction_prob = 1.0 / (1.0 + np.exp(-subj_svm.decision_function(test_tfidf)))

# Hard class labels predicted for every evaluation row.
prediction = subj_svm.predict(test_tfidf)

In [11]:
# Reference labels for the evaluation set, taken from its subjectivity column.
test_case_ground_truth = test_cases['subjectivity']

In [12]:
# Materialise the reference labels as a NumPy array (a fresh copy) and show it.
test_case_ground_truth = np.array(test_case_ground_truth, copy=True)
test_case_ground_truth

array([-1,  1,  1, ...,  1, -1,  1], dtype=int64)

In [13]:
# Display the predicted labels for a quick visual comparison with the
# ground-truth array shown in the previous cell.
prediction

array([-1,  1,  1, ...,  1, -1, -1], dtype=int64)

In [14]:
# Header plus a legend for the two label values, one item per line.
print(
    "Subjectivity Detection Model Evaluation",
    "1: OPINIONATED",
    "-1: NEUTRAL",
    sep="\n",
)
# Per-class precision / recall / F1 computed against the reference labels.
print(classification_report(y_true=test_case_ground_truth, y_pred=prediction))
# Confusion matrix of reference labels versus predicted labels.
print(
    'Confusion Matrix:\n',
    confusion_matrix(y_true=test_case_ground_truth, y_pred=prediction),
)

Subjectivity Detection Model Evaluation
1: OPINIONATED
-1: NEUTRAL
              precision    recall  f1-score   support

          -1       0.68      0.68      0.68       608
           1       0.71      0.71      0.71       662

    accuracy                           0.70      1270
   macro avg       0.69      0.69      0.69      1270
weighted avg       0.70      0.70      0.70      1270

Confusion Matrix:
 [[411 197]
 [190 472]]
