# Package Installations and Imports

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report, accuracy_score

import joblib
import json

In [None]:
# Load the per-model feature lists (keys "SVM1", "SVM2", "SVM3" are read below).
# A context manager guarantees the file handle is closed once parsing finishes,
# even if json.load raises; the original left the handle open for the whole session.
with open('SelectedHeaders.json') as f:
    selectedheaders = json.load(f)

In [None]:
# Show how many features were selected for each of the three SVM models.
for model_key in ("SVM1", "SVM2", "SVM3"):
    print(len(selectedheaders[model_key]))

40
50
44


# SVM1 Training (Q1&Q2/Q3&Q4)

Train Model with selected features and apply hyperparameter tuning

In [None]:
# Columns chosen for the SVM1 model.
feature_columns = selectedheaders["SVM1"]

# Training split: selected feature columns as X, 'sentiment' column as y.
featuredatatrain = pd.read_csv('Features_half_train.csv')
x_train = np.asarray(featuredatatrain[feature_columns])
y_train = np.asarray(featuredatatrain['sentiment'])

# Test split, same columns.
featuredatatest = pd.read_csv('Features_half_test.csv')
x_test = np.asarray(featuredatatest[feature_columns])
y_test = np.asarray(featuredatatest['sentiment'])

# Min-max scaling learned from the training data only, then applied to both splits.
# NOTE(review): the scaler is fit on the full training set before GridSearchCV
# splits it into folds, so validation folds influence the scaling statistics.
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Hyperparameter grid searched exhaustively.
# NOTE(review): gamma is a coefficient for the 'rbf'/'poly' kernels, so the
# 'linear' candidates are presumably repeated once per gamma value - confirm.
parameters = {
    'kernel': ['linear', 'rbf', 'poly'],
    'C': [0.1, 0.3, 0.7, 1, 2, 5, 10, 50, 100, 1000],
    'gamma': ['auto', 'scale', 1, 0.1],
}
svc = svm.SVC()

# `classifier` is read by the next cell (best_params_).
classifier = GridSearchCV(svc, parameters).fit(x_train, y_train)

# Evaluate the tuned model on the held-out test split.
predict = classifier.predict(x_test)
print(classification_report(y_test, predict))
print(accuracy_score(y_test, predict))

              precision    recall  f1-score   support

           0       0.83      0.88      0.85       114
           1       0.87      0.82      0.85       114

    accuracy                           0.85       228
   macro avg       0.85      0.85      0.85       228
weighted avg       0.85      0.85      0.85       228

0.8508771929824561


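Best model parameters for these features: refit a MinMaxScaler + SVM pipeline on the unscaled features using the best parameters found by the grid search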

In [None]:
# Reload the unscaled SVM1 features: the grid-search cell overwrote x_train/x_test
# with scaled arrays, and the pipeline below performs its own scaling.
feature_columns = selectedheaders["SVM1"]

featuredatatrain = pd.read_csv('Features_half_train.csv')
x_train = np.asarray(featuredatatrain[feature_columns])
y_train = np.asarray(featuredatatrain['sentiment'])

featuredatatest = pd.read_csv('Features_half_test.csv')
x_test = np.asarray(featuredatatest[feature_columns])
y_test = np.asarray(featuredatatest['sentiment'])

# Rebuild an SVC from the hyperparameters selected by the grid search.
bestparams = classifier.best_params_
bestclassifier = svm.SVC(
    C=bestparams['C'],
    kernel=bestparams['kernel'],
    gamma=bestparams['gamma'],
)

# Scaler and classifier bundled together so the saved model accepts raw features.
pipeline = make_pipeline(MinMaxScaler(), bestclassifier).fit(x_train, y_train)

# Evaluate the refitted pipeline on the test split.
predict = pipeline.predict(x_test)
print(classification_report(y_test, predict))
print(accuracy_score(y_test, predict))

              precision    recall  f1-score   support

           0       0.83      0.88      0.85       114
           1       0.87      0.82      0.85       114

    accuracy                           0.85       228
   macro avg       0.85      0.85      0.85       228
weighted avg       0.85      0.85      0.85       228

0.8508771929824561


Save Model

In [None]:
# Persist the fitted scaler + SVC pipeline; the returned list of written
# filenames is left as the last expression so it shows as the cell output.
joblib.dump(value=pipeline, filename="Half_Classifier_Model.pkl")

['Half_Classifier_Model.pkl']

# SVM2 Training (Q1/Q2)

Train Model with selected features and apply hyperparameter tuning

In [None]:
# Columns chosen for the SVM2 model.
feature_columns = selectedheaders["SVM2"]

# Training split: selected feature columns as X, 'sentiment' column as y.
featuredatatrain = pd.read_csv('Features_tophalf_train.csv')
x_train = np.asarray(featuredatatrain[feature_columns])
y_train = np.asarray(featuredatatrain['sentiment'])

# Test split, same columns.
featuredatatest = pd.read_csv('Features_tophalf_test.csv')
x_test = np.asarray(featuredatatest[feature_columns])
y_test = np.asarray(featuredatatest['sentiment'])

# Min-max scaling learned from the training data only, then applied to both splits.
# NOTE(review): the scaler is fit on the full training set before GridSearchCV
# splits it into folds, so validation folds influence the scaling statistics.
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Hyperparameter grid searched exhaustively.
parameters = {
    'kernel': ['linear', 'rbf', 'poly'],
    'C': [0.1, 0.3, 0.7, 1, 2, 5, 10, 50, 100, 1000],
    'gamma': ['auto', 'scale', 1, 0.1],
}
svc = svm.SVC()

# `classifier` is read by the next cell (best_params_).
classifier = GridSearchCV(svc, parameters).fit(x_train, y_train)

# Evaluate the tuned model on the held-out test split.
predict = classifier.predict(x_test)
print(classification_report(y_test, predict))
print(accuracy_score(y_test, predict))

              precision    recall  f1-score   support

           0       0.84      0.91      0.87        57
           1       0.90      0.82      0.86        57

    accuracy                           0.87       114
   macro avg       0.87      0.87      0.87       114
weighted avg       0.87      0.87      0.87       114

0.868421052631579


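Best model parameters for these features: refit a MinMaxScaler + SVM pipeline on the unscaled features using the best parameters found by the grid search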

In [None]:
# Reload the unscaled SVM2 features: the grid-search cell overwrote x_train/x_test
# with scaled arrays, and the pipeline below performs its own scaling.
feature_columns = selectedheaders["SVM2"]

featuredatatrain = pd.read_csv('Features_tophalf_train.csv')
x_train = np.asarray(featuredatatrain[feature_columns])
y_train = np.asarray(featuredatatrain['sentiment'])

featuredatatest = pd.read_csv('Features_tophalf_test.csv')
x_test = np.asarray(featuredatatest[feature_columns])
y_test = np.asarray(featuredatatest['sentiment'])

# Rebuild an SVC from the hyperparameters selected by the grid search.
bestparams = classifier.best_params_
bestclassifier = svm.SVC(
    C=bestparams['C'],
    kernel=bestparams['kernel'],
    gamma=bestparams['gamma'],
)

# Scaler and classifier bundled together so the saved model accepts raw features.
pipeline = make_pipeline(MinMaxScaler(), bestclassifier).fit(x_train, y_train)

# Evaluate the refitted pipeline on the test split.
predict = pipeline.predict(x_test)
print(classification_report(y_test, predict))
print(accuracy_score(y_test, predict))

              precision    recall  f1-score   support

           0       0.84      0.91      0.87        57
           1       0.90      0.82      0.86        57

    accuracy                           0.87       114
   macro avg       0.87      0.87      0.87       114
weighted avg       0.87      0.87      0.87       114

0.868421052631579


Save Model

In [None]:
# Persist the fitted scaler + SVC pipeline; the returned list of written
# filenames is left as the last expression so it shows as the cell output.
joblib.dump(value=pipeline, filename="TopHalf_Classifier_Model.pkl")

['TopHalf_Classifier_Model.pkl']

# SVM3 Training (Q3/Q4)

Train Model with selected features and apply hyperparameter tuning

In [None]:
# Columns chosen for the SVM3 model.
feature_columns = selectedheaders["SVM3"]

# Training split: selected feature columns as X, 'sentiment' column as y.
featuredatatrain = pd.read_csv('Features_bottomhalf_train.csv')
x_train = np.asarray(featuredatatrain[feature_columns])
y_train = np.asarray(featuredatatrain['sentiment'])

# Test split, same columns.
featuredatatest = pd.read_csv('Features_bottomhalf_test.csv')
x_test = np.asarray(featuredatatest[feature_columns])
y_test = np.asarray(featuredatatest['sentiment'])

# Min-max scaling learned from the training data only, then applied to both splits.
# NOTE(review): the scaler is fit on the full training set before GridSearchCV
# splits it into folds, so validation folds influence the scaling statistics.
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Hyperparameter grid searched exhaustively.
parameters = {
    'kernel': ['linear', 'rbf', 'poly'],
    'C': [0.1, 0.3, 0.7, 1, 2, 5, 10, 50, 100, 1000],
    'gamma': ['auto', 'scale', 1, 0.1],
}
svc = svm.SVC()

# `classifier` is read by the next cell (best_params_).
classifier = GridSearchCV(svc, parameters).fit(x_train, y_train)

# Evaluate the tuned model on the held-out test split.
predict = classifier.predict(x_test)
print(classification_report(y_test, predict))
print(accuracy_score(y_test, predict))

              precision    recall  f1-score   support

           0       0.64      0.65      0.64        57
           1       0.64      0.63      0.64        57

    accuracy                           0.64       114
   macro avg       0.64      0.64      0.64       114
weighted avg       0.64      0.64      0.64       114

0.6403508771929824


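Best model parameters for these features: refit a MinMaxScaler + SVM pipeline on the unscaled features using the best parameters found by the grid search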

In [None]:
# Reload the unscaled SVM3 features: the grid-search cell overwrote x_train/x_test
# with scaled arrays, and the pipeline below performs its own scaling.
feature_columns = selectedheaders["SVM3"]

featuredatatrain = pd.read_csv('Features_bottomhalf_train.csv')
x_train = np.asarray(featuredatatrain[feature_columns])
y_train = np.asarray(featuredatatrain['sentiment'])

featuredatatest = pd.read_csv('Features_bottomhalf_test.csv')
x_test = np.asarray(featuredatatest[feature_columns])
y_test = np.asarray(featuredatatest['sentiment'])

# Rebuild an SVC from the hyperparameters selected by the grid search.
bestparams = classifier.best_params_
bestclassifier = svm.SVC(
    C=bestparams['C'],
    kernel=bestparams['kernel'],
    gamma=bestparams['gamma'],
)

# Scaler and classifier bundled together so the saved model accepts raw features.
pipeline = make_pipeline(MinMaxScaler(), bestclassifier).fit(x_train, y_train)

# Evaluate the refitted pipeline on the test split.
predict = pipeline.predict(x_test)
print(classification_report(y_test, predict))
print(accuracy_score(y_test, predict))

              precision    recall  f1-score   support

           0       0.64      0.65      0.64        57
           1       0.64      0.63      0.64        57

    accuracy                           0.64       114
   macro avg       0.64      0.64      0.64       114
weighted avg       0.64      0.64      0.64       114

0.6403508771929824


Save Model

In [None]:
# Persist the fitted scaler + SVC pipeline; the returned list of written
# filenames is left as the last expression so it shows as the cell output.
joblib.dump(value=pipeline, filename="BottomHalf_Classifier_Model.pkl")

['BottomHalf_Classifier_Model.pkl']