### Data Classification

In [1]:
import pandas as pd
import numpy as np
import pickle
from classifiers.features import Features
from classifiers.svm import SVM
from classifiers.knn import KNN
from classifiers.rfc import RFC

# Location of the input table, relative to the notebook's working directory.
DATAFRAME_CSV = './data/dataframe.csv'

# Load the raw table and wrap it in the project's Features helper; every
# classifier cell below receives this same `features` object.
dataframe = pd.read_csv(DATAFRAME_CSV)
features = Features(dataframe)

# SVM hyper-parameter grid, pinned to a single (gamma, C) combination.
# A wider search that was used previously:
#   gamma=np.logspace(-9, 3, 3), C=np.logspace(-2, 10, 3)
svm_params = {'gamma': [1.0], 'C': [10.0]}

# NOTE(review): the recorded output below ("Reading ... Model loaded") suggests
# train_models may load previously saved models -- confirm in classifiers.svm.
svm_classifier = SVM()
svm_classifier.train_models(svm_params, features)

# KNN hyper-parameter grid: neighbourhood size, vote weighting and distance metric.
knn_params = {
    'n_neighbors': [3, 5, 11, 19],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
}

knn_classifier = KNN()
knn_classifier.train_models(knn_params, features)

rfc_classifier = RFC()

# Random-forest hyper-parameter grid actually used for training.
#
# The notebook previously assigned a wider grid to `rfc_params` and then
# immediately overwrote it with this one, so the wider grid never took effect.
# It is kept here as a comment for reference:
#   n_estimators=[200, 500], max_features=['sqrt', 'log2'],
#   max_depth=[4, 5, 6, 7, 8], criterion=['gini', 'entropy']
rfc_params = {
    'n_estimators': [200],
    # 'sqrt' replaces the former 'auto'. NOTE(review): this assumes RFC forwards
    # the grid to scikit-learn's RandomForestClassifier, where 'auto' was an
    # alias of 'sqrt' and is rejected from scikit-learn 1.3 on -- confirm in
    # classifiers.rfc.
    'max_features': ['sqrt'],
    'max_depth': [4, 5, 6, 7, 8],
    'criterion': ['entropy'],
}
rfc_classifier.train_models(rfc_params, features)

Reading SVM with TF. Model
Model loaded in: @ 0 seconds
-------------------------------------------------------------------------------------------
Reading SVM with TF of 100 sexist words. Model
Model loaded in: @ 0 seconds
-------------------------------------------------------------------------------------------
Reading SVM with TF of 100 not sexist words. Model
Model loaded in: @ 0 seconds
-------------------------------------------------------------------------------------------
Reading SVM with Char quantity. Model
Model loaded in: @ 0 seconds
-------------------------------------------------------------------------------------------
Reading SVM with Word quantity. Model
Model loaded in: @ 0 seconds
-------------------------------------------------------------------------------------------
Reading SVM with Likes quantity. Model
Model loaded in: @ 0 seconds
-------------------------------------------------------------------------------------------
Reading SVM with Dislikes quantity

### Classification Report

In [2]:
# Report the SVM models' per-class precision, recall and F1, one table per feature set.
svm_classifier.report_results(features)

>>>> SVM with TF results
		 sexist 	 not-sexit
precision	 0.99766 	 0.91856
recall		 0.91929 	 0.99765
f1		 0.95683 	 0.95643

>>>> SVM with TF to 100 sexist words results
		 sexist 	 not-sexit
precision	 0.97531 	 0.86193
recall		 0.85750 	 0.97608
f1		 0.91256 	 0.91542

>>>> SVM with TF to 100 not sexist words results
		 sexist 	 not-sexit
precision	 0.83160 	 0.94887
recall		 0.96143 	 0.78588
f1		 0.89175 	 0.85958

>>>> SVM with Likes quantity results
		 sexist 	 not-sexit
precision	 0.58900 	 0.56886
recall		 0.65750 	 0.49608
f1		 0.62127 	 0.52982

>>>> SVM with Disikes quantity results
		 sexist 	 not-sexit
precision	 0.60660 	 0.56897
recall		 0.60929 	 0.56588
f1		 0.60769 	 0.56714

>>>> SVM with Likes and Disikes quantity results
		 sexist 	 not-sexit
precision	 0.74964 	 0.67527
recall		 0.66893 	 0.75490
f1		 0.70689 	 0.71279

>>>> SVM with Char quantity results
		 sexist 	 not-sexit
precision	 0.67145 	 0.66840
recall		 0.72393 	 0.61059
f1		 0.69653 	 0.63792

>>>> S

In [3]:
# Report the KNN models' per-class precision, recall and F1, one table per feature set.
knn_classifier.report_results(features)

>>>> KNN with TF results
		 sexist 	 not-sexit
precision	 0.92413 	 0.97736
recall		 0.98071 	 0.91137
f1		 0.95153 	 0.94312

>>>> KNN with TF to 100 sexist words results
		 sexist 	 not-sexit
precision	 0.98123 	 0.86175
recall		 0.85643 	 0.98196
f1		 0.91454 	 0.91790

>>>> KNN with TF to 100 not sexist words results
		 sexist 	 not-sexit
precision	 0.81516 	 0.96888
recall		 0.97786 	 0.75608
f1		 0.88904 	 0.84913

>>>> KNN with Likes quantity results
		 sexist 	 not-sexit
precision	 0.56486 	 0.52289
recall		 0.57000 	 0.51765
f1		 0.56737 	 0.52020

>>>> KNN with Disikes quantity results
		 sexist 	 not-sexit
precision	 0.61660 	 0.53767
recall		 0.46500 	 0.68275
f1		 0.53002 	 0.60151

>>>> KNN with Likes and Disikes quantity results
		 sexist 	 not-sexit
precision	 0.62399 	 0.58289
recall		 0.61214 	 0.59490
f1		 0.61791 	 0.58873

>>>> KNN with Char quantity results
		 sexist 	 not-sexit
precision	 0.61074 	 0.58905
recall		 0.65571 	 0.54118
f1		 0.63231 	 0.56392

>>>> K

In [4]:
# Report the random-forest models' per-class precision, recall and F1, one table per feature set.
rfc_classifier.report_results(features)

>>>> RFC with TF results
		 sexist 	 not-sexit
precision	 0.98126 	 0.86450
recall		 0.85964 	 0.98196
f1		 0.91637 	 0.91945

>>>> RFC with TF to 100 sexist words results
		 sexist 	 not-sexit
precision	 0.97370 	 0.82049
recall		 0.80536 	 0.97608
f1		 0.88150 	 0.89151

>>>> RFC with TF to 100 not sexist words results
		 sexist 	 not-sexit
precision	 0.94166 	 0.77868
recall		 0.75429 	 0.94863
f1		 0.83754 	 0.85525

>>>> RFC with Likes quantity results
		 sexist 	 not-sexit
precision	 0.56790 	 0.56256
recall		 0.71357 	 0.40392
f1		 0.63239 	 0.47003

>>>> RFC with Disikes quantity results
		 sexist 	 not-sexit
precision	 0.58662 	 0.55767
recall		 0.62929 	 0.51294
f1		 0.60705 	 0.53415

>>>> RFC with Likes and Disikes quantity results
		 sexist 	 not-sexit
precision	 0.57617 	 0.58174
recall		 0.73214 	 0.40863
f1		 0.64480 	 0.47987

>>>> RFC with Char quantity results
		 sexist 	 not-sexit
precision	 0.55058 	 0.57762
recall		 0.82857 	 0.25725
f1		 0.66151 	 0.35561

>>>> R