In [1]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn import svm

In [2]:
# Read the four CSV datasets (train/test for each of the "bow" and "tfidf"
# feature files). The generator is consumed in order by the tuple unpacking,
# so the files are read in the same sequence as the names on the left.
train_bow_df, train_tfidf_df, test_bow_df, test_tfidf_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/train_bow.csv",
        "../data/train_tfidf.csv",
        "../data/test_bow.csv",
        "../data/test_tfidf.csv",
    )
)

In [3]:
# Features are every column except "isFakeNews"; the label is that column itself.
X_train_bow, y_train_bow = (
    train_bow_df.drop(columns="isFakeNews"),
    train_bow_df["isFakeNews"],
)
X_test_bow, y_test_bow = (
    test_bow_df.drop(columns="isFakeNews"),
    test_bow_df["isFakeNews"],
)

In [4]:
# Features are every column except "isFakeNews"; the label is that column itself.
X_train_tfidf, y_train_tfidf = (
    train_tfidf_df.drop(columns="isFakeNews"),
    train_tfidf_df["isFakeNews"],
)
X_test_tfidf, y_test_tfidf = (
    test_tfidf_df.drop(columns="isFakeNews"),
    test_tfidf_df["isFakeNews"],
)

# Linear SVC 

In [5]:
def svc_param_selection_linear(X, y, nfolds, n_jobs=None):
    """Grid-search the ``C`` parameter of a linear-kernel ``svm.SVC``.

    Parameters
    ----------
    X
        Training features, passed unchanged to ``GridSearchCV.fit``.
    y
        Training labels, passed unchanged to ``GridSearchCV.fit``.
    nfolds
        Forwarded to ``GridSearchCV`` as its ``cv`` argument.
    n_jobs : int or None, optional
        Forwarded to ``GridSearchCV``. ``None`` (the default) keeps the
        original behaviour; ``-1`` asks scikit-learn to use all available
        cores, which helps when the search is too slow to finish.

    Returns
    -------
    dict
        ``best_params_`` of the fitted search, i.e. the best value of ``C``.
    """
    param_grid = {'C': [0.001, 0.01, 0.1, 1, 10]}
    grid_search = GridSearchCV(
        svm.SVC(kernel='linear'), param_grid, cv=nfolds, n_jobs=n_jobs
    )
    grid_search.fit(X, y)
    return grid_search.best_params_

##### Tried running grid search CV, but it took too long, so I just use C=1 instead.

In [None]:
# 10-fold grid search over C for the linear kernel, once per feature set.
best_param_bow_linear = svc_param_selection_linear(
    X=X_train_bow, y=y_train_bow, nfolds=10
)
best_param_tfidf_linear = svc_param_selection_linear(
    X=X_train_tfidf, y=y_train_tfidf, nfolds=10
)

In [6]:
# Linear-kernel SVC with C=1 on the "bow" features; report test-set accuracy.
svc_linear_bow = svm.SVC(kernel='linear', C=1)
svc_linear_bow.fit(X_train_bow, y_train_bow)  # fit() returns the estimator itself
y_pred_bow = svc_linear_bow.predict(X_test_bow)
print('Testing accuracy %s' % accuracy_score(y_true=y_test_bow, y_pred=y_pred_bow))


Testing accuracy 0.9273840769903762


In [10]:
# Linear-kernel SVC with C=1 on the "tfidf" features; report test-set accuracy.
svc_linear_tfidf = svm.SVC(kernel='linear', C=1)
svc_linear_tfidf.fit(X_train_tfidf, y_train_tfidf)  # fit() returns the estimator itself
y_pred_tfidf = svc_linear_tfidf.predict(X_test_tfidf)
print('Testing accuracy %s' % accuracy_score(y_true=y_test_tfidf, y_pred=y_pred_tfidf))

Testing accuracy 0.9052930883639545


# Radial SVC

In [11]:
def svc_param_selection_radial(X, y, nfolds, n_jobs=None):
    """Grid-search ``C`` and ``gamma`` of an RBF-kernel ``svm.SVC``.

    Parameters
    ----------
    X
        Training features, passed unchanged to ``GridSearchCV.fit``.
    y
        Training labels, passed unchanged to ``GridSearchCV.fit``.
    nfolds
        Forwarded to ``GridSearchCV`` as its ``cv`` argument.
    n_jobs : int or None, optional
        Forwarded to ``GridSearchCV``. ``None`` (the default) keeps the
        original behaviour; ``-1`` asks scikit-learn to use all available
        cores, which helps when the search is too slow to finish.

    Returns
    -------
    dict
        ``best_params_`` of the fitted search, i.e. the best ``C`` / ``gamma``
        combination from the grid below.
    """
    param_grid = {
        'C': [0.001, 0.01, 0.1, 1, 10],
        'gamma': [0.001, 0.01, 0.1, 1],
    }
    grid_search = GridSearchCV(
        svm.SVC(kernel='rbf'), param_grid, cv=nfolds, n_jobs=n_jobs
    )
    grid_search.fit(X, y)
    return grid_search.best_params_

In [None]:
# 10-fold grid search over C and gamma for the RBF kernel, once per feature set.
best_param_bow_radial = svc_param_selection_radial(
    X=X_train_bow, y=y_train_bow, nfolds=10
)
best_param_tfidf_radial = svc_param_selection_radial(
    X=X_train_tfidf, y=y_train_tfidf, nfolds=10
)

In [8]:
# RBF-kernel SVC (C=1, gamma=0.1) on the "bow" features; report test-set accuracy.
svc_radial_bow = svm.SVC(kernel='rbf', C=1, gamma=0.1)
svc_radial_bow.fit(X_train_bow, y_train_bow)  # fit() returns the estimator itself
y_pred_bow = svc_radial_bow.predict(X_test_bow)
print('Testing accuracy %s' % accuracy_score(y_true=y_test_bow, y_pred=y_pred_bow))

Testing accuracy 0.9278215223097113


In [9]:
# RBF-kernel SVC (C=1, gamma=0.1) on the "tfidf" features; report test-set accuracy.
svc_radial_tfidf = svm.SVC(kernel='rbf', C=1, gamma=0.1)
svc_radial_tfidf.fit(X_train_tfidf, y_train_tfidf)  # fit() returns the estimator itself
y_pred_tfidf = svc_radial_tfidf.predict(X_test_tfidf)
print('Testing accuracy %s' % accuracy_score(y_true=y_test_tfidf, y_pred=y_pred_tfidf))

Testing accuracy 0.9052930883639545
