### This module takes the prediction vectors of classical models built using all Features and feeds to the ANN

In [1]:
#make necessary imports
import numpy as np
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [2]:
#Read the data
df=pd.read_csv('../../datasets/liar_tweaked/trainfeaturedata.csv')
testdf=pd.read_csv('../../datasets/liar_tweaked/testfeaturedata.csv')
validdf=pd.read_csv('../../datasets/liar_tweaked/validfeaturedata.csv')

In [3]:
#Feature Scaling
sc=StandardScaler()
df.loc[:,'SentimentScore':'CountWord']=sc.fit_transform(df.loc[:,'SentimentScore':'CountWord'])
testdf.loc[:,'SentimentScore':'CountWord']=sc.fit_transform(testdf.loc[:,'SentimentScore':'CountWord'])
validdf.loc[:,'SentimentScore':'CountWord']=sc.fit_transform(validdf.loc[:,'SentimentScore':'CountWord'])

In [4]:
#split for training and leave 40% for ANN training
classical_x_train,Left_for_ANN_x_train,classical_y_train,Left_for_ANN_y_train=train_test_split(df.loc[:,'SentimentScore':'CountWord'], df['label'], test_size=0.4, random_state=7)

In [5]:
#prepare ANN train data, use 60% data to train classical ML models and get predictions on 
#remaining 40% data, use this remaining 40% data for ANN training
def prepareANNtrainData(x_train,y_train,x_test):
    clf = SVC()
    clf.fit(x_train, y_train)
    y_SVM = clf.predict(x_test)
    clf = RandomForestClassifier()
    clf.fit(x_train, y_train)
    y_RF = clf.predict(x_test)
    clf = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
    clf.fit(x_train, y_train)
    y_KNN = clf.predict(x_test)
    clf = LogisticRegression(random_state = 0)
    clf.fit(x_train, y_train)
    y_LR = clf.predict(x_test)
    df=pd.DataFrame()
    df['SVM'],df['RF'],df['KNN'],df['LR']=y_SVM, y_RF, y_KNN, y_LR
    return df

In [6]:
#prepare ANN test data, get predictions of classical models on test and valid set and return these datasets to test
#ANN model
def prepareANNtestData(x_train,y_train,testdf,validdf):
    clf = SVC()
    clf.fit(x_train, y_train)
    y_test_SVM = clf.predict(testdf)
    y_valid_SVM = clf.predict(validdf)
    clf = RandomForestClassifier()
    clf.fit(x_train, y_train)
    y_test_RF = clf.predict(testdf)
    y_valid_RF = clf.predict(validdf)
    clf = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
    clf.fit(x_train, y_train)
    y_test_KNN = clf.predict(testdf)
    y_valid_KNN = clf.predict(validdf)
    clf = LogisticRegression(random_state = 0)
    clf.fit(x_train, y_train)
    y_test_LR = clf.predict(testdf)
    y_valid_LR = clf.predict(validdf)
    df_test=pd.DataFrame()
    df_valid=pd.DataFrame()
    df_test['SVM'],df_test['RF'],df_test['KNN'],df_test['LR']=y_test_SVM, y_test_RF, y_test_KNN, y_test_LR
    df_valid['SVM'],df_valid['RF'],df_valid['KNN'],df_valid['LR']=y_valid_SVM, y_valid_RF, y_valid_KNN, y_valid_LR
    return df_test,df_valid

In [7]:
#get ANN train data
ANNtrainData = prepareANNtrainData(classical_x_train,classical_y_train,Left_for_ANN_x_train)

In [8]:
ANNtrainData

Unnamed: 0,SVM,RF,KNN,LR
0,1,0,0,0
1,1,1,1,1
2,1,0,0,1
3,1,1,1,1
4,1,0,0,1
...,...,...,...,...
4087,1,0,0,1
4088,1,0,0,1
4089,1,0,0,1
4090,1,0,0,1


In [9]:
#build classifier
def build_classifier():
    clf=Sequential()
    clf.add(Dense(output_dim=2,init='uniform',activation='relu',input_dim=4))
    clf.add(Dense(output_dim=1,init='uniform',activation='sigmoid'))
    clf.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy'])
    return clf

In [10]:
#make necessary imports
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

#build ANN, use k fold cross validation
clf=KerasClassifier(build_fn=build_classifier, batch_size=10, nb_epoch=100)
accuracies=cross_val_score(estimator=clf, X=ANNtrainData,y=Left_for_ANN_y_train,cv=10,n_jobs=-1)

Using TensorFlow backend.


In [11]:
#see accuracies
accuracies

array([0.60243905, 0.52682924, 0.56723714, 0.54767728, 0.57946211,
       0.57457215, 0.58679706, 0.56479216, 0.58435208, 0.58679706])

In [12]:
#get ANN test and valid data
ANNtest,ANNvalid=prepareANNtestData(classical_x_train,classical_y_train,
                                    testdf.loc[:,'SentimentScore':'CountWord'],
                                    validdf.loc[:,'SentimentScore':'CountWord'])

In [13]:
ANNtest

Unnamed: 0,SVM,RF,KNN,LR
0,1,1,1,1
1,1,0,1,1
2,1,1,1,1
3,1,0,0,1
4,1,0,0,1
...,...,...,...,...
1262,1,1,1,1
1263,1,1,0,1
1264,1,1,0,1
1265,1,1,1,1


In [14]:
ANNvalid

Unnamed: 0,SVM,RF,KNN,LR
0,1,1,1,1
1,1,1,1,1
2,1,0,1,1
3,1,0,1,1
4,1,1,1,1
...,...,...,...,...
1279,1,1,0,1
1280,1,1,0,1
1281,1,1,1,1
1282,1,1,1,1


In [15]:
#fit on training data and check accuracies on both test and valid data
clf.fit(ANNtrainData,Left_for_ANN_y_train,batch_size=10, nb_epoch=100)
y_test_pred = clf.predict(ANNtest)
print('algorithm - test dataset accuracy - valid dataset accuracy')
print('Hybrid V2 - ' ,round(accuracy_score(testdf['label'], y_test_pred),4), ' - ', end='')
y_test_pred = clf.predict(ANNvalid)
print(round(accuracy_score(validdf['label'], y_test_pred),4))

  after removing the cwd from sys.path.
  """
  history = self.model.fit(x, y, **fit_args)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
algorithm - test dataset accuracy - valid dataset accuracy
Hybrid V2 -  0.5659  - 0.5389
