In [None]:
import keras
import tensorflow as tf
from keras.models import Sequential
from keras import layers
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten
from keras.wrappers.scikit_learn import KerasClassifier
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.callbacks import History 
from keras.models import load_model
from keras import optimizers

In [None]:
import string
import csv
import numpy as np
import pandas as pd

import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.calibration import CalibratedClassifierCV
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Sanity check: ask TensorFlow whether it can see a GPU. The call's result is
# the cell's last expression, so it is what the notebook displays.
# NOTE(review): this helper is reportedly deprecated in TF 2.x in favour of
# tf.config.list_physical_devices('GPU') — confirm against the installed version.
tf.test.is_gpu_available()

In [None]:
# Read both data files the same way: UTF-8 text, tab-separated, with every
# field kept as a string (no numeric conversion).
_read_opts = {"encoding": "utf-8", "delimiter": "\t", "dtype": str}
train = np.genfromtxt("train_clean.csv", **_read_opts)
test = np.genfromtxt("test_clean.csv", **_read_opts)

In [None]:
# Turn the raw arrays into model inputs.
#   * `tfidf` must stay a named variable: the same fitted instance (and hence
#     the same vocabulary) is reused below to transform the test set.
#   * It is deliberately NOT called `tf`: that name is the `tensorflow` alias
#     imported at the top of the notebook, and rebinding it would break every
#     later `tf.*` call.
encoder = LabelBinarizer()
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(train[:, 0])    # column 0 of `train` is vectorized
y = encoder.fit_transform(train[:, 1])  # column 1 of `train` is binarized as the target
# NOTE: this replaces the raw `test` array with its TF-IDF matrix, so re-running
# the cell would feed the already-transformed matrix back into `transform`;
# reload the data first.
test = tfidf.transform(test)

In [None]:
# Keep both dimensions of X under their own names; X_dim1 is read by
# build_model as the input width of its first Dense layer.
X_dim0, X_dim1 = X.shape

In [None]:
def build_model(neurons, activ1, activ2, l, lr, p, n_classes=20, input_dim=None):
    """Build and compile a two-layer dense classifier.

    Args:
        neurons: Number of units in the hidden Dense layer.
        activ1: Activation of the hidden layer.
        activ2: Activation of the output layer.
        l: Loss passed to ``model.compile``.
        lr: Learning rate given to the Adam optimizer.
        p: Not used by this function; kept so existing calls keep working.
        n_classes: Number of units in the output layer. Defaults to 20, the
            value that used to be hard-coded.
        input_dim: Width of the input vectors. Defaults to the notebook-level
            ``X_dim1``.

    Returns:
        The compiled ``Sequential`` model, reporting the ``accuracy`` metric.
    """
    if input_dim is None:
        # Looked up at call time so the default follows the current X.
        input_dim = X_dim1

    # Use the canonical class name: the lower-case `optimizers.adam` alias is
    # not available in every keras release.
    opt = optimizers.Adam(learning_rate=lr)

    model = Sequential()
    model.add(layers.Dense(neurons, input_dim=input_dim, activation=activ1))
    model.add(layers.Dense(n_classes, activation=activ2))
    model.compile(loss=l, optimizer=opt, metrics=['accuracy'])
    return model

Training a neural network:

In [None]:
# Hold out 10% of the data for validation. The split is seeded so the same
# rows land in the validation set on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Callback instance handed to fit() below; kept in a variable so it can be
# inspected after training.
history = History()

model = build_model(
    neurons=500,
    activ1='relu',
    activ2='softmax',
    l='categorical_crossentropy',
    lr=0.0001,
    p=3,
)
model.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=100,
    validation_data=(X_test, y_test),
    callbacks=[history],
)

Ensembling a neural network, MNB, and SVM:

In [None]:
# The three ensemble members, each created unfitted with fixed hyperparameters.
neural = build_model(
    neurons=500,
    activ1='relu',
    activ2='softmax',
    l='categorical_crossentropy',
    lr=0.0001,
    p=3,
)
MNB = MultinomialNB(alpha=0.175)
# The linear SVM is wrapped in a calibrator because the prediction cell calls
# predict_proba on it.
SVM = CalibratedClassifierCV(
    LinearSVC(C=0.25, loss='squared_hinge', penalty='l2')
)

In [None]:
# Fit every member on the full training set. The network consumes the
# binarized targets directly; the two scikit-learn models are given the
# labels recovered from them through the encoder.
class_labels = encoder.inverse_transform(y)
neural.fit(X, y, epochs=8, batch_size=100)
MNB.fit(X, class_labels)
SVM.fit(X, class_labels)

In [None]:
# Score the transformed test set with each fitted member.
neural_preds = neural.predict(test)
MNB_preds, SVM_preds = (clf.predict_proba(test) for clf in (MNB, SVM))

Averaging the probability predictions:

In [None]:
# Unweighted mean of the three prediction matrices, then hand the result to
# the fitted encoder to turn each row back into a label.
final_preds = (neural_preds + MNB_preds + SVM_preds) / 3
ensemble_predictions = encoder.inverse_transform(final_preds)

Outputting the predictions:

In [None]:
# Write one predicted label per row. The `with` block guarantees the file is
# flushed and closed even if writing fails; newline='' leaves line endings
# entirely to the writer's lineterminator.
with open('nn_ensemble_predictions.csv', 'w', newline='') as out_file:
    wtr = csv.writer(out_file, delimiter=',', lineterminator='\n')
    wtr.writerows([label] for label in ensemble_predictions)