**1. Import Libraries**

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
                    
import os
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,f1_score, precision_score, recall_score


from keras.layers import Input
from keras import Model
from keras.preprocessing import sequence,text
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense,Dropout,Embedding,LSTM,Conv1D,GlobalMaxPooling1D,Flatten,MaxPooling1D,GRU,SpatialDropout1D,Bidirectional
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam
from keras.callbacks import Callback

import nltk
from nltk.tokenize import word_tokenize
from nltk import FreqDist
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer,WordNetLemmatizer
stemmer=SnowballStemmer('english')
lemma=WordNetLemmatizer()
from string import punctuation

import re
import os
import gc

import matplotlib.pyplot as plt

In [None]:
# Show which files/folders are available under the relative input directory.
print(os.listdir("../input"))

In [None]:
#Import Data
# Later cells read the 'qid', 'question_text' and 'target' columns of this frame.
df = pd.read_csv('../input/train.csv')

**2. Data Exploration**

In [None]:
# Print the schema, then the first record one field at a time.
print("Columns :", df.columns)
print("Row 0 :")
first_row = df.iloc[0]
for caption, column in (("qid :", 'qid'),
                        ("question_text : ", 'question_text'),
                        ("label :", 'target')):
    print(caption, first_row[column])

In [None]:
#Exploring insincere questions
print("First 10 insincere questions:\n")
insincere_question = df[df['target'] == 1]['question_text'].values
# Slice rather than index positions 0..9, so the cell still runs when fewer
# than ten rows have target == 1 (e.g. on a sampled-down frame).
for question in insincere_question[:10]:
    print(question)

**3. Data Preprocessing**

In [None]:
def clean_review(review_col):
    """Normalise a collection of raw texts before tokenisation.

    For each entry: convert to ``str``, replace every character outside
    ``a-zA-Z`` with a space, lower-case, split with NLTK's ``word_tokenize``,
    drop English stop words, lemmatise the remaining tokens with the
    module-level ``lemma`` and join them back with single spaces.

    Parameters
    ----------
    review_col : iterable
        Raw texts. Entries are consumed by iteration instead of positional
        ``review_col[i]`` look-ups, so a pandas Series with a non-default
        index or a plain list/generator is handled the same way as an array.

    Returns
    -------
    list of str
        One cleaned string per input entry, in input order.
    """
    stops = set(stopwords.words("english"))
    review_corpus = []
    for raw_review in review_col:
        letters_only = re.sub('[^a-zA-Z]', ' ', str(raw_review))
        tokens = word_tokenize(letters_only.lower())
        kept = [lemma.lemmatize(tok) for tok in tokens if tok not in stops]
        review_corpus.append(' '.join(kept))
    return review_corpus

In [None]:
# Store the cleaned text next to the raw question; later cells read df['clean_question'].
df['clean_question']=clean_review(df['question_text'].values)

In [None]:
# head must be called: the bare attribute only displays the bound-method repr.
df['clean_question'].head()

In [None]:
y_train = df['target'].values
X_train_text = df['clean_question'].values

#Split into training (70%), validation (15%) and test (15%) data.
# A fixed random_state makes the split reproducible across kernel restarts;
# stratifying keeps the class ratio of the imbalanced target identical in all
# three splits.
SPLIT_SEED = 0
X_train_text, X_val_text, y_train, y_val = train_test_split(
    X_train_text, y_train, test_size=0.3, random_state=SPLIT_SEED, stratify=y_train)
X_val_text, X_test_text, y_val, y_test = train_test_split(
    X_val_text, y_val, test_size=0.5, random_state=SPLIT_SEED, stratify=y_val)

#Parameters to preprocess text data
num_unique_word = 166289
MAX_QUESTION_LEN=125 #max allowable words in a question
MAX_FEATURES = num_unique_word #ceiling on the number of unique words from the corpus to use
MAX_WORDS = MAX_QUESTION_LEN #max allowable words in a question
# The tokenizer vocabulary is fitted on the training split only, so validation
# and test words never influence the word index.
tokenizer = Tokenizer(num_words=MAX_FEATURES)
tokenizer.fit_on_texts(list(X_train_text))
X_train = tokenizer.texts_to_sequences(X_train_text)
X_val = tokenizer.texts_to_sequences(X_val_text)
X_test = tokenizer.texts_to_sequences(X_test_text)

# Pad/truncate every sequence to exactly MAX_WORDS token ids.
X_train = sequence.pad_sequences(X_train, maxlen=MAX_WORDS)
X_val = sequence.pad_sequences(X_val, maxlen=MAX_WORDS)
X_test = sequence.pad_sequences(X_test, maxlen=MAX_WORDS)

In [None]:
# Show one validation question, its padded id vector, and the ids of a few words.
print("Exmaple text: {0}\n\n".format(X_val_text[1]))
print("Corresponding vector\n: {0}\n\n".format(X_val[1]))
for probe_word in ('best', 'home', 'remedy', 'migraine'):
    token_id = tokenizer.word_index.get(probe_word)
    print("The word '{0}' corresponds to token # {1}\n".format(probe_word, token_id))

**4. Baseline Models**

In [None]:
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm, grid_search
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn.linear_model import LogisticRegression
from numpy.random import random_sample
from matplotlib.pyplot import plot

In [None]:
import numpy as np
from numpy.random import random_sample
from collections import Counter

class BaselineModel:
    """Trivial reference classifier exposing fit/predict.

    ``type='majority'`` always predicts the most frequent training label;
    ``type='random'`` predicts 0 or 1 from a uniform draw thresholded at 0.5.
    Any other ``type`` makes ``fit`` a no-op and ``predict`` return zeros.
    Predictions are always float arrays of shape ``(n_rows, 1)``.
    """

    def __init__(self, type='majority'):
        self.type = type
        print("Instance of {0} classifier".format(self.type))

    def fit(self, X, y):
        """Store the label to predict in ``self.label``; ``X`` is not used."""
        if self.type == 'random':
            self.label = 0
            return
        if self.type != 'majority':
            return
        label_counts = Counter(y)
        if label_counts:
            # max() returns the first label among equally frequent ones, in
            # the order the labels were first seen.
            self.label = max(label_counts, key=label_counts.get)

    def predict(self, X):
        """Return one prediction per row of ``X`` (only ``X.shape[0]`` is read)."""
        n_rows = X.shape[0]
        if self.type == 'majority':
            return np.ones((n_rows, 1)) * self.label
        if self.type == 'random':
            draws = random_sample((n_rows, 1))
            return np.where(draws >= 0.5, 1.0, 0.0)
        return np.zeros((n_rows, 1))

**Majority Classifier**

Always predict majority class

In [None]:
# 0 (sincere questions) is the majority class
modelMaj = BaselineModel(type='majority')
modelMaj.fit(X_train, y_train)
# predict() returns an (n, 1) column; flatten it to match the 1-D label arrays.
predictions_val  = modelMaj.predict(X_val).ravel()
predictions_test = modelMaj.predict(X_test).ravel()
# scikit-learn metrics take (y_true, y_pred), in that order.
print("Majority Classifier,   Val acc: {0},   Val F1 Score: {1}".format(metrics.accuracy_score(y_val, predictions_val),metrics.f1_score(y_val, predictions_val)))
print("Majority Classifier,   Test acc: {0},   Test F1 Score: {1}".format(metrics.accuracy_score(y_test, predictions_test),metrics.f1_score(y_test, predictions_test)))

In [None]:
# NOTE(review): line_search_f1_score / line_search_acc_score are defined in a
# later cell of this notebook. On a fresh kernel with "Run All" this cell raises
# NameError; the defining cell has to be run (or moved) before this one.
predictions_val  = modelMaj.predict(X_val)
predictions_test = modelMaj.predict(X_test)
# The majority model emits a single constant label, so every threshold tried by
# the search produces the same predictions.
f1_val    , threshold_val   = line_search_f1_score(predictions_val , y_val)
# threshold_val / threshold_test are overwritten here: they end up holding the
# accuracy-search thresholds, not the F1 ones.
acc_val   , threshold_val   = line_search_acc_score(predictions_val, y_val)
f1_test   , threshold_test  = line_search_f1_score(predictions_test , y_test)
acc_test  , threshold_test  = line_search_acc_score(predictions_test, y_test)
print("Majority Classifier,   Val_acc:  {0},   F1 Score: {1}".format(acc_val,f1_val))
print("Majority Classifier,   Test_acc: {0},   F1 Score: {1}".format(acc_test,f1_test))

**Random Classifier**

Predict 0 or 1 randomly

In [None]:
# The random baseline does not learn from the data: fit() only sets label = 0 and
# predict() never reads it. fit() is called to keep the same workflow as the other models.
modelRand = BaselineModel(type='random')
modelRand.fit(X_train, y_train)

In [None]:
predictions_val = modelRand.predict(X_val)
predictions_test = modelRand.predict(X_test)
# Best F1 per split, then best accuracy per split. threshold_val/threshold_test
# end up holding the thresholds from the accuracy search.
(f1_val, threshold_val), (f1_test, threshold_test) = [
    line_search_f1_score(preds, labels)
    for preds, labels in ((predictions_val, y_val), (predictions_test, y_test))
]
(acc_val, threshold_val), (acc_test, threshold_test) = [
    line_search_acc_score(preds, labels)
    for preds, labels in ((predictions_val, y_val), (predictions_test, y_test))
]
print("Random Classifier,   Val_acc:  {0},   F1 Score: {1}".format(acc_val,f1_val))
print("Random Classifier,   Test_acc: {0},   F1 Score: {1}".format(acc_test,f1_test))

**Logistic Regression**

In [None]:
# Class-weighted logistic regression fitted directly on the padded token-id matrix.
modelLR = LogisticRegression(
    C=0.1,
    class_weight="balanced",
    solver='lbfgs',
    random_state=0,
)
modelLR.fit(X_train, y_train)
# Hard 0/1 labels for the validation and test splits.
predictions_val, predictions_test = [modelLR.predict(split) for split in (X_val, X_test)]

In [None]:
# Score with the positive-class probability. The threshold search is a no-op on
# the hard 0/1 labels from predict(): every cut-off in [0, 1) maps them to the
# same predictions. Column 1 of predict_proba is the probability of label 1.
predictions_val = modelLR.predict_proba(X_val)[:, 1]
predictions_test = modelLR.predict_proba(X_test)[:, 1]
f1_val    , threshold_val  = line_search_f1_score(predictions_val , y_val)
acc_val   , threshold_val  = line_search_acc_score(predictions_val, y_val)
# NOTE(review): the test thresholds below are tuned on the test labels, so the
# test scores are optimistic; consider reusing the validation thresholds.
f1_test   , threshold_test  = line_search_f1_score(predictions_test , y_test)
acc_test  , threshold_test  = line_search_acc_score(predictions_test, y_test)
print("Logistic Regression,   Val_acc:  {0},   F1 Score: {1}".format(acc_val,f1_val))
print("Logistic Regression,   Test_acc: {0},   F1 Score: {1}".format(acc_test,f1_test))

**Random Forest** 

In [None]:
from sklearn.ensemble import RandomForestClassifier
# random_state pins the bootstrap/feature sampling so reruns give the same forest
# (the logistic-regression baseline is seeded the same way).
modelRF = RandomForestClassifier(criterion='gini', max_depth=10, class_weight='balanced', random_state=0)
modelRF.fit(X_train, y_train)

In [None]:
# Use the positive-class probability so the threshold search has something to
# search over; on hard 0/1 labels every cut-off yields identical predictions.
predictions_val = modelRF.predict_proba(X_val)[:, 1]
predictions_test = modelRF.predict_proba(X_test)[:, 1]
f1_val    , threshold_val   = line_search_f1_score(predictions_val , y_val)
acc_val   , threshold_val   = line_search_acc_score(predictions_val, y_val)
# NOTE(review): test thresholds are tuned on the test labels (optimistic scores).
f1_test   , threshold_test  = line_search_f1_score(predictions_test , y_test)
acc_test  , threshold_test  = line_search_acc_score(predictions_test, y_test)
print("Random Forest,   Val_acc:  {0},   F1 Score: {1}".format(acc_val,f1_val))
print("Random Forest,   Test_acc: {0},   F1 Score: {1}".format(acc_test,f1_test))

In [None]:
#Functions to prepare word embeddings
def get_coefs(word, *arr):
    """Pair a token with its remaining fields converted to a float32 vector."""
    vector = np.asarray(arr, dtype='float32')
    return (word, vector)
    
def get_embed_mat(EMBEDDING_FILE, max_features, embed_dim, word_index=None):
    """Build an embedding matrix aligned with a tokenizer word index.

    The embedding file is streamed line by line and closed afterwards. Only
    vectors for words in ``word_index`` are parsed, so the full pre-trained
    vocabulary is never held in memory.

    Parameters
    ----------
    EMBEDDING_FILE : str
        Path to a UTF-8 text file with one ``token v1 ... v<embed_dim>`` entry
        per line, fields separated by single spaces.
    max_features : int
        Words whose index is ``>= max_features`` are skipped (their rows stay zero).
    embed_dim : int
        Number of coefficients per vector.
    word_index : dict, optional
        Mapping ``word -> row index``. Defaults to the module-level
        ``tokenizer.word_index``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(word_index) + 1, embed_dim)``. Row ``i`` holds the
        vector of the word with index ``i``; rows of words that are absent from
        the file, or skipped, are all zeros. When a word occurs several times in
        the file, the last occurrence wins.
    """
    if word_index is None:
        word_index = tokenizer.word_index
    embedding_matrix = np.zeros((len(word_index) + 1, embed_dim))
    with open(EMBEDDING_FILE, encoding='utf8') as embedding_file:
        for line in embedding_file:
            # Split from the right, at most embed_dim times: the coefficients are
            # the last embed_dim fields, and a token that itself contains spaces
            # stays intact in fields[0].
            fields = line.rstrip().rsplit(' ', embed_dim)
            if len(fields) != embed_dim + 1:
                continue  # header or malformed line without a full vector
            row = word_index.get(fields[0])
            if row is None or row >= max_features:  # out-of-vocabulary or beyond the feature cap
                continue
            embedding_matrix[row] = np.asarray(fields[1:], dtype='float32')
    return embedding_matrix

#gloveEmbed is the embedding matrix containing one 300-dimensional row per tokenizer
#index (len(tokenizer.word_index) + 1 rows); rows without a GloVe vector stay zero.
gloveEmbed = get_embed_mat('../input/embeddings/glove.840B.300d/glove.840B.300d.txt', MAX_FEATURES, 300)
# Kept at module level because the Embedding layers below size themselves from it.
word_index = tokenizer.word_index

In [None]:
def line_search_f1_score(y_score, y_test):
    """Scan thresholds 0.00, 0.01, ..., 0.99 and keep the one with the highest F1.

    Predictions are ``y_score > threshold``. Returns ``(best_f1, threshold)``;
    the earliest threshold wins ties, and ``(0, 0)`` is returned when no
    threshold scores above zero.
    """
    best_score, best_cutoff = 0, 0
    for step in range(100):
        cutoff = step * 0.01
        candidate = f1_score(y_score > cutoff, y_test)
        if candidate > best_score:
            best_score, best_cutoff = candidate, cutoff
    return best_score, best_cutoff

def line_search_acc_score(y_score, y_test):
    """Scan thresholds 0.00, 0.01, ..., 0.99 and keep the one with the highest accuracy.

    Predictions are ``y_score > threshold``. Returns ``(best_accuracy, threshold)``;
    the earliest threshold wins ties, and ``(0, 0)`` is returned when no
    threshold scores above zero.
    """
    best_score, best_cutoff = 0, 0
    for step in range(100):
        cutoff = step * 0.01
        candidate = accuracy_score(y_score > cutoff, y_test)
        if candidate > best_score:
            best_score, best_cutoff = candidate, cutoff
    return best_score, best_cutoff

class Metrics(Callback):
    """Keras callback reporting F1, precision and recall on a validation sample.

    After each epoch, 1000 validation rows are drawn with replacement and scored
    by the model. The F1-optimal threshold is found with ``line_search_f1_score``,
    and precision/recall are reported at that same threshold. Per-epoch values
    are appended to ``val_f1s``, ``val_precisions`` and ``val_recalls``.
    ``best_f1_score`` / ``best_threshold`` track the best epoch seen so far.

    NOTE(review): relies on ``self.validation_data`` being populated by the
    training loop; confirm the installed Keras version still does this.
    """

    def __init__(self):
        super().__init__()
        self.best_threshold = 0.5
        self.best_f1_score = 0

    def on_train_begin(self, logs=None):
        # Reset the history so one instance can be reused across several fit() calls.
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []
        self.best_f1_score = 0

    def on_epoch_end(self, epoch, logs=None):
        val_inputs, val_labels = self.validation_data[0], self.validation_data[1]
        idx = np.random.randint(0, val_inputs.shape[0], 1000)
        # Keep the raw scores: the threshold search needs un-rounded probabilities.
        val_scores = np.asarray(self.model.predict(val_inputs[idx], verbose=1)).ravel()
        val_targ = np.asarray(val_labels[idx]).ravel()
        # Argument order is (scores, labels), matching the helper's signature.
        _val_f1, threshold = line_search_f1_score(val_scores, val_targ)
        if _val_f1 > self.best_f1_score:
            # Update score and threshold together so they describe the same epoch.
            self.best_f1_score = _val_f1
            self.best_threshold = threshold
        val_predict = val_scores > threshold
        _val_recall = recall_score(val_targ, val_predict)
        _val_precision = precision_score(val_targ, val_predict)
        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)
        print(" — val_f1: %f — val_precision: %f — val_recall %f" %(_val_f1, _val_precision, _val_recall))
        return

metric = Metrics()

**LSTM**

In [None]:
lstm_out = 200
modelLSTM = Sequential()
# Frozen GloVe embeddings: one row per tokenizer index, 300 dimensions each.
embedding_layer = Embedding(len(word_index) + 1,300,weights=[gloveEmbed],input_length=MAX_WORDS,trainable=False)
modelLSTM.add(embedding_layer)
# Keras 2 argument names: dropout (input connections, formerly dropout_W) and
# recurrent_dropout (recurrent connections, formerly dropout_U).
modelLSTM.add(LSTM(lstm_out, dropout=0.4, recurrent_dropout=0.4))
modelLSTM.add(Dense(1,activation='sigmoid'))
modelLSTM.compile(loss = 'binary_crossentropy', optimizer='adam',metrics = ['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
modelLSTM.summary()
# Keras expects class_weight as a {class_index: weight} dict; the string
# "balanced" is a scikit-learn convention. The weights below reproduce that
# heuristic: n_samples / (n_classes * class_count).
class_counts = np.bincount(y_train)
lstm_class_weight = {
    int(label): float(len(y_train)) / (len(class_counts) * count)
    for label, count in enumerate(class_counts) if count
}
modelLSTM.fit(X_train, y_train,epochs=2, batch_size=1024, verbose=1,callbacks=[metric], validation_data = (X_val,y_val),class_weight=lstm_class_weight)

In [None]:
# Sigmoid outputs for the validation and test splits.
predictions_val_lstm = modelLSTM.predict(X_val)
predictions_test_lstm = modelLSTM.predict(X_test)

# Best F1 per split, then best accuracy per split. threshold_val/threshold_test
# end up holding the thresholds from the accuracy search.
(f1_val, threshold_val), (f1_test, threshold_test) = [
    line_search_f1_score(scores, labels)
    for scores, labels in ((predictions_val_lstm, y_val), (predictions_test_lstm, y_test))
]
(acc_val, threshold_val), (acc_test, threshold_test) = [
    line_search_acc_score(scores, labels)
    for scores, labels in ((predictions_val_lstm, y_val), (predictions_test_lstm, y_test))
]

print("LSTM,   Val_acc: {0},   F1 Score: {1}".format(acc_val, f1_val))
print("LSTM,   Test acc: {0},   Test F1 Score: {1}".format(acc_test,f1_test))

**5. Dealing With Imbalanced Data**

In [None]:
from imblearn.over_sampling import SMOTENC
from imblearn.combine import SMOTEENN
from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.under_sampling import RandomUnderSampler

# Randomly drop majority-class rows in each split.
ennus = RandomUnderSampler(random_state=0, sampling_strategy='majority')
X_train_bal, y_train_bal = ennus.fit_resample(X_train, y_train)
X_val_bal, y_val_bal = ennus.fit_resample(X_val, y_val)
X_test_bal, y_test_bal  = ennus.fit_resample(X_test , y_test)

y_total    =np.concatenate((y_train,y_val,y_test), axis=0)
y_total_bal=np.concatenate((y_train_bal,y_val_bal,y_test_bal), axis=0)
# Row counts come from the label vectors; X_total / X_total_bal are never
# defined in this notebook.
n_rows = y_total.shape[0]
n_rows_bal = y_total_bal.shape[0]
n_insincere = len(y_total[y_total==1])
n_insincere_bal = len(y_total_bal[y_total_bal==1])
n_sincere = n_rows - n_insincere
n_sincere_bal = n_rows_bal - n_insincere_bal

# The percentage in each title is the share of insincere questions; it is
# formatted from the exact ratio to avoid artefacts such as 7.000000000000001%.
label_repart = pd.DataFrame(data={"" :[n_sincere, n_insincere]}, index = [str(n_sincere) + ' sincere questions', str(n_insincere) + ' insincere question'])
label_repart.plot(kind='pie', title='Sincere-to-Insincere Ratio (Before Undersampling) ' + "{0:.0f}".format(100.0 * n_insincere / n_rows) + "%", subplots=True, figsize=(8,8))
# The slice labels use the balanced counts, so they match the plotted data.
label_repart = pd.DataFrame(data={"" :[n_sincere_bal, n_insincere_bal]}, index = [str(n_sincere_bal) + ' sincere questions', str(n_insincere_bal) + ' insincere question'])
label_repart.plot(kind='pie', title='Sincere-to-Insincere Ratio (After Undersampling) ' + "{0:.0f}".format(100.0 * n_insincere_bal / n_rows_bal) + "%", subplots=True, figsize=(8,8))

**6. All Classifiers Using Balanced Data**

**Majority Classifier (Balanced Data)**

Always predict majority class

In [None]:
# NOTE(review): the training split was undersampled above, so the two classes are
# presumably equally frequent here. BaselineModel breaks ties in favour of the label
# it counts first, so "majority" is arbitrary on this data -- confirm that is intended.
modelMaj_bal = BaselineModel(type='majority')
modelMaj_bal.fit(X_train_bal, y_train_bal)

In [None]:
predictions_val = modelMaj_bal.predict(X_val_bal)
predictions_test = modelMaj_bal.predict(X_test_bal)
# Best F1 per split, then best accuracy per split. threshold_val/threshold_test
# end up holding the thresholds from the accuracy search.
(f1_val, threshold_val), (f1_test, threshold_test) = [
    line_search_f1_score(preds, labels)
    for preds, labels in ((predictions_val, y_val_bal), (predictions_test, y_test_bal))
]
(acc_val, threshold_val), (acc_test, threshold_test) = [
    line_search_acc_score(preds, labels)
    for preds, labels in ((predictions_val, y_val_bal), (predictions_test, y_test_bal))
]
print("Majority Classifier,   Val acc: {0},   Val  F1 Score: {1}".format(acc_val,f1_val))
print("Majority Classifier,   Test_acc: {0},  Test F1 Score: {1}".format(acc_test,f1_test))

**Random Classifier (Balanced)**

Predict 0 or 1 randomly

In [None]:
# Fit on the balanced training split, like every other model in this section.
# The random baseline ignores its training data (fit() only sets label = 0), so
# this is for consistency rather than a change in predictions.
modelRand_bal = BaselineModel(type='random')
modelRand_bal.fit(X_train_bal, y_train_bal)

In [None]:
predictions_val = modelRand_bal.predict(X_val_bal)
predictions_test = modelRand_bal.predict(X_test_bal)
# Best F1 per split, then best accuracy per split. threshold_val/threshold_test
# end up holding the thresholds from the accuracy search.
(f1_val, threshold_val), (f1_test, threshold_test) = [
    line_search_f1_score(preds, labels)
    for preds, labels in ((predictions_val, y_val_bal), (predictions_test, y_test_bal))
]
(acc_val, threshold_val), (acc_test, threshold_test) = [
    line_search_acc_score(preds, labels)
    for preds, labels in ((predictions_val, y_val_bal), (predictions_test, y_test_bal))
]
print("Random Classifier,   Val acc: {0},   Val  F1 Score: {1}".format(acc_val,f1_val))
print("Random Classifier,   Test_acc: {0},  Test F1 Score: {1}".format(acc_test,f1_test))

**Logistic Regression (Balanced)**

Logistic Regression Hyperparameter Tuning

https://medium.com/@aneesha/svm-parameter-tuning-in-scikit-learn-using-gridsearchcv-2413c02125a0

In [None]:
# Helper function for tuning LR hyperparameters
def LR_param_selection(X, y, nfolds):
    """Grid-search the regularisation parameter ``C`` of the logistic regression.

    Parameters
    ----------
    X, y : array-like
        Training features and labels passed to ``GridSearchCV.fit``.
    nfolds : int
        Number of cross-validation folds.

    Returns
    -------
    dict
        ``best_params_`` of the search, e.g. ``{'C': 0.1}``.
    """
    param_grid = {'C': [0.001, 0.01, 0.1, 1, 10]}
    estimator = LogisticRegression(random_state=0, solver='lbfgs', class_weight="balanced")
    # GridSearchCV is taken from sklearn.model_selection; the older
    # sklearn.grid_search module is no longer shipped by current scikit-learn.
    grid_s = model_selection.GridSearchCV(estimator, param_grid, cv=nfolds)
    grid_s.fit(X, y)
    return grid_s.best_params_

#Finding best hyperparameters
best_params = LR_param_selection(X_train_bal, y_train_bal, 5)

In [None]:
# Show the C value selected by the logistic-regression grid search above.
print(best_params)

In [None]:
#Using best parameters for final model
# NOTE(review): C=0.001 is hardcoded, presumably copied from an earlier grid-search
# run; confirm it still matches the best_params printed above.
modelLR_bal  = LogisticRegression(random_state=0, solver='lbfgs',class_weight="balanced",C=0.001)
modelLR_bal.fit(X_train_bal, y_train_bal)

In [None]:
# Score with the positive-class probability. The threshold search is a no-op on
# the hard 0/1 labels from predict(), because every cut-off in [0, 1) maps them
# to the same predictions.
predictions_val  = modelLR_bal.predict_proba(X_val_bal)[:, 1]
predictions_test = modelLR_bal.predict_proba(X_test_bal)[:, 1]
f1_val    , threshold_val  = line_search_f1_score(predictions_val , y_val_bal)
acc_val   , threshold_val  = line_search_acc_score(predictions_val, y_val_bal)
# NOTE(review): test thresholds are tuned on the test labels (optimistic scores).
f1_test   , threshold_test  = line_search_f1_score(predictions_test , y_test_bal)
acc_test  , threshold_test  = line_search_acc_score(predictions_test, y_test_bal)
print("Logistic Regression,   Val acc: {0},   Val  F1 Score: {1}".format(acc_val,f1_val))
print("Logistic Regression,   Test_acc: {0},  Test F1 Score: {1}".format(acc_test,f1_test))

**Random Forest (Balanced)**

Random Forest Hyperparameter Tuning

https://medium.com/@aneesha/svm-parameter-tuning-in-scikit-learn-using-gridsearchcv-2413c02125a0

In [None]:
# Helper function for tuning RF hyperparameters
def RF_param_selection(X, y, nfolds):
    """Grid-search ``max_depth`` of the random forest.

    Parameters
    ----------
    X, y : array-like
        Training features and labels passed to ``GridSearchCV.fit``.
    nfolds : int
        Number of cross-validation folds.

    Returns
    -------
    dict
        ``best_params_`` of the search, e.g. ``{'max_depth': 16}``.
    """
    param_grid = {'max_depth': [1, 2, 4, 8, 16, 32]}
    # max_depth is not set on the estimator: the grid overrides it for every candidate.
    estimator = RandomForestClassifier(criterion='gini', class_weight='balanced')
    # GridSearchCV is taken from sklearn.model_selection; the older
    # sklearn.grid_search module is no longer shipped by current scikit-learn.
    grid_s = model_selection.GridSearchCV(estimator, param_grid, cv=nfolds)
    grid_s.fit(X, y)
    return grid_s.best_params_

#Finding best hyperparameters
best_params = RF_param_selection(X_train_bal, y_train_bal, 5)
print(best_params)

In [None]:
# random_state pins the bootstrap/feature sampling so reruns give the same forest.
# NOTE(review): max_depth=16 is hardcoded, presumably from an earlier grid-search
# run; confirm it still matches the best_params printed above.
modelRF_bal = RandomForestClassifier(criterion='gini', max_depth=16, class_weight='balanced', random_state=0)
modelRF_bal.fit(X_train_bal, y_train_bal)

In [None]:
# Use the positive-class probability so the threshold search has something to
# search over; on hard 0/1 labels every cut-off yields identical predictions.
predictions_val  = modelRF_bal.predict_proba(X_val_bal)[:, 1]
predictions_test = modelRF_bal.predict_proba(X_test_bal)[:, 1]
f1_val    , threshold_val  = line_search_f1_score(predictions_val , y_val_bal)
acc_val   , threshold_val  = line_search_acc_score(predictions_val, y_val_bal)
# NOTE(review): test thresholds are tuned on the test labels (optimistic scores).
f1_test   , threshold_test  = line_search_f1_score(predictions_test , y_test_bal)
acc_test  , threshold_test  = line_search_acc_score(predictions_test, y_test_bal)
print("Random Forest,   Val acc:  {0},   Val  F1 Score: {1}".format(acc_val,f1_val))
print("Random Forest,   Test_acc: {0},   Test F1 Score: {1}".format(acc_test,f1_test))

**LSTM (Balanced)**

Hyperparameter Tuning

In [None]:
# Candidate dropout rates: dropU for the recurrent connections, dropW for the inputs.
dropU=[0.2,0.4,0.6,0.8]
dropW=[0.2,0.4,0.6,0.8]
# Result grids are sized from the candidate lists, so changing either list
# cannot leave them the wrong shape. Rows index dropU, columns index dropW.
matrixF1  = np.zeros((len(dropU), len(dropW)))
matrixAcc = np.zeros((len(dropU), len(dropW)))

for i, rate_u in enumerate(dropU):
    for j, rate_w in enumerate(dropW):
        lstm_out = 200
        model = Sequential()
        model.add(embedding_layer)
        # Keras 2 argument names: dropout (formerly dropout_W) and
        # recurrent_dropout (formerly dropout_U).
        model.add(LSTM(lstm_out, dropout=rate_w, recurrent_dropout=rate_u))
        model.add(Dense(1,activation='sigmoid'))
        model.compile(loss = 'binary_crossentropy', optimizer='adam',metrics = ['accuracy'])
        # summary() prints the table itself and returns None.
        model.summary()
        model.fit(X_train_bal, y_train_bal,epochs=2, batch_size=1024, verbose=1,callbacks=[metric], validation_data = (X_val_bal,y_val_bal))
        predictions_val = model.predict(X_val_bal)
        f1_val   , threshold_val  = line_search_f1_score(predictions_val , y_val_bal)
        acc_val  , threshold_val  = line_search_acc_score(predictions_val , y_val_bal)
        matrixF1[i,j] = f1_val
        matrixAcc[i,j] = acc_val

In [None]:
print("Matrix of F1  values:\n",matrixF1,"\n")
print("Matrix of Acc values:\n",matrixAcc,"\n")
# Flat argmax mapped back to (row, column) = (dropU index, dropW index).
dUf1, dWf1 = np.unravel_index(matrixF1.argmax(), matrixF1.shape)
dUAc, dWAc = np.unravel_index(matrixAcc.argmax(), matrixAcc.shape)
best_f1, best_acc = matrixF1.max(), matrixAcc.max()
print("Max F1  : {0}  @ dropU= {1}, dropW= {2}".format(best_f1, dropU[dUf1], dropW[dWf1]))
print("Max Acc : {0}  @ dropU= {1}, dropW= {2}".format(best_acc, dropU[dUAc], dropW[dWAc]))

In [None]:
#Training LSTM with best dropout rates dropU=0.2, dropW=0.2
lstm_out = 200
modelLSTM_bal = Sequential()
modelLSTM_bal.add(embedding_layer)
# Keras 2 argument names: dropout (formerly dropout_W) and recurrent_dropout
# (formerly dropout_U).
modelLSTM_bal.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
modelLSTM_bal.add(Dense(1,activation='sigmoid'))
modelLSTM_bal.compile(loss = 'binary_crossentropy', optimizer='adam',metrics = ['accuracy'])
# Summarise the model built in this cell, not `model` left over from the tuning
# loop. summary() prints the table itself and returns None.
modelLSTM_bal.summary()
modelLSTM_bal.fit(X_train_bal, y_train_bal,epochs=2, batch_size=1024, verbose=1,callbacks=[metric], validation_data = (X_val_bal,y_val_bal))

In [None]:
# Sigmoid outputs for the balanced validation and test splits.
predictions_val = modelLSTM_bal.predict(X_val_bal)
predictions_test = modelLSTM_bal.predict(X_test_bal)
# Best F1 per split, then best accuracy per split. threshold_val/threshold_test
# end up holding the thresholds from the accuracy search.
(f1_val, threshold_val), (f1_test, threshold_test) = [
    line_search_f1_score(scores, labels)
    for scores, labels in ((predictions_val, y_val_bal), (predictions_test, y_test_bal))
]
(acc_val, threshold_val), (acc_test, threshold_test) = [
    line_search_acc_score(scores, labels)
    for scores, labels in ((predictions_val, y_val_bal), (predictions_test, y_test_bal))
]

print("LSTM,   Val_acc : {0},    F1 Score: {1}".format(acc_val, f1_val))
print("LSTM,   Test acc: {0},   Test F1 Score: {1}".format(acc_test,f1_test))

**Saving all Models**

In [None]:
import pickle

# Each model is written exactly once; the original cell repeated the whole save
# sequence twice, rewriting identical files. `with` closes every file even if
# pickling fails part-way.
picklable_models = {
    'modelMaj_bal': modelMaj_bal,
    'modelRand_bal': modelRand_bal,
    'modelLR_bal': modelLR_bal,
    'modelRF_bal': modelRF_bal,
}
for filename, fitted_model in picklable_models.items():
    with open(filename, 'wb') as outfile:
        pickle.dump(fitted_model, outfile)

# The Keras model is saved with Keras' own serializer (HDF5 file) instead of
# pickle: pickling Keras models is not reliably supported across versions.
# Reload it with keras.models.load_model('modelLSTM_bal.h5').
modelLSTM_bal.save('modelLSTM_bal.h5')