In [1]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 10949352895254782263
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1422032486
locality {
  bus_id: 1
  links {
  }
}
incarnation: 3290278283155374358
physical_device_desc: "device: 0, name: GeForce GTX 1050, pci bus id: 0000:01:00.0, compute capability: 6.1"
]


In [2]:
import keras
from keras.models import load_model
from Data_preprocessing import create_corpus
from create_embeddings import get_embedding
from sklearn.metrics import accuracy_score
import numpy as np

Using TensorFlow backend.
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\mihiraman\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
corpus,labels = create_corpus()

In [4]:
padded_corpus, embeddings_index, embedding_matrix, vocab_size = get_embedding(corpus,max_length=80,dim=50)

Loaded 400000 word vectors.


In [5]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
labels = encoder.fit_transform(labels)
labels = keras.utils.to_categorical(labels,num_classes=3)

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(padded_corpus,labels,test_size=0.2, random_state=42)

In [8]:
from model import build_model

In [9]:
model = []
model.append(build_model(embedding_size=50, max_words=80,y_dim=3,num_filters=200,filter_sizes = [1,2,3],dropout=0.5,vocab_size=vocab_size,embed=True, embedding_matrix=embedding_matrix, embedding_train=True))
model.append(build_model(embedding_size=50, max_words=80,y_dim=3,num_filters=200,filter_sizes = [3,4,5],dropout=0.5,vocab_size=vocab_size,embed=True, embedding_matrix=embedding_matrix, embedding_train=True))
model.append(build_model(embedding_size=50, max_words=80,y_dim=3,num_filters=200,filter_sizes = [5,6,7],dropout=0.5,vocab_size=vocab_size,embed=True, embedding_matrix=embedding_matrix, embedding_train=True))
# from IPython.display import Image
# Image(filename='shared_input_layer_testing.png') 

In [10]:
for i in range(3):
    model[i].compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    batch_size = 64
    num_epochs = 6
    epoch_save = [3,6]
    for j in range(num_epochs):
        model[i].fit(X_train, y_train, validation_split=0.025, batch_size=batch_size, epochs=1, verbose=0)
        if j+1 in epoch_save:
            model[i].save('models/model_' + str(i+1) + '_epoch_' + str(j+1) + '.h5')
# Timed : 00:01:56 

In [11]:
# load models from file
def load_all_models(model_list, epoch_list):
    all_models = []
    for epoch_num in epoch_list:
        for model_num in model_list: 
            filename = 'models/model_' + str(model_num) + '_epoch_' + str(epoch_num) + '.h5'
            temp_model = load_model(filename)
            all_models.append(temp_model)
            print('>loaded %s' % filename)
    return all_models

In [12]:
# load models in order
members = load_all_models([1,2,3], [6,3])
print('Loaded %d models' % len(members))

>loaded models/model_1_epoch_6.h5
>loaded models/model_2_epoch_6.h5
>loaded models/model_3_epoch_6.h5
>loaded models/model_1_epoch_3.h5
>loaded models/model_2_epoch_3.h5
>loaded models/model_3_epoch_3.h5
Loaded 6 models


In [13]:
def ensemble_predictions(members, X_test):
    preds = [model.predict(X_test) for model in members]
    preds = np.array(preds)
    # sum across ensemble members
    summed = np.sum(preds, axis=0)
    # argmax across classes
    result = np.argmax(summed, axis=1)
    return result

In [14]:
# evaluate a specific number of members in an ensemble
def evaluate_n_members(members, n_members, X_test, y_test):
    subset = members[:n_members]
    preds = ensemble_predictions(subset, X_test)
    
    return accuracy_score(y_test, preds)

In [15]:
single_scores = []
ensemble_scores = []
y_new = np.argmax(y_test,axis=1)
for i in range(1, len(members)+1):
    ensemble_score = evaluate_n_members(members, i, X_test, y_new)
    _, single_score = members[i-1].evaluate(X_test, y_test, verbose=0)
    print('> %d: single=%.3f, ensemble=%.3f' % (i, single_score, ensemble_score))
    ensemble_scores.append(ensemble_score)
    single_scores.append(single_score)

> 1: single=0.750, ensemble=0.607
> 2: single=0.756, ensemble=0.641
> 3: single=0.754, ensemble=0.642
> 4: single=0.748, ensemble=0.643
> 5: single=0.737, ensemble=0.638
> 6: single=0.741, ensemble=0.633


In [16]:
# In the next commit save model with non trainable embeddings and load it again with trainable embeddings
# weight_path="early_weights.hdf5"
# checkpoint = ModelCheckpoint(weight_path, monitor='val_f1_score', verbose=1, save_best_only=True, mode='max')
# early_stopping = EarlyStopping(monitor="val_f1_score", mode="max", patience=2)
# callbacks = [checkpoint, early_stopping]
# Load weights from previously trained model and retrain
# model.load_weights(weight_path)

In [17]:
#implement soft voting instead of hard voting