In [1]:
 ## Deep Learning Sentiment Analysis
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import warnings
import joblib
import os
import requests
from pathlib import Path
import json
import cloudpickle
import pickle
from sklearn.model_selection import train_test_split
import re
from multiprocessing import cpu_count, Pool
import nltk
from nltk.corpus import stopwords
from itertools import chain

import plotly.express as px
import plotly.graph_objects as go


import tensorflow as tf

from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model
from keras.utils import np_utils
from keras.utils.data_utils import get_file
from keras.preprocessing.sequence import skipgrams
from keras.callbacks import EarlyStopping
from keras.layers import Embedding, Dropout, Reshape, Activation, Input, Flatten, Dense, RepeatVector, BatchNormalization, LSTM, RepeatVector, MaxPooling1D,GlobalMaxPool1D, Conv1D, GRU, Bidirectional, Concatenate
from keras.utils import np_utils
from keras.utils.data_utils import get_file
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Dot
import keras.backend as K
import tensorflow as tf
print('GPU avialible: {}'.format(tf.config.list_physical_devices('GPU')))
pd.options.display.width = 500

import pandas as pd
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.9
run_config = tf.contrib.learn.RunConfig(session_config=config)


n_embedding_dims = 50

drop_rate = .2
num_filters = 25
kernal_size = 3


## load data
path = '../data/names.txt'
with open(path, 'r') as f:
    docs = f.readlines()
docs = [d.strip() for d in docs]

# fit the tokenizer
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts(docs)
vocab_size = len(list(tokenizer.index_word.items()))
max_len = 20

print('max_len {0} vocab_size {1}'.format(max_len, vocab_size))
#
def data_gen(docs, n_batches=10, batch_size=10):
    n_docs = len(docs)
    sample_index = np.arange(batch_size, n_docs- (batch_size + int(batch_size/2)))
    for _ in range(n_batches):
        i = np.random.choice(sample_index, 1)[0]
        in_box_docs = docs[i:i+batch_size]
        out_box_docs = docs[i-int(batch_size/2):i] + docs[i+batch_size:i + batch_size + int(batch_size/2)]
        in_box_array = np.array(pad_sequences(tokenizer.texts_to_sequences(in_box_docs), maxlen=max_len))
        out_box_array = np.array(pad_sequences(tokenizer.texts_to_sequences(out_box_docs), maxlen=max_len))
        x = np.concatenate([in_box_array, in_box_array], axis=0)
        x_aux = np.concatenate([in_box_array, out_box_array], axis=0)
        y = np.array([1] * in_box_array.shape[0] + [0] * out_box_array.shape[0])
        assert x_aux.shape[0] == x.shape[0]
        assert y.shape[0] == x.shape[0]
        yield [x, x_aux], y
(x, x_aux), y = next(data_gen(docs))
print(x.shape, x_aux.shape, y.shape)


def get_model():

    inputs = Input(shape=(max_len,),  name='input')
    embedding_layer =  Embedding(vocab_size+ 1, n_embedding_dims,
                                 input_length=max_len,
                                 trainable=True)(inputs)
    drop1 = Dropout(drop_rate, name = "drop")(embedding_layer)
    conv1 = Conv1D(num_filters, kernel_size=kernal_size, activation='relu', name='conv1')(drop1)
    pool1 = GlobalMaxPool1D(name = "pool")(conv1)

    inputs_aux = Input(shape=(max_len,),  name='input_aux')
    embedding_layer_aux =  Embedding(vocab_size+ 1, n_embedding_dims,
                                 input_length=max_len,
                                 trainable=True)(inputs_aux )
    drop1_aux = Dropout(drop_rate, name = "drop_aux")(embedding_layer_aux)
    conv1_aux = Conv1D(num_filters, kernel_size=kernal_size, activation='relu', name='conv_aux')(drop1_aux )
    pool1_aux = GlobalMaxPool1D(name = "pool_aux")(conv1_aux)


    merge = Dot(axes=1)([pool1,pool1_aux])


    d1 = Dense(100,  activation = "relu", name = "hidden")(merge)
    drop2 = Dropout(drop_rate, name=  "drop2")(d1)
    act1 = Activation('relu', name = "activ_solo")(drop2)  ## weird that the activation is stack on a dropout layr
    outputs = Dense(1, activation='sigmoid', name='sigmoidOutput')(act1)
    model = Model(inputs=[inputs,inputs_aux],  outputs=outputs, name='MatchingNamesClassifier')

    model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
    return model

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    model = get_model()
print(model.summary())



model = get_model()
print(model.summary())

weights_path = "models/weights_best.hdf5"

# save only the best weights
checkpoint = ModelCheckpoint(weights_path ,mode='max' ,monitor='val_auc', verbose=1, save_best_only=True)

learning_rate = .0004

#update the optimizer learning rate
K.set_value(model.optimizer.lr,learning_rate)

lrPlateauReductionFactor = .5
lrMin = 0.000001

# reduces learning rate on performance platu
lrCheckPoint = ReduceLROnPlateau(monitor =  'val_loss', factor=lrPlateauReductionFactor, min=lrMin)

# stops training whenmodel fails to improve
esm =  EarlyStopping(patience=5, monitor='val_auc',mode='max')


n_epochs = 100
n_steps_per_epoch = 10
batch_size = 32

train_docs, test_docs = train_test_split(docs)

train_gen = data_gen(train_docs, n_batches=n_epochs * n_steps_per_epoch, batch_size=batch_size)
test_gen = data_gen(test_docs, n_batches=n_epochs * n_steps_per_epoch, batch_size=batch_size * 2)
model.fit(train_gen,
                    epochs=n_epochs,
                    steps_per_epoch= n_steps_per_epoch,
                    validation_data = test_gen,
                    validation_steps = n_steps_per_epoch,
                    callbacks=[esm, lrCheckPoint, checkpoint], shuffle=True)


Using TensorFlow backend.


GPU avialible: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


AttributeError: module 'tensorflow' has no attribute 'ConfigProto'