In [ ]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import tensorflow
import pandas as pd
from tensorflow.keras import optimizers
from tensorflow import squeeze
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Embedding, Bidirectional, GRU, Dense, Dropout, Input, Flatten
from tensorflow.keras.models import Sequential
import matplotlib.pyplot as plt



In [ ]:
# Load the preprocessed train and test CSV files into DataFrames.
train, test = (
    pd.read_csv('../preprocessing_scripts/train.csv'),
    pd.read_csv('../preprocessing_scripts/test.csv'),
)

In [ ]:
# Build a binary classifier: frozen pretrained BERT encoder -> five stacked
# bidirectional GRU layers -> tanh dense layer -> single sigmoid output.
import os  # needed for os.path.join below; not imported anywhere else in view

import bert

# Download (or reuse a cached copy of) the pretrained Google BERT checkpoint.
model_name = "uncased_L-12_H-768_A-12"
model_dir = bert.fetch_google_bert_model(model_name, ".models")
model_ckpt = os.path.join(model_dir, "bert_model.ckpt")

# trainable=False: the BERT layer is created frozen, so only the layers
# stacked on top of it are trained.
bert_params = bert.params_from_pretrained_ckpt(model_dir)
l_bert = bert.BertModelLayer.from_params(bert_params, name="bert", trainable=False)

max_seq_len = 100
l_input_ids      = tensorflow.keras.layers.Input(shape=(max_seq_len,), dtype='int32')
# NOTE: this input is created but never passed to the BERT layer or to the
# model below; only l_input_ids is wired in.
l_token_type_ids = tensorflow.keras.layers.Input(shape=(max_seq_len,), dtype='int32')

# using the default token_type/segment id 0
# output: [batch_size, max_seq_len, hidden_size]
bertLayer = l_bert(l_input_ids)
#flat = Flatten()(bertLayer)

# Four sequence-returning BiGRU layers, then a final BiGRU that returns only
# its last state so the dense head receives one vector per example.
flat = Bidirectional(GRU(64, return_sequences=True))(bertLayer)
flat = Bidirectional(GRU(64, return_sequences=True))(flat)
flat = Bidirectional(GRU(64, return_sequences=True))(flat)
flat = Bidirectional(GRU(64, return_sequences=True))(flat)
flat = Bidirectional(GRU(64))(flat)
flat = Dense(128, activation='tanh')(flat)

# Single sigmoid unit for the binary target.
output = Dense(1, activation='sigmoid')(flat)

model = tensorflow.keras.Model(inputs=l_input_ids, outputs=output)
model.build(input_shape=(None, max_seq_len))

# Load the pretrained weights only after the model has been built.
bert.load_bert_weights(l_bert, model_ckpt)

Already  fetched:  uncased_L-12_H-768_A-12.zip
already unpacked at: .models/uncased_L-12_H-768_A-12
Done loading 196 BERT weights from: .models/uncased_L-12_H-768_A-12/uncased_L-12_H-768_A-12/bert_model.ckpt into <bert.model.BertModelLayer object at 0x7ff3ca2f4128> (prefix:bert_2). Count of weights not found in the checkpoint was: [0]. Count of weights with mismatched shape: [0]
Unused weights from checkpoint: 
	bert/embeddings/token_type_embeddings
	bert/pooler/dense/bias
	bert/pooler/dense/kernel
	cls/predictions/output_bias
	cls/predictions/transform/LayerNorm/beta
	cls/predictions/transform/LayerNorm/gamma
	cls/predictions/transform/dense/bias
	cls/predictions/transform/dense/kernel
	cls/seq_relationship/output_bias
	cls/seq_relationship/output_weights


[]

In [ ]:
# Print the layer-by-layer summary (layer types, output shapes, parameter counts).
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 100)]             0         
_________________________________________________________________
bert (BertModelLayer)        (None, 100, 768)          108890112 
_________________________________________________________________
bidirectional_1 (Bidirection (None, 100, 128)          320256    
_________________________________________________________________
bidirectional_2 (Bidirection (None, 100, 128)          74496     
_________________________________________________________________
bidirectional_3 (Bidirection (None, 100, 128)          74496     
_________________________________________________________________
bidirectional_4 (Bidirection (None, 100, 128)          74496     
_________________________________________________________________
bidirectional_5 (Bidirection (None, 128)               7449

In [ ]:
# Read the raw tweets: one list element per line of the file, trailing
# newline included.
with open('../preprocessing_scripts/new_train_data.txt', 'r') as tweets_file:
    tweets = list(tweets_file)

# Read the targets the same way, one list element per line.
with open('../preprocessing_scripts/targets.csv', 'r') as targets_file:
    targets = list(targets_file)

In [ ]:
# Split the data into train / held-out parts and turn the training tweets
# into fixed-length integer sequences.
max_words = 14000
maxlen = 100
tokenizer = Tokenizer(num_words=max_words)

# The first 6000 examples are used for training; everything after is held out.
trainX = tweets[:6000]
trainY = targets[:6000]
testX = tweets[6000:]
# Held-out labels must be sliced from `targets`; slicing `tweets` here would
# make testY a duplicate of testX.
testY = targets[6000:]

# The vocabulary is fitted on the training tweets only.
tokenizer.fit_on_texts(trainX)
# NOTE(review): these ids come from the Keras Tokenizer vocabulary fitted
# above, not from the pretrained BERT vocabulary, yet they are later fed to
# the model whose first layer is the pretrained BERT layer. Confirm that this
# is intended; BERT's own tokenizer may be what is wanted here.
tokenized_version = tokenizer.texts_to_sequences(trainX)
# Pad/truncate every sequence to exactly `maxlen` ids.
tokenized_version = pad_sequences(tokenized_version, maxlen=maxlen)

In [ ]:
from tensorflow.keras import backend as K
def recall(y_true, y_pred):
    """Return the recall computed over the current batch.

    Recall is the number of true positives divided by the number of
    actual positives. Both counts are taken after clipping values to
    [0, 1] and rounding. The result is batch-wise only, not an
    epoch-level aggregate.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # K.epsilon() keeps the division defined when the batch has no positives.
    return hits / (actual_positives + K.epsilon())
def precision(y_true, y_pred):
    """Return the precision computed over the current batch.

    Precision is the number of true positives divided by the number of
    predicted positives. Both counts are taken after clipping values to
    [0, 1] and rounding. The result is batch-wise only, not an
    epoch-level aggregate.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # K.epsilon() keeps the division defined when nothing is predicted positive.
    return hits / (flagged_positives + K.epsilon())
def f1(y_true, y_pred):
    """Return the batch-wise F1 score from `precision` and `recall`.

    Computed as 2 * (p * r) / (p + r + epsilon), where the epsilon term
    keeps the division defined when both p and r are zero.
    """
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half

In [ ]:
# Convert the training labels to an int32 array for Keras.
trainY = np.array(trainY, dtype='int32')

# Binary cross-entropy matches the single sigmoid output; accuracy and the
# custom batch-wise F1 / recall / precision functions are tracked as metrics.
tracked_metrics = ['accuracy', f1, recall, precision]
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=tracked_metrics,
)


In [ ]:
# Train for 10 epochs in batches of 64, with 20% of the training data used
# for validation; the returned History object is kept in `history`.
history = model.fit(
    x=tokenized_version,
    y=trainY,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

Train on 4800 samples, validate on 1200 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
