In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Environment sanity check: report the TensorFlow build in use and the GPU
# devices it can see (an empty list means no GPU is visible to TensorFlow).
tf_version = tf.__version__
print(tf_version)
visible_gpus = tf.config.list_physical_devices('GPU')
print(visible_gpus)

2.1.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Load Data

In [3]:
# Load the preprocessed dataset from dataset/sentiment140/data.pickle.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only load a pickle produced by a trusted preprocessing step.
data_path = os.path.join("dataset", "sentiment140", "data.pickle")
with open(data_path, "rb") as pickle_file:
    data = pickle.load(pickle_file)

# The first two entries of the pickled object are used as training inputs and
# training labels respectively.
# presumably padded token-id sequences and binary labels -- TODO confirm
# against the preprocessing notebook.
X_train, y_train = data[0], data[1]

In [4]:
# Model input configuration, consumed by the Embedding layer below.
# Passed as the Embedding layer's input dimension (vocabulary size).
MAX_FEATURES = 20000
# Passed as the Embedding layer's input_length (tokens per sample).
PAD_MAXLEN = 45

## Get Mini Data for Experiment

The full training set is very large.  
To experiment with different model structures and hyperparameters, you can train on a small random subset ("mini data") selected as shown below.  

```python
from sklearn.model_selection import train_test_split

# train_test_split returns (X_rest, X_held_out, y_rest, y_held_out); with
# test_size=0.1 the held-out 10% is kept as the "mini" set and the remaining
# 90% is discarded.
# A fixed random_state makes the subset identical on every run, so that
# different model structures are compared on the same data.
_, X_train_mini, _, y_train_mini = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42)
print(X_train_mini.shape, y_train_mini.shape)
```

## Create Model

In [6]:
# Build, compile and train a CNN + bidirectional-LSTM binary classifier.

# Reset Keras' global state so that re-running this cell starts clean.
tf.keras.backend.clear_session()

model = tf.keras.Sequential()
# Token ids -> 128-dimensional vectors. MAX_FEATURES is the embedding's input
# dimension (vocabulary size) and PAD_MAXLEN the fixed sequence length.
model.add(tf.keras.layers.Embedding(MAX_FEATURES, 128, input_length=PAD_MAXLEN))
model.add(tf.keras.layers.Dropout(0.2))
# 256 filters over windows of 5 tokens; 'valid' padding shortens the sequence.
model.add(tf.keras.layers.Conv1D(256, 5, padding='valid', activation='relu', strides=1))
# Downsample by 4 but keep a time axis so the LSTM still sees a sequence.
model.add(tf.keras.layers.MaxPooling1D(pool_size=4))
# Alternative tried: collapse the time axis instead of feeding an LSTM.
#model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(80)))
# Alternative tried: an extra fully connected head.
#model.add(tf.keras.layers.Dense(200, activation="relu"))
#model.add(tf.keras.layers.Dropout(0.2))
# Single sigmoid unit: the model outputs a probability in [0, 1].
# assumes y_train holds 0/1 labels -- TODO confirm against preprocessing.
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

# The output layer already applies a sigmoid, so the loss must be told it
# receives probabilities (from_logits=False). With from_logits=True the loss
# would apply a second sigmoid on top of the model's own, distorting both the
# loss value and the gradients.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
              #optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0005),
              metrics=['accuracy'])
model.summary()

# Stop once val_loss has not improved for 3 consecutive epochs. When early
# stopping triggers, roll the model back to the weights of the best val_loss
# epoch, so the model saved afterwards is not the one from 3 epochs later.
callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3,
                                              restore_best_weights=True)]

# validation_split=0.2 holds out a fraction of the training data for the
# val_loss that EarlyStopping monitors.
model.fit(X_train, y_train, batch_size=128, epochs=20, validation_split=0.2, verbose=1, callbacks=callbacks)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 45, 128)           2560000   
_________________________________________________________________
dropout (Dropout)            (None, 45, 128)           0         
_________________________________________________________________
conv1d (Conv1D)              (None, 41, 256)           164096    
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 10, 256)           0         
_________________________________________________________________
bidirectional (Bidirectional (None, 160)               215680    
_________________________________________________________________
dense (Dense)                (None, 1)                 161       
Total params: 2,939,937
Trainable params: 2,939,937
Non-trainable params: 0
______________________________________________

<tensorflow.python.keras.callbacks.History at 0x211e9049940>

In [7]:
# Persist the trained model to models/cnn.h5.
# exist_ok=True creates the directory only when missing, without a separate
# existence check.
os.makedirs("models", exist_ok=True)
# The ".h5" suffix denotes an HDF5 file, so the format is stated as "h5" to
# match; requesting "tf" (SavedModel, a directory layout) together with this
# filename is contradictory.
model.save(os.path.join("models", "cnn.h5"), save_format="h5")