<a href="https://colab.research.google.com/github/ppkgtmm/sentiment/blob/GRU/GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Colab setup: uncomment and run once on a fresh runtime.
# Installs keras-tuner (imported below as `kerastuner`) and clones the `utils`
# branch of the project repository into ./sentiment, presumably so that
# `sentiment.utils` can be imported by the next cell.
# !pip install keras-tuner
# !rm -r sentiment
# !git clone --branch utils https://github.com/ppkgtmm/sentiment.git sentiment

In [3]:
import pandas as pd
from keras.preprocessing.text import  Tokenizer
from keras.layers import Input, Dense, Embedding, GRU
from keras.models import Sequential
from keras.metrics import Precision, Recall, FalseNegatives, \
FalsePositives, TrueNegatives, TruePositives
from kerastuner import HyperParameters
import matplotlib.pyplot as plt
from sentiment.utils import read_data, get_optimizer, get_sequences, split_data
from sentiment.utils import get_tuner, get_model_from_config, get_callbacks
from sentiment.utils import OH_fit_transform, OH_transform, load, dump, \
model_evaluate

In [4]:
# Column list handed to read_data for both CSV files.
cols = ["text", "target"]

# Preprocessed training and test CSVs (paths under the Colab /content/drive mount).
data_path = "/content/drive/MyDrive/sentiment/data/data_preprocessed.csv"
test_path = "/content/drive/MyDrive/sentiment/data/test_data_preprocessed.csv"

seed = 123_456
num_words = 10_000  # Tokenizer vocabulary cap; also the Embedding input size
max_len = 250  # sequence length given to get_sequences and to the model Input

# Optimizer names offered to the tuner as a categorical choice.
optimizers = ["Adam", "RMSprop"]

In [5]:
# Read the training CSV through the project helper, then preview five random rows.
data = read_data(data_path, cols)
data.sample(n=5)

Unnamed: 0,text,target
294825,just rent it do not be fooled by the commercia...,0
337708,blaze first kinda bored me well its NUMBER whe...,0
355269,top of the a list for female rockers there onl...,1
215014,entertaining jim henson continued genus his pu...,1
303732,good service but i bought this product and it ...,0


In [6]:
# Read the held-out test CSV the same way and preview five random rows.
test = read_data(test_path, cols)
test.sample(n=5)

Unnamed: 0,text,target
3621,interesting but misguided noretta koertge is s...,0
18761,great this is a university report on impotence...,1
10600,sucks one of the worst movies i have ever seen...,0
7430,dissapointedconsumer the chairs are weak and w...,0
17379,it also a study guide the reviewer is right bu...,1


In [7]:
# Split the loaded training data into a training part and a validation part.
# NOTE(review): `seed` from the config cell is not passed here; confirm that
# split_data is seeded internally, otherwise the split differs between runs.
train, val = split_data(data)

In [8]:
# Fit the vocabulary on the training split only; validation text is not used here.
tokenizer = Tokenizer(
    num_words=num_words,
    oov_token="OOV",
)
tokenizer.fit_on_texts(train["text"])

In [9]:
# Encode both splits with the same fitted tokenizer and the same max_len
# (training split first, then validation).
sequence_train, sequence_val = (
    get_sequences(tokenizer, max_len, split["text"])
    for split in (train, val)
)

In [10]:
# Sanity check: show the distinct labels present in each split (validation first).
print(*(split["target"].unique() for split in (val, train)))

[1 0] [0 1]


In [11]:
# Encode the labels for both splits with the project helpers.
# Presumably OH_fit_transform fits a one-hot encoder that OH_transform then
# reuses, so the training call must come first (TODO confirm in sentiment.utils).
train_target = OH_fit_transform(train['target'])
val_target = OH_transform(val['target'])

In [12]:
def build_gru(hp):
    """Build and compile a single-layer GRU classifier for the tuner.

    The architecture is Input -> Embedding -> GRU -> Dense(2, softmax). The
    function reads the module-level ``max_len`` (input sequence length) and
    ``num_words`` (embedding input size).

    Args:
        hp: hyperparameter container queried with ``hp.get(name)``. It must
            provide ``'output_dim'`` (embedding width), ``'gru_units'`` (GRU
            units) and ``'optimizer'`` (value forwarded to ``get_optimizer``).

    Returns:
        The compiled ``Sequential`` model. Its summary is printed as a side
        effect on every call.
    """
    model = Sequential()
    # `(max_len)` is just the int `max_len`; the trailing comma makes it the
    # one-element shape tuple that `Input` is documented to take.
    model.add(Input(shape=(max_len,)))
    model.add(
        Embedding(
          num_words, hp.get('output_dim')
        )
    )
    model.add(GRU(hp.get('gru_units')))
    # Two output units with softmax, paired with categorical cross-entropy below.
    model.add(Dense(2, activation='softmax'))
    model.compile(
        optimizer=get_optimizer(hp.get('optimizer')),
        loss='categorical_crossentropy',
        # NOTE(review): Precision/Recall and the confusion-count metrics receive
        # the two-column output as-is, so they presumably pool both class
        # columns rather than describe the positive class alone. Confirm this
        # is intended.
        metrics=[
                 'accuracy',
                 Precision(), 
                 Recall(),
                 TruePositives(), 
                 TrueNegatives(), 
                 FalsePositives(),
                 FalseNegatives()
              ]
          )
    model.summary()
    return model

In [13]:
# Search space for the tuner; build_gru reads these three entries via hp.get().
hp = HyperParameters()
hp.Int("output_dim", min_value=16, max_value=64, step=16)
hp.Choice("gru_units", values=[2, 8, 16, 32, 64])
hp.Choice("optimizer", values=optimizers)

'Adam'

In [23]:
# Create the tuner through the project helper.
# NOTE(review): the two trailing 2s are positional arguments whose meaning is
# defined by get_tuner in sentiment.utils (TODO confirm).
tuner = get_tuner(
    build_gru,
    "/content/drive/MyDrive/sentiment/GRU",
    "GRU_TEMP_B32",
    hp,
    2,
    2,
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 250, 16)           160000    
_________________________________________________________________
gru (GRU)                    (None, 2)                 120       
_________________________________________________________________
dense (Dense)                (None, 2)                 6         
Total params: 160,126
Trainable params: 160,126
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Run the hyperparameter search on the encoded training data, scoring each
# trial on the validation split.
# NOTE(review): keras-tuner's search() is not documented as returning a value,
# so `history` is probably None. Confirm before reading attributes from it.
# NOTE(review): Keras documents `use_multiprocessing` as applying to generator
# or Sequence inputs; it likely has no effect on in-memory inputs (verify).
history = tuner.search(
    sequence_train,
    train_target,
    epochs=2,
    validation_data=(sequence_val, val_target),
    batch_size=32,
    use_multiprocessing=True
)

Trial 2 Complete [00h 13m 06s]
val_loss: 0.16196909546852112

Best val_loss So Far: 0.16196909546852112
Total elapsed time: 00h 28m 05s
INFO:tensorflow:Oracle triggered exit


In [25]:
# Print the tuner's summary of the completed trials and their hyperparameters.
tuner.results_summary()

Results summary
Results in /content/drive/MyDrive/sentiment/GRU/GRU_TEMP_B32
Showing 10 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
output_dim: 64
gru_units: 64
optimizer: Adam
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.16196909546852112
Trial summary
Hyperparameters:
output_dim: 64
gru_units: 8
optimizer: Adam
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.17348305135965347


AttributeError: ignored