<a href="https://colab.research.google.com/github/ppkgtmm/sentiment/blob/main/MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# !pip install keras-tuner

In [40]:
import pandas as pd
import numpy as np
from keras.preprocessing.text import  Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Dense
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from keras.metrics import Precision, Recall, FalseNegatives, \
FalsePositives, TrueNegatives, TruePositives
from sklearn.preprocessing import OneHotEncoder
from kerastuner.tuners import Hyperband
from kerastuner import HyperParameters
from keras.optimizers import Adam, Adadelta, Adagrad, RMSprop
from keras.activations import sigmoid, relu, tanh

In [45]:
# --- Data ---------------------------------------------------------------
# Columns kept from the preprocessed CSV: the review text and its label.
cols = ['text', 'target']
# Location of the preprocessed dataset on the mounted Google Drive.
data_path = '/content/drive/MyDrive/sentiment/data/data_preprocessed.csv'
# Seed handed to the train/validation split as `random_state`.
seed = 123456

# --- Text vectorisation -------------------------------------------------
# Vocabulary cap given to the Tokenizer.
num_words = 10000
# Fixed sequence length: used for padding and as the model's input width.
max_len = 250

# --- Tuner search space -------------------------------------------------
# Candidate values for the categorical hyperparameters.
optimizers = ['Adam', 'Adadelta', 'Adagrad', 'RMSprop']
activations = ['relu', 'tanh', 'sigmoid']
# lrs = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]

In [6]:
# Read the preprocessed CSV and keep only the columns named in `cols`, in that order.
data = pd.read_csv(data_path).loc[:, cols]

In [7]:
data.sample(5)

Unnamed: 0,text,target
248633,the most sucessful album of engenheiros engenh...,1
164322,not the original release this is a new commerc...,1
209936,fantastic tango music wonderful choice of musi...,1
264066,your best life now journal very helpful exerci...,1
143903,scrabble a dud on windows NUMBER i have had th...,0


In [8]:
# Hold out 15% of the rows for validation. Stratifying on `target` keeps the
# label proportions the same in both parts; `seed` makes the split repeatable.
train, val = train_test_split(
    data,
    test_size=0.15,
    random_state=seed,
    stratify=data['target'],
)

In [9]:
train.shape, val.shape

((305988, 2), (53998, 2))

In [10]:
# Tokenizer capped at `num_words` entries, with 'OOV' as the out-of-vocabulary token.
# It is fitted on the training texts only, so the validation texts do not
# influence the vocabulary.
tokenizer = Tokenizer(
    oov_token='OOV',
    num_words=num_words,
)
tokenizer.fit_on_texts(train['text'])

In [11]:
def get_sequences(tokenizer, max_len, texts):
    """Turn raw texts into fixed-length integer sequences.

    Args:
        tokenizer: a fitted tokenizer exposing ``texts_to_sequences``.
        max_len: length every sequence is padded or truncated to
            (passed to ``pad_sequences`` as ``maxlen``).
        texts: iterable of strings to convert.

    Returns:
        The result of ``pad_sequences``: one row per text, ``max_len`` columns.
    """
    token_ids = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(token_ids, maxlen=max_len)
    return padded

In [12]:
# Vectorise both splits with the same tokenizer and sequence length
# (train first, then validation).
sequence_train, sequence_val = (
    get_sequences(tokenizer, max_len, split['text'])
    for split in (train, val)
)

In [13]:
sequence_train.shape, sequence_val.shape

((305988, 250), (53998, 250))

In [18]:
print(val['target'].unique(), train['target'].unique())

[1 0] [0 1]


In [35]:
# One-hot encode the binary target into a dense two-column array, matching the
# 2-unit softmax output of the model.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and later removed the old
# keyword, so try the new spelling first and fall back on older releases.
try:
    OH_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    OH_encoder = OneHotEncoder(sparse=False)
# Fit on the training labels only; the validation labels reuse the fitted categories.
train_target = OH_encoder.fit_transform(train['target'].values.reshape(-1, 1))
val_target = OH_encoder.transform(val['target'].values.reshape(-1, 1))

In [38]:
train_target.shape, val_target.shape

((305988, 2), (53998, 2))

In [46]:
def get_optimizer(key):
  """Return a freshly constructed Keras optimizer for the given name.

  Args:
      key: one of 'Adam', 'Adadelta', 'Adagrad' or 'RMSprop'.

  Returns:
      A new optimizer instance built with its default arguments, or ``None``
      when ``key`` is not a known optimizer name.
  """
  # Map names to classes (not instances) so only the requested optimizer is
  # built, and avoid shadowing the builtin `map`.
  optimizer_classes = {
      'Adam': Adam,
      'Adadelta': Adadelta,
      'Adagrad': Adagrad,
      'RMSprop': RMSprop,
  }
  optimizer_cls = optimizer_classes.get(key)
  if optimizer_cls is None:
    return None
  return optimizer_cls()

In [20]:
def build_simple_mlp(hp):
    """Build and compile a one-hidden-layer MLP for the tuner.

    Reads three hyperparameters from ``hp``: ``hidden_units``,
    ``hidden_activation`` and ``optimizer``. The input width is the
    module-level ``max_len``; the output is a 2-unit softmax trained with
    categorical cross-entropy, so targets must be one-hot with two columns.

    Args:
        hp: hyperparameter container exposing ``get(name)``.

    Returns:
        The compiled ``Sequential`` model (its summary is printed as a side effect).
    """
    # NOTE(review): the Dense layer receives raw integer token ids straight from
    # the padded sequences. The recorded search scores (val_loss ~0.69, i.e.
    # about ln 2) suggest the model is barely better than chance; an embedding or
    # bag-of-words input is probably needed -- confirm.
    model = Sequential()
    model.add(Input(shape=(max_len,)))
    model.add(Dense(hp.get('hidden_units'),
                    activation=hp.get('hidden_activation')))
    model.add(Dense(2, activation='softmax'))
    model.compile(
        optimizer=hp.get('optimizer'),
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            # With two-column one-hot targets, unscoped Precision/Recall pool both
            # columns and collapse to accuracy. Scope them to column 1 (the
            # positive class) and give them stable names for the logs.
            Precision(class_id=1, name='precision'),
            Recall(class_id=1, name='recall'),
            # NOTE(review): these count metrics take no class_id, so they are
            # computed over both output columns together -- interpret with care.
            TruePositives(),
            TrueNegatives(),
            FalsePositives(),
            FalseNegatives(),
        ],
    )
    model.summary()
    return model

In [47]:
# Declare the search space up front: hidden layer width (10, 20 or 30 units),
# hidden activation, and optimizer name.
hp = HyperParameters()
hp.Int('hidden_units', min_value=10, max_value=30, step=10)
hp.Choice('hidden_activation', values=activations)
hp.Choice('optimizer', values=optimizers)

'Adam'

In [48]:
# Hyperband search over the predeclared space `hp`, minimising validation loss.
# Each configuration is trained twice (executions_per_trial=2), and results are
# stored under the Drive directory below.
# `seed` is passed so the sampled configurations are repeatable across runs.
tuner = Hyperband(
    build_simple_mlp,
    max_epochs=10,
    objective='val_loss',
    executions_per_trial=2,
    seed=seed,
    directory='/content/drive/MyDrive/sentiment/MLP',
    project_name='MLP_1H_B32',
    hyperparameters=hp,
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                2510      
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 22        
Total params: 2,532
Trainable params: 2,532
Non-trainable params: 0
_________________________________________________________________


In [49]:
# Run the Hyperband search on the padded training sequences, scoring every trial
# on the held-out validation split with mini-batches of 32.
# NOTE(review): keras-tuner documents search() as returning nothing, so `history`
# is presumably None rather than a Keras History object -- confirm before use.
# NOTE(review): Hyperband appears to set the per-trial epoch count itself (see the
# `tuner/epochs` values in the results summary), so `epochs=10` may have no
# effect -- confirm.
history = tuner.search(
    sequence_train,
    train_target,
    epochs=10,
    validation_data=(sequence_val, val_target),
    batch_size=32
)

Trial 30 Complete [00h 16m 47s]
val_loss: 0.693591445684433

Best val_loss So Far: 0.6891161203384399
Total elapsed time: 03h 23m 25s
INFO:tensorflow:Oracle triggered exit


In [55]:
tuner.search_space_summary()

Search space summary
Default search space size: 3
hidden_units (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 30, 'step': 10, 'sampling': None}
hidden_activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh', 'sigmoid'], 'ordered': False}
optimizer (Choice)
{'default': 'Adam', 'conditions': [], 'values': ['Adam', 'Adadelta', 'Adagrad', 'RMSprop'], 'ordered': False}


In [56]:
tuner.results_summary()

Results summary
Results in /content/drive/MyDrive/sentiment/MLP/MLP_1H_B32
Showing 10 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
hidden_units: 10
hidden_activation: sigmoid
optimizer: RMSprop
tuner/epochs: 10
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.6891161203384399
Trial summary
Hyperparameters:
hidden_units: 30
hidden_activation: sigmoid
optimizer: RMSprop
tuner/epochs: 10
tuner/initial_epoch: 4
tuner/bracket: 2
tuner/round: 2
tuner/trial_id: c37ca39be27500401abcb9ef7b9de072
Score: 0.689337432384491
Trial summary
Hyperparameters:
hidden_units: 20
hidden_activation: sigmoid
optimizer: Adam
tuner/epochs: 10
tuner/initial_epoch: 4
tuner/bracket: 2
tuner/round: 2
tuner/trial_id: afb8e4350d472e9bb627c5ae6873135e
Score: 0.6895457804203033
Trial summary
Hyperparameters:
hidden_units: 30
hidden_activation: sigmoid
optimizer: RMSprop
tuner/epochs: 4
tuner/initial_epoch: 2
tuner/bracket: 2
tuner/round: 1
tuner/trial_id: 2ff1c11

In [60]:
# Take the single best hyperparameter set found by the search and report its values.
best_params = tuner.get_best_hyperparameters(num_trials=1)[0]
print(
    f"hidden units best: {best_params.get('hidden_units')}",
    f"hidden activation best: {best_params.get('hidden_activation')}",
    f"optimizer best: {best_params.get('optimizer')}",
    sep="\n",
)

hidden units best: 10
hidden activation best: sigmoid
optimizer best: RMSprop
