<a href="https://colab.research.google.com/github/ppkgtmm/sentiment/blob/main/MLP_3H.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# !pip install keras-tuner

In [5]:
import pandas as pd
import numpy as np
from keras.preprocessing.text import  Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Dense
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from keras.metrics import Precision, Recall, FalseNegatives, \
FalsePositives, TrueNegatives, TruePositives
from sklearn.preprocessing import OneHotEncoder
from kerastuner.tuners import Hyperband
from kerastuner import HyperParameters
from keras.optimizers import Adam, Adadelta, Adagrad, RMSprop
from keras.activations import sigmoid, relu, tanh
import pickle

In [19]:

cols = ['text', 'target']
data_path = '/content/drive/MyDrive/sentiment/data/data_preprocessed.csv'
seed = 123456
num_words = 10000
max_len = 250
optimizers = [
           'Adam',
           'RMSprop'   
]
activations = ['sigmoid']

In [7]:
data = pd.read_csv(data_path)[cols]

In [8]:
data.sample(5)

Unnamed: 0,text,target
234876,perfect just what i needed to adjust the hunte...,1
23520,natural color is actually brown color shown as...,0
192917,great and exotic love story transport yourself...,1
19240,these are not art quilts i was really disappoi...,0
20627,errr who are you and what have you done to eri...,0


In [9]:
train, val = train_test_split(data, test_size=0.15, stratify=data['target'], random_state=seed)

In [10]:
train.shape, val.shape

((305988, 2), (53998, 2))

In [11]:
tokenizer = Tokenizer(num_words=num_words, oov_token='OOV')
tokenizer.fit_on_texts(train['text'])

In [12]:
def get_sequences(tokenizer, max_len, texts):
    return pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_len)

In [13]:
sequence_train = get_sequences(tokenizer, max_len, train['text'])
sequence_val = get_sequences(tokenizer, max_len, val['text'])

In [14]:
sequence_train.shape, sequence_val.shape

((305988, 250), (53998, 250))

In [15]:
print(val['target'].unique(), train['target'].unique())

[1 0] [0 1]


In [16]:
OH_encoder = OneHotEncoder(sparse=False)
train_target = OH_encoder.fit_transform(train['target'].values.reshape(-1,1))
val_target = OH_encoder.transform(val['target'].values.reshape(-1,1))

In [17]:
train_target.shape, val_target.shape

((305988, 2), (53998, 2))

In [18]:
def get_optimizer(key):
  map = {
      'Adam': Adam(),
      'Adadelta': Adadelta(),
      'Adagrad': Adagrad(),
      'RMSprop': RMSprop()
  }
  return map.get(key)

In [22]:
def build_mlp(hp):
    model = Sequential()
    model.add(Input(shape=(max_len,)))
    units = hp.get('hidden_units')
    model.add(Dense(units,\
                           activation=hp.get("hidden_activation")))
    model.add(Dense(units+10,\
                           activation=hp.get("hidden_activation")))
    model.add(Dense(units,\
                           activation=hp.get("hidden_activation")))
    model.add(Dense(2, activation='softmax'))
    model.compile(
        optimizer=hp.get('optimizer'),
        loss='categorical_crossentropy',
        metrics=[
                 'accuracy',
                 Precision(), 
                 Recall(),
                 TruePositives(), 
                 TrueNegatives(), 
                 FalsePositives(),
                 FalseNegatives()
              ]
          )
    model.summary()
    return model

In [23]:
hp = HyperParameters()
hp.Int('hidden_units', 10, 30, 10)
hp.Choice('hidden_activation', activations)
hp.Choice('optimizer', optimizers)

'Adam'

In [32]:
tuner = Hyperband(
    build_mlp,
    max_epochs=4,
    objective='val_loss',
    executions_per_trial=2,
    directory='/content/drive/MyDrive/sentiment/MLP',
    project_name='MLP_3H_B32',
    hyperparameters=hp,
)

INFO:tensorflow:Reloading Oracle from existing project /content/drive/MyDrive/sentiment/MLP/MLP_3H_B32/oracle.json
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                2510      
_________________________________________________________________
dense_1 (Dense)              (None, 20)                220       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 22        
Total params: 2,962
Trainable params: 2,962
Non-trainable params: 0
_________________________________________________________________
INFO:tensorflow:Reloading Tuner from /content/drive/MyDrive/sentiment/MLP/MLP_3H_B32/tuner0.json


In [25]:
history = tuner.search(
    sequence_train,
    train_target,
    epochs=4,
    validation_data=(sequence_val, val_target),
    batch_size=32
)

Trial 8 Complete [00h 02m 46s]
val_loss: 0.6891468465328217

Best val_loss So Far: 0.6891468465328217
Total elapsed time: 00h 12m 02s
INFO:tensorflow:Oracle triggered exit


In [26]:
tuner.search_space_summary()

Search space summary
Default search space size: 3
hidden_units (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 30, 'step': 10, 'sampling': None}
hidden_activation (Choice)
{'default': 'sigmoid', 'conditions': [], 'values': ['sigmoid'], 'ordered': False}
optimizer (Choice)
{'default': 'Adam', 'conditions': [], 'values': ['Adam', 'RMSprop'], 'ordered': False}


In [58]:
tuner.results_summary(num_trials=3)

Results summary
Results in /content/drive/MyDrive/sentiment/MLP/MLP_3H_B32
Showing 3 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
hidden_units: 30
hidden_activation: sigmoid
optimizer: Adam
tuner/epochs: 4
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.6891468465328217
Trial summary
Hyperparameters:
hidden_units: 30
hidden_activation: sigmoid
optimizer: RMSprop
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.6891677379608154
Trial summary
Hyperparameters:
hidden_units: 20
hidden_activation: sigmoid
optimizer: RMSprop
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.6895217001438141


In [44]:
tuner.get_best_models(num_models=1)[0].evaluate(sequence_train, train_target, batch_size=32)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 30)                7530      
_________________________________________________________________
dense_1 (Dense)              (None, 40)                1240      
_________________________________________________________________
dense_2 (Dense)              (None, 30)                1230      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 62        
Total params: 10,062
Trainable params: 10,062
Non-trainable params: 0
_________________________________________________________________


[0.6894967555999756,
 0.5353020429611206,
 0.5353020429611206,
 0.5353020429611206,
 163796.0,
 163796.0,
 142192.0,
 142192.0]

In [45]:
tuner.get_best_models(num_models=1)[0].evaluate(sequence_val, val_target, batch_size=32)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 30)                7530      
_________________________________________________________________
dense_1 (Dense)              (None, 40)                1240      
_________________________________________________________________
dense_2 (Dense)              (None, 30)                1230      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 62        
Total params: 10,062
Trainable params: 10,062
Non-trainable params: 0
_________________________________________________________________


[0.6891140937805176,
 0.5365198850631714,
 0.5365198850631714,
 0.5365198850631714,
 28971.0,
 28971.0,
 25027.0,
 25027.0]

In [29]:
def build_OE(hp):
    model = Sequential()
    model.add(Input(shape=(max_len,)))
    units = hp.get('hidden_units')
    model.add(Dense(units,\
                           activation=hp.get("hidden_activation")))
    model.add(Dense(units-10,\
                           activation=hp.get("hidden_activation")))
    model.add(Dense(units,\
                           activation=hp.get("hidden_activation")))
    model.add(Dense(2, activation='softmax'))
    model.compile(
        optimizer=hp.get('optimizer'),
        loss='categorical_crossentropy',
        metrics=[
                 'accuracy',
                 Precision(), 
                 Recall(),
                 TruePositives(), 
                 TrueNegatives(), 
                 FalsePositives(),
                 FalseNegatives()
              ]
          )
    model.summary()
    return model

In [30]:
hp2 = HyperParameters()
hp2.Int('hidden_units', 20, 40, 10)
hp2.Choice('hidden_activation', activations)
hp2.Choice('optimizer', optimizers)

'Adam'

In [34]:
tuner2 = Hyperband(
    build_OE,
    max_epochs=4,
    objective='val_loss',
    executions_per_trial=2,
    directory='/content/drive/MyDrive/sentiment/MLP',
    project_name='MLP_3HOEN_B32',
    hyperparameters=hp2,
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                5020      
_________________________________________________________________
dense_1 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_2 (Dense)              (None, 20)                220       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 42        
Total params: 5,492
Trainable params: 5,492
Non-trainable params: 0
_________________________________________________________________


In [35]:
history = tuner2.search(
    sequence_train,
    train_target,
    epochs=4,
    validation_data=(sequence_val, val_target),
    batch_size=32
)

Trial 8 Complete [00h 02m 30s]
val_loss: 0.6891522407531738

Best val_loss So Far: 0.6891522407531738
Total elapsed time: 00h 12m 16s
INFO:tensorflow:Oracle triggered exit


In [46]:
tuner2.get_best_models(num_models=1)[0].evaluate(sequence_train, train_target, batch_size=32)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                5020      
_________________________________________________________________
dense_1 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_2 (Dense)              (None, 20)                220       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 42        
Total params: 5,492
Trainable params: 5,492
Non-trainable params: 0
_________________________________________________________________


[0.6894736886024475,
 0.5355830788612366,
 0.5355830788612366,
 0.5355830788612366,
 163882.0,
 163882.0,
 142106.0,
 142106.0]

In [50]:
tuner2.get_best_models(num_models=1)[0].evaluate(sequence_val, val_target, batch_size=32)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                5020      
_________________________________________________________________
dense_1 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_2 (Dense)              (None, 20)                220       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 42        
Total params: 5,492
Trainable params: 5,492
Non-trainable params: 0
_________________________________________________________________


[0.6888649463653564,
 0.5393903255462646,
 0.5393903255462646,
 0.5393903255462646,
 29126.0,
 29126.0,
 24872.0,
 24872.0]

In [37]:
tuner2.search_space_summary()

Search space summary
Default search space size: 3
hidden_units (Int)
{'default': None, 'conditions': [], 'min_value': 20, 'max_value': 40, 'step': 10, 'sampling': None}
hidden_activation (Choice)
{'default': 'sigmoid', 'conditions': [], 'values': ['sigmoid'], 'ordered': False}
optimizer (Choice)
{'default': 'Adam', 'conditions': [], 'values': ['Adam', 'RMSprop'], 'ordered': False}


In [59]:
tuner2.results_summary(num_trials=3)

Results summary
Results in /content/drive/MyDrive/sentiment/MLP/MLP_3HOEN_B32
Showing 3 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
hidden_units: 20
hidden_activation: sigmoid
optimizer: RMSprop
tuner/epochs: 4
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.6891522407531738
Trial summary
Hyperparameters:
hidden_units: 30
hidden_activation: sigmoid
optimizer: RMSprop
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.6892656087875366
Trial summary
Hyperparameters:
hidden_units: 30
hidden_activation: sigmoid
optimizer: Adam
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.6897149980068207


In [39]:
hp_2 = HyperParameters()
hp_2.Int('hidden_units', 40, 60, 10)
hp_2.Choice('hidden_activation', activations)
hp_2.Choice('optimizer', optimizers)

'Adam'

In [40]:
tuner_2 = Hyperband(
    build_mlp,
    max_epochs=4,
    objective='val_loss',
    executions_per_trial=2,
    directory='/content/drive/MyDrive/sentiment/MLP',
    project_name='MLP_3H2_B32',
    hyperparameters=hp_2,
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 40)                10040     
_________________________________________________________________
dense_1 (Dense)              (None, 50)                2050      
_________________________________________________________________
dense_2 (Dense)              (None, 40)                2040      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 82        
Total params: 14,212
Trainable params: 14,212
Non-trainable params: 0
_________________________________________________________________


In [41]:
history = tuner_2.search(
    sequence_train,
    train_target,
    epochs=4,
    validation_data=(sequence_val, val_target),
    batch_size=32
)

Trial 8 Complete [00h 02m 40s]
val_loss: 0.6898790597915649

Best val_loss So Far: 0.6890568435192108
Total elapsed time: 00h 12m 36s
INFO:tensorflow:Oracle triggered exit


In [48]:
tuner_2.get_best_models(num_models=1)[0].evaluate(sequence_train, train_target, batch_size=32)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 60)                15060     
_________________________________________________________________
dense_1 (Dense)              (None, 70)                4270      
_________________________________________________________________
dense_2 (Dense)              (None, 60)                4260      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 122       
Total params: 23,712
Trainable params: 23,712
Non-trainable params: 0
_________________________________________________________________


[0.6892669200897217,
 0.5348477959632874,
 0.5348477959632874,
 0.5348477959632874,
 163657.0,
 163657.0,
 142331.0,
 142331.0]

In [49]:
tuner_2.get_best_models(num_models=1)[0].evaluate(sequence_val, val_target, batch_size=32)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 60)                15060     
_________________________________________________________________
dense_1 (Dense)              (None, 70)                4270      
_________________________________________________________________
dense_2 (Dense)              (None, 60)                4260      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 122       
Total params: 23,712
Trainable params: 23,712
Non-trainable params: 0
_________________________________________________________________


[0.6890028119087219,
 0.539075493812561,
 0.539075493812561,
 0.539075493812561,
 29109.0,
 29109.0,
 24889.0,
 24889.0]

In [42]:
tuner_2.search_space_summary()

Search space summary
Default search space size: 3
hidden_units (Int)
{'default': None, 'conditions': [], 'min_value': 40, 'max_value': 60, 'step': 10, 'sampling': None}
hidden_activation (Choice)
{'default': 'sigmoid', 'conditions': [], 'values': ['sigmoid'], 'ordered': False}
optimizer (Choice)
{'default': 'Adam', 'conditions': [], 'values': ['Adam', 'RMSprop'], 'ordered': False}


In [60]:
tuner_2.results_summary(num_trials=3)

Results summary
Results in /content/drive/MyDrive/sentiment/MLP/MLP_3H2_B32
Showing 3 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
hidden_units: 60
hidden_activation: sigmoid
optimizer: Adam
tuner/epochs: 4
tuner/initial_epoch: 2
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: a77f3a085f79745be2ba8f584dd28eba
Score: 0.6890568435192108
Trial summary
Hyperparameters:
hidden_units: 60
hidden_activation: sigmoid
optimizer: Adam
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.6891824305057526
Trial summary
Hyperparameters:
hidden_units: 40
hidden_activation: sigmoid
optimizer: RMSprop
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.6893600821495056


In [51]:
hp2_2 = HyperParameters()
hp2_2.Int('hidden_units', 50, 70, 10)
hp2_2.Choice('hidden_activation', activations)
hp2_2.Choice('optimizer', optimizers)

'Adam'

In [52]:
tuner2_2 = Hyperband(
    build_OE,
    max_epochs=4,
    objective='val_loss',
    executions_per_trial=2,
    directory='/content/drive/MyDrive/sentiment/MLP',
    project_name='MLP_3HOEN2_B32',
    hyperparameters=hp2_2,
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                12550     
_________________________________________________________________
dense_1 (Dense)              (None, 40)                2040      
_________________________________________________________________
dense_2 (Dense)              (None, 50)                2050      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 102       
Total params: 16,742
Trainable params: 16,742
Non-trainable params: 0
_________________________________________________________________


In [53]:
history = tuner2_2.search(
    sequence_train,
    train_target,
    epochs=4,
    validation_data=(sequence_val, val_target),
    batch_size=32
)

Trial 8 Complete [00h 03m 09s]
val_loss: 0.6891224682331085

Best val_loss So Far: 0.6891224682331085
Total elapsed time: 00h 14m 29s
INFO:tensorflow:Oracle triggered exit


In [56]:
tuner2_2.get_best_models(num_models=1)[0].evaluate(sequence_train, train_target, batch_size=32)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                12550     
_________________________________________________________________
dense_1 (Dense)              (None, 40)                2040      
_________________________________________________________________
dense_2 (Dense)              (None, 50)                2050      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 102       
Total params: 16,742
Trainable params: 16,742
Non-trainable params: 0
_________________________________________________________________


[0.689332902431488,
 0.5354000926017761,
 0.5354000926017761,
 0.5354000926017761,
 163826.0,
 163826.0,
 142162.0,
 142162.0]

In [57]:
tuner2_2.get_best_models(num_models=1)[0].evaluate(sequence_val, val_target, batch_size=32)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                12550     
_________________________________________________________________
dense_1 (Dense)              (None, 40)                2040      
_________________________________________________________________
dense_2 (Dense)              (None, 50)                2050      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 102       
Total params: 16,742
Trainable params: 16,742
Non-trainable params: 0
_________________________________________________________________


[0.6890793442726135,
 0.5386680960655212,
 0.5386680960655212,
 0.5386680960655212,
 29087.0,
 29087.0,
 24911.0,
 24911.0]

In [54]:
tuner2_2.search_space_summary()

Search space summary
Default search space size: 3
hidden_units (Int)
{'default': None, 'conditions': [], 'min_value': 50, 'max_value': 70, 'step': 10, 'sampling': None}
hidden_activation (Choice)
{'default': 'sigmoid', 'conditions': [], 'values': ['sigmoid'], 'ordered': False}
optimizer (Choice)
{'default': 'Adam', 'conditions': [], 'values': ['Adam', 'RMSprop'], 'ordered': False}


In [61]:
tuner2_2.results_summary(num_trials=3)

Results summary
Results in /content/drive/MyDrive/sentiment/MLP/MLP_3HOEN2_B32
Showing 3 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
hidden_units: 50
hidden_activation: sigmoid
optimizer: Adam
tuner/epochs: 4
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.6891224682331085
Trial summary
Hyperparameters:
hidden_units: 60
hidden_activation: sigmoid
optimizer: Adam
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.6892454922199249
Trial summary
Hyperparameters:
hidden_units: 60
hidden_activation: sigmoid
optimizer: RMSprop
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.6892657279968262
