<a href="https://colab.research.google.com/github/ppkgtmm/sentiment/blob/main/MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# !pip install keras-tuner

In [40]:
import pandas as pd
import numpy as np
from keras.preprocessing.text import  Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Dense
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from keras.metrics import Precision, Recall, FalseNegatives, \
FalsePositives, TrueNegatives, TruePositives
from sklearn.preprocessing import OneHotEncoder
from kerastuner.tuners import Hyperband
from kerastuner import HyperParameters
from keras.optimizers import Adam, Adadelta, Adagrad, RMSprop
from keras.activations import sigmoid, relu, tanh

In [5]:
# Input file and the columns selected from it after loading.
data_path = '/content/drive/MyDrive/sentiment/data/data_preprocessed.csv'
cols = ['text', 'target']

# Seed used as random_state for the train/validation split.
seed = 123456

# Tokenizer vocabulary cap and the length every sequence is padded to.
num_words = 10000
max_len = 250

# Candidate values offered to the hyperparameter search.
optimizers = ['Adam', 'Adadelta', 'Adagrad', 'RMSprop']
activations = ['relu', 'tanh', 'sigmoid']
lrs = [0.1, 0.01, 0.001, 0.0001, 0.00001]

In [6]:
# Read the preprocessed CSV and keep only the configured columns, in `cols` order.
data = pd.read_csv(data_path).loc[:, cols]

In [7]:
# Peek at five randomly chosen rows.
data.sample(n=5)

Unnamed: 0,text,target
248633,the most sucessful album of engenheiros engenh...,1
164322,not the original release this is a new commerc...,1
209936,fantastic tango music wonderful choice of musi...,1
264066,your best life now journal very helpful exerci...,1
143903,scrabble a dud on windows NUMBER i have had th...,0


In [8]:
# Hold out 15% of the rows for validation, stratified on the target column
# and seeded so the split is repeatable.
train, val = train_test_split(
    data,
    test_size=0.15,
    random_state=seed,
    stratify=data['target'],
)

In [9]:
# Shapes of the training and validation frames, in that order.
tuple(split.shape for split in (train, val))

((305988, 2), (53998, 2))

In [10]:
# Fit the tokenizer on the training texts only; the validation split is not used here.
tokenizer = Tokenizer(oov_token='OOV', num_words=num_words)
tokenizer.fit_on_texts(train['text'])

In [11]:
def get_sequences(tokenizer, max_len, texts):
    """Encode texts with the tokenizer and pad the result.

    Args:
        tokenizer: object exposing ``texts_to_sequences``.
        max_len: value forwarded to ``pad_sequences`` as ``maxlen``.
        texts: collection of texts to encode.

    Returns:
        The output of ``pad_sequences`` for the encoded texts.
    """
    encoded = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(encoded, maxlen=max_len)
    return padded

In [12]:
# Encode and pad both splits with the same tokenizer and length (train first, then val).
sequence_train, sequence_val = (
    get_sequences(tokenizer, max_len, split['text']) for split in (train, val)
)

In [13]:
# Shapes of the padded training and validation sequences, in that order.
tuple(padded.shape for padded in (sequence_train, sequence_val))

((305988, 250), (53998, 250))

In [18]:
# Distinct target values in each split (validation first, then training).
print(*(split['target'].unique() for split in (val, train)))

[1 0] [0 1]


In [35]:
# One-hot encode the target column: the encoder is fitted on the training
# labels and then reused, unchanged, for the validation labels.
# NOTE(review): newer scikit-learn releases renamed `sparse` to `sparse_output`;
# confirm against the installed version before upgrading.
OH_encoder = OneHotEncoder(sparse=False)
train_target = OH_encoder.fit_transform(np.reshape(train['target'].to_numpy(), (-1, 1)))
val_target = OH_encoder.transform(np.reshape(val['target'].to_numpy(), (-1, 1)))

In [38]:
# Shapes of the one-hot encoded training and validation targets, in that order.
tuple(encoded.shape for encoded in (train_target, val_target))

((305988, 2), (53998, 2))

In [19]:
def get_optimizer(key, lr):
  """Return a new optimizer instance for the given name and learning rate.

  Only the requested optimizer class is instantiated; the other candidates
  are never constructed.

  Args:
    key: optimizer name, one of 'Adam', 'Adadelta', 'Adagrad', 'RMSprop'.
    lr: learning rate passed to the optimizer as ``learning_rate``.

  Returns:
    The optimizer instance, or None when ``key`` is not a known name.
  """
  # Map names to classes (not instances) so construction is deferred until
  # the lookup has succeeded.
  optimizer_classes = {
      'Adam': Adam,
      'Adadelta': Adadelta,
      'Adagrad': Adagrad,
      'RMSprop': RMSprop,
  }
  optimizer_cls = optimizer_classes.get(key)
  if optimizer_cls is None:
    return None
  return optimizer_cls(learning_rate=lr)

In [None]:
def get_activation(key, lr=None):
  """Return the activation function registered under the given name.

  Args:
    key: activation name, one of 'sigmoid', 'tanh', 'relu'.
    lr: unused; kept (now optional) so existing two-argument calls still work.

  Returns:
    The activation callable itself (not called), or None when ``key`` is
    not a known name.
  """
  # Store the callables themselves: activations need a tensor argument, so
  # they must not be invoked while building the lookup table.
  activation_fns = {
      'sigmoid': sigmoid,
      'tanh': tanh,
      'relu': relu,
  }
  return activation_fns.get(key)

In [20]:
def build_simple_mlp(hp):
    """Build and compile a one-hidden-layer MLP from the given hyperparameters.

    Args:
        hp: hyperparameter container exposing ``get``; it must provide
            'hidden_units', 'hidden_activation', 'optimizer' and 'lr'.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Input(shape=(max_len,)))
    model.add(Dense(hp.get('hidden_units'),
                    activation=hp.get('hidden_activation')))
    # Two softmax outputs, paired with the categorical cross-entropy loss below.
    model.add(Dense(2, activation='softmax'))
    model.compile(
        # Build an optimizer instance so the tuned 'lr' value is actually
        # applied; passing only the optimizer name would ignore it.
        optimizer=get_optimizer(hp.get('optimizer'), hp.get('lr')),
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            Precision(),
            Recall(),
            TruePositives(),
            TrueNegatives(),
            FalsePositives(),
            FalseNegatives(),
        ],
    )
    model.summary()
    return model

In [41]:
# Declare the search space up front; this object is later handed to the tuner.
hp = HyperParameters()
hp.Int('hidden_units', min_value=10, max_value=30, step=10)
hp.Choice('hidden_activation', values=activations)
hp.Choice('optimizer', values=optimizers)
hp.Choice('lr', values=lrs)

0.1

In [42]:
# Hyperband search over the pre-declared hyperparameters, minimising the
# validation loss. Each trial is executed twice, and results are stored in
# the given directory under the given project name.
tuner = Hyperband(
    build_simple_mlp,
    max_epochs=10,
    objective='val_loss',
    executions_per_trial=2,
    # Seed the tuner with the notebook-wide seed so the sampled trials are
    # repeatable across runs.
    seed=seed,
    directory='/content/drive/MyDrive/sentiment/MLP',
    project_name='MLP_1H_B32',
    hyperparameters=hp
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                2510      
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 22        
Total params: 2,532
Trainable params: 2,532
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Run the hyperparameter search on the training data, scoring each trial on
# the validation data.
# NOTE(review): Hyperband appears to set the per-trial epoch count itself, so
# `epochs=10` may have no effect — confirm.
# NOTE(review): `search()` presumably returns None rather than a training
# history, so `history` may be empty — confirm before relying on it.
history = tuner.search(
    sequence_train,
    train_target,
    batch_size=32,
    validation_data=(sequence_val, val_target),
    epochs=10
)


Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
hidden_units      |10                |?                 
hidden_activation |tanh              |?                 
optimizer         |RMSprop           |?                 
lr                |0.01              |?                 
tuner/epochs      |2                 |?                 
tuner/initial_e...|0                 |?                 
tuner/bracket     |2                 |?                 
tuner/round       |0                 |?                 

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                2510      
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 22        
Total params: 2,532
Trainable params: 2,532
Non-trainable params: 0
_________________________________________