<a href="https://colab.research.google.com/github/ppkgtmm/sentiment/blob/main/MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [49]:
# !pip install keras-tuner

In [57]:
import pandas as pd
import numpy as np
from keras.preprocessing.text import  Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Dense
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from keras.metrics import Precision, Recall, FalseNegatives, \
FalsePositives, TrueNegatives, TruePositives
from sklearn.preprocessing import OneHotEncoder
from kerastuner.tuners import Hyperband
from kerastuner import HyperParameters
from keras.optimizers import Adam, Adadelta, Adagrad, RMSprop

In [65]:
# Columns kept from the preprocessed CSV.
cols = ['text', 'target']
data_path = '/content/drive/MyDrive/sentiment/data/data_preprocessed.csv'

# Tokenizer vocabulary cap and padded sequence length.
num_words = 10000
max_len = 250

# Candidate values offered to the hyperparameter search.
optimizers = ['Adam', 'Adadelta', 'Adagrad', 'RMSprop']
activations = ['relu', 'tanh', 'sigmoid']
lrs = [0.1, 0.01, 0.001, 0.0001, 0.00001]

In [6]:
# Parse only the columns we need instead of loading every column and discarding
# the rest; the trailing [cols] keeps the column order identical to `cols`
# (usecols alone returns columns in file order).
data = pd.read_csv(data_path, usecols=cols)[cols]

In [7]:
# Peek at a few random rows to sanity-check the loaded text/target columns.
data.sample(n=5)

Unnamed: 0,text,target
80324,perfect a wonderful cd i also got to experienc...,1
334643,there much less here than meets the eye yes it...,0
90085,NUMBER liteon recorder one of the most disappo...,0
320518,broke after NUMBER month quite a big blender N...,0
270485,finest is a subjective term along with the res...,0


In [18]:
# Stratified 85/15 hold-out split. random_state pins the shuffle so the split
# (and everything fitted on it afterwards) is identical on every fresh run.
train, val = train_test_split(
    data,
    test_size=0.15,
    stratify=data['target'],
    random_state=42,
)

In [19]:
# (rows, columns) of the training and validation frames.
tuple(frame.shape for frame in (train, val))

((305988, 2), (53998, 2))

In [20]:
# Fit the vocabulary on the training text only, so nothing from the
# validation split influences the word index.
tokenizer = Tokenizer(oov_token='OOV', num_words=num_words)
tokenizer.fit_on_texts(train['text'])

In [26]:
def get_sequences(tokenizer, max_len, texts):
    """Turn `texts` into token-id sequences and pad them.

    Args:
        tokenizer: Object exposing `texts_to_sequences(texts)`.
        max_len: Forwarded to `pad_sequences` as `maxlen`.
        texts: Iterable of strings to convert.

    Returns:
        Whatever `pad_sequences` returns for the converted sequences.
    """
    token_ids = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(token_ids, maxlen=max_len)
    return padded

In [29]:
# Encode both splits with the same fitted tokenizer and padding length.
sequence_train, sequence_val = (
    get_sequences(tokenizer, max_len, frame['text'])
    for frame in (train, val)
)

In [30]:
# Shapes of the padded training and validation sequence arrays.
tuple(arr.shape for arr in (sequence_train, sequence_val))

((305988, 250), (53998, 250))

In [35]:
# Sanity check: both splits must contain the same label values.
# unique() returns values in order of first appearance, which can differ between
# the shuffled splits, so sort before comparing element-wise.
np.sort(val['target'].unique()) == np.sort(train['target'].unique())

array([ True,  True])

In [47]:
# One-hot encode the labels. The encoder is fitted on the training labels only
# and reused for validation so both share the same column layout.
# OneHotEncoder returns a SciPy sparse matrix by default; .toarray() densifies it
# so the targets are plain NumPy arrays that can be passed to Keras as-is.
OH_encoder = OneHotEncoder()
train_target = OH_encoder.fit_transform(
    train['target'].values.reshape(-1, 1)
).toarray()
val_target = OH_encoder.transform(
    val['target'].values.reshape(-1, 1)
).toarray()

In [74]:
def get_optimizer(key, lr):
  """Build the optimizer named `key` with learning rate `lr`.

  Args:
    key: One of 'Adam', 'Adadelta', 'Adagrad', 'RMSprop'.
    lr: Value forwarded to the optimizer as `learning_rate`.

  Returns:
    A new optimizer instance, or None when `key` is not a known name.
  """
  # Map names to classes (not instances) so that only the requested optimizer
  # is constructed, rather than all four on every call.
  optimizer_classes = {
      'Adam': Adam,
      'Adadelta': Adadelta,
      'Adagrad': Adagrad,
      'RMSprop': RMSprop,
  }
  optimizer_cls = optimizer_classes.get(key)
  if optimizer_cls is None:
    return None
  return optimizer_cls(learning_rate=lr)

In [71]:
def build_simple_mlp(hp):
    """Build and compile a one-hidden-layer MLP from tuner hyperparameters.

    Args:
        hp: Hyperparameter container exposing `get(name)` for
            'hidden_units', 'hidden_activation', 'optimizer' and 'lr'.

    Returns:
        The compiled Sequential model: an input of shape `(max_len,)`, one
        Dense hidden layer, and a 2-unit softmax output.
    """
    model = Sequential()
    model.add(Input(shape=(max_len,)))
    model.add(
        Dense(
            hp.get('hidden_units'),
            activation=hp.get('hidden_activation'),
        )
    )
    model.add(Dense(2, activation='softmax'))

    # Build the optimizer through get_optimizer so the tuned 'lr' value is
    # actually applied; passing only the optimizer name would make every trial
    # run with that optimizer's default learning rate.
    optimizer = get_optimizer(hp.get('optimizer'), hp.get('lr'))

    # NOTE(review): with one-hot 2-class targets, the un-parameterised
    # Precision/Recall and TP/TN/FP/FN metrics presumably aggregate over both
    # output columns rather than a single positive class - confirm this is intended.
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            Precision(),
            Recall(),
            TruePositives(),
            TrueNegatives(),
            FalsePositives(),
            FalseNegatives(),
        ],
    )
    model.summary()
    return model

In [72]:
# Declare the search space up front; build_simple_mlp reads these by name.
hp = HyperParameters()
hp.Int('hidden_units', min_value=10, max_value=30, step=10)
for hp_name, hp_values in (
    ('hidden_activation', activations),
    ('optimizer', optimizers),
):
    hp.Choice(hp_name, hp_values)
hp.Choice('lr', lrs)

0.1

In [73]:
# Hyperband search over the predeclared space `hp`, minimising validation loss.
# Trial state is written under `directory`/`project_name`.
tuner = Hyperband(
    build_simple_mlp,
    hyperparameters=hp,
    objective='val_loss',
    max_epochs=10,
    executions_per_trial=4,
    project_name='simple_1HD',
    directory='/content/drive/MyDrive/sentiment/MLP',
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                2510      
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 22        
Total params: 2,532
Trainable params: 2,532
Non-trainable params: 0
_________________________________________________________________


SyntaxError: ignored

In [None]:
# Search on the padded training sequences and one-hot targets built earlier in
# this notebook (x_train / y_train were never defined), and validate on the
# stratified hold-out split rather than re-splitting the training data.
tuner.search(
    sequence_train,
    train_target,
    epochs=10,
    validation_data=(sequence_val, val_target),
)