In [None]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt

train_feat = pd.read_csv('preprocessed_train_data.csv')
train_lab = pd.read_csv('preprocessed_train_label.csv')

test_feat = pd.read_csv('preprocessed_test_data.csv')
test_lab = pd.read_csv('preprocessed_test_label.csv')


In [None]:
train_feat = train_feat.drop(columns = ['Class Index'])
test_feat = test_feat.drop(columns = ['Class Index'])

import ast

def clean (column):
  try:
    char_list = ast.literal_eval(column)
    joined_str = "".join(char_list)
    tokens  = joined_str.split()
    new_row = ' '.join(tokens)

    return new_row
  except(ValueError, SyntaxError):
    return ''


train_feat['text'] = train_feat['text'].apply(clean)
test_feat['text'] = test_feat['text'].apply(clean)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train_feat['text'])
X_train_seq = tokenizer.texts_to_sequences(train_feat['text'])
X_train_pad = pad_sequences(X_train_seq, maxlen=100, padding='post')


tokenizer_ = Tokenizer(num_words=10000)
tokenizer_.fit_on_texts(test_feat['text'])
X_test_seq = tokenizer.texts_to_sequences(test_feat['text'])
X_test_pad = pad_sequences(X_test_seq, maxlen=100, padding='post')

train_feat = train_feat.iloc[:, 1:]
test_feat = test_feat.iloc[:, 1:]
train_lab = train_lab.iloc[:, 1:]
test_lab = test_lab.iloc[:, 1:]



In [None]:
pip install keras-tuner --upgrade



In [None]:
import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras.layers import LSTM, Dense
import tensorflow as tf

model__n_hidden_neurons = [20, 40, 60, 80]
model__activation_function = ['relu', 'sigmoid', 'tanh']

def custom_sparse_categorical_crossentropy(y_true, y_pred):
    return tf.keras.losses.sparse_categorical_crossentropy(y_true - 1, y_pred)

def model_ltsm(model__n_hidden_neurons, model__activation_function, n_hidden_layers=2):
    model = keras.Sequential()
    model.add(LSTM(units=model__n_hidden_neurons, activation=model__activation_function, input_shape=(X_train_pad.shape[1], 1)))
    for i in range(n_hidden_layers):
        model.add(Dense(units=model__n_hidden_neurons, activation=model__activation_function))
    model.add(Dense(units=5, activation='softmax'))
    model.compile(optimizer='adam', loss=custom_sparse_categorical_crossentropy, metrics=['accuracy'])
    return model

def model_wrapper(hp):
    n_hidden_neurons = hp.Choice("model__n_hidden_neurons", model__n_hidden_neurons)
    activation_function = hp.Choice("model__activation_function", model__activation_function)
    return model_ltsm(n_hidden_neurons, activation_function, n_hidden_layers=2)

tuner = kt.Hyperband(
    model_wrapper,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    directory='my_dir',
    project_name='LTSM Results_FInal'
)

results = tuner.search(X_train_pad, train_lab, epochs=10, validation_split=0.2)


Trial 18 Complete [00h 08m 05s]
val_accuracy: 0.23591665923595428

Best val_accuracy So Far: 0.25858333706855774
Total elapsed time: 00h 52m 22s


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
best_model = tuner.get_best_models()[0]
best_model.summary()

  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
best_model.fit(X_train_pad, train_lab, epochs=10, validation_split=0.2)

Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 47ms/step - accuracy: 0.2487 - loss: 2999.7354 - val_accuracy: 0.2485 - val_loss: 1.4879
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 47ms/step - accuracy: 0.2371 - loss: 7.0773 - val_accuracy: 0.0000e+00 - val_loss: 1.3981
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 47ms/step - accuracy: 0.2129 - loss: 1.3962 - val_accuracy: 0.2573 - val_loss: 1.3922
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 47ms/step - accuracy: 0.1898 - loss: 1.3913 - val_accuracy: 0.2487 - val_loss: 1.3896
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 47ms/step - accuracy: 0.2311 - loss: 1.3889 - val_accuracy: 0.2487 - val_loss: 1.3881
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 47ms/step - accuracy: 0.2331 - loss: 1.3877 - val_accuracy: 0.0000e+00 - val_loss: 

<keras.src.callbacks.history.History at 0x793e8a9baf90>

In [None]:
predictive_results = best_model.evaluate(X_test_pad, test_lab)
predictive_results

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.1397 - loss: 1.2414


[1.2413263320922852, 0.14657895267009735]