In [None]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
train_feat = pd.read_csv('preprocessed_train_data.csv')
train_lab = pd.read_csv('preprocessed_train_label.csv')

test_feat = pd.read_csv('preprocessed_test_data.csv')
test_lab = pd.read_csv('preprocessed_test_label.csv')


In [None]:
train_feat = train_feat.drop(columns = ['Class Index'])
test_feat = test_feat.drop(columns = ['Class Index'])



In [None]:
train_feat

Unnamed: 0.1,Unnamed: 0,text
0,0,new windows planned for 2006 microsoft corp. a...
1,1,intel's president is promoted to c.e.o. the pr...
2,2,pentagon expands outposts in middle east washi...
3,3,"nortel cuts fewer jobs, exits real estate otta..."
4,4,"vijay swings to \$10 m kitty vijay singh, fiji..."
...,...,...
59995,59995,"as promised, party was quite subdued foxboroug..."
59996,59996,"no. 1 illinois 74, georgetown 59 washington - ..."
59997,59997,the first guantanamo trials are set to take pl...
59998,59998,e-gold tracks cisco code thief the electronic ...


In [None]:
import ast

def clean (column):
  try:
    char_list = ast.literal_eval(column)
    joined_str = "".join(char_list)
    tokens  = joined_str.split()
    new_row = ' '.join(tokens)

    return new_row
  except(ValueError, SyntaxError):
    return ''


train_feat['text'] = train_feat['text'].apply(clean)
test_feat['text'] = test_feat['text'].apply(clean)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train_feat['text'])
X_train_seq = tokenizer.texts_to_sequences(train_feat['text'])
X_train_pad = pad_sequences(X_train_seq, maxlen=100, padding='post')


tokenizer_ = Tokenizer(num_words=10000)
tokenizer_.fit_on_texts(test_feat['text'])
X_test_seq = tokenizer.texts_to_sequences(test_feat['text'])
X_test_pad = pad_sequences(X_test_seq, maxlen=100, padding='post')

In [None]:
train_feat = train_feat.iloc[:, 1:]
test_feat = test_feat.iloc[:, 1:]
train_lab = train_lab.iloc[:, 1:]
test_lab = test_lab.iloc[:, 1:]

In [None]:
train_feat

Unnamed: 0,text
0,new windows planned for 2006 microsoft corp. a...
1,intel's president is promoted to c.e.o. the pr...
2,pentagon expands outposts in middle east washi...
3,"nortel cuts fewer jobs, exits real estate otta..."
4,"vijay swings to \$10 m kitty vijay singh, fiji..."
...,...
59995,"as promised, party was quite subdued foxboroug..."
59996,"no. 1 illinois 74, georgetown 59 washington - ..."
59997,the first guantanamo trials are set to take pl...
59998,e-gold tracks cisco code thief the electronic ...


In [None]:
train_lab

Unnamed: 0,Class Label
0,4
1,1
2,1
3,3
4,2
...,...
59995,2
59996,2
59997,1
59998,4


In [None]:
pip install keras-tuner --upgrade

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
import kerastuner as kt

model__n_hidden_neurons = [20, 40, 60, 80]
model__activation_function = ['relu', 'sigmoid', 'tanh']

def build_mlp(model__n_hidden_neurons, model__activation_function, n_hidden_layers=2):
    model = keras.models.Sequential()
    # adding input layer
    model.add(keras.layers.Flatten())
    # adding hidden layers
    for layer in range(n_hidden_layers):
        model.add(keras.layers.Dense(model__n_hidden_neurons, activation=model__activation_function))
    # adding output layer
    model.add(keras.layers.Dense(5, activation="softmax"))
    model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model

def build_mlp_wrapper(hp):
    n_hidden_neurons = hp.Choice("model__n_hidden_neurons", model__n_hidden_neurons)
    activation_function = hp.Choice("model__activation_function", model__activation_function)
    return build_mlp(n_hidden_neurons, activation_function, n_hidden_layers=2)


tuner = kt.Hyperband(build_mlp_wrapper,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='my_project_1')

results = tuner.search(X_train_pad, train_lab, epochs=10, validation_split=0.2)

Trial 18 Complete [00h 00m 30s]
val_accuracy: 0.3061666786670685

Best val_accuracy So Far: 0.30933332443237305
Total elapsed time: 00h 04m 16s


In [None]:
best_model = tuner.get_best_models()[0]
best_model.summary()

In [None]:
best_model.fit(X_train_pad, train_lab, epochs=10, validation_split=0.2)

Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.3153 - loss: 1.3615 - val_accuracy: 0.3000 - val_loss: 1.3677
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.3170 - loss: 1.3616 - val_accuracy: 0.3062 - val_loss: 1.3655
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.3170 - loss: 1.3601 - val_accuracy: 0.3127 - val_loss: 1.3647
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.3182 - loss: 1.3603 - val_accuracy: 0.3053 - val_loss: 1.3676
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.3202 - loss: 1.3587 - val_accuracy: 0.3086 - val_loss: 1.3660
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.3153 - loss: 1.3574 - val_accuracy: 0.3081 - val_loss: 1.3660
Epoch 7/10
[1m1

<keras.src.callbacks.history.History at 0x7c7e6e262e90>

In [None]:
best_model.evaluate(X_test_pad, test_lab)

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3111 - loss: 1.3636


[1.3635796308517456, 0.31605264544487]