In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
import tensorflow_hub as hub
from tensorflow_hub import KerasLayer
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical




In [2]:
print(tf.__version__)
print(hub.__version__)

2.15.0
0.16.1


In [3]:
file_path = '../data/processed/full_2k.csv'
full_2k = pd.read_csv(file_path)

## One-Hot Encode Genres

In [4]:
categories = full_2k['Category'].unique()

# Map each category to an integer
category_to_index = {cat: idx for idx, cat in enumerate(categories)}

print(category_to_index)

{'Biography': 0, 'Religion': 1, 'General': 2, 'History': 3, 'Literary': 4, 'Business': 5, 'Mystery': 6, 'Cooking': 7, 'Juvenile': 8, 'Romance': 9, 'Thrillers': 10}


In [5]:
full_2k['Category_idx'] = full_2k['Category'].map(category_to_index)

In [6]:
N_CLASSES = len(categories)

# One-hot encode
one_hot = to_categorical(full_2k['Category_idx'], num_classes=N_CLASSES)

print(one_hot)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


## Train/Test Split

In [7]:
N_TRAIN = int(len(full_2k)*0.8)

X_train, y_train = (full_2k.Description[:N_TRAIN], one_hot[:N_TRAIN])
X_test, y_test = (full_2k.Description[N_TRAIN:], one_hot[N_TRAIN:])

In [8]:
X_train.head(5), y_train

(0    The Russian author offers an affectionate chro...
 1    "The Bible and the social and moral consequenc...
 2    A New York Times Notable Book of the YearThis ...
 3    It was a wonderful summer, a great memory, the...
 4    For the first time ever, veteran World War II ...
 Name: Description, dtype: object,
 array([[1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 1., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32))

## NNLM Model

In [9]:
NNLM = "https://tfhub.dev/google/nnlm-en-dim50/2"

nnlm_module = hub.KerasLayer(
    NNLM, output_shape=[50], input_shape=[], dtype=tf.string, trainable=True)



















In [10]:
model = Sequential([
    nnlm_module,
    Dense(16, activation='relu'),
    Dense(N_CLASSES, activation='softmax')
])

model.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)







In [14]:
history = model.fit(
        X_train, y_train,
        epochs=10,
        batch_size=16)

Epoch 1/10












 143/1722 [=>............................] - ETA: 8:29 - loss: 2.2457 - accuracy: 0.2365

KeyboardInterrupt: 