In [1]:
# !pip install tensorflow_hub

In [2]:
import os
import warnings
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

warnings.filterwarnings("ignore")

In [3]:
df = pd.read_csv("NLP_dataset_Mark_4.csv")
df

Unnamed: 0,0,1
0,do sports,Physical Activity
1,doing sports,Physical Activity
2,training,Physical Activity
3,to train,Physical Activity
4,sport,Physical Activity
...,...,...
1136,making bed,Daily Routine
1137,doing dishes,Daily Routine
1138,calling a loved one,Daily Routine
1139,mopping floors,Daily Routine


In [4]:
# a, b = df.shape
# for i in range(a):
#     if df['1'].iloc[i] == "Food":
#         df.loc[i, '1'] = "Daily Routine"

In [5]:
X, y = df["0"], df["1"]

y = pd.get_dummies(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=0,
                                                    shuffle=True,
                                                    stratify=y.to_numpy())

In [6]:
# Pre-Trained Text Embedding Model & Layer Definition
Embed = 'https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1'
# Embed = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/2'
# Embed = 'https://tfhub.dev/google/edgetpu/nlp/mobilebert-edgetpu/xs/1'
Trainable_Module = False
hub_layer = hub.KerasLayer(Embed, input_shape=[], dtype=tf.string, trainable=Trainable_Module)

# hub_layer = hub.KerasLayer("https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1",
#                            output_shape=[20], input_shape=[], dtype=tf.string)

# Build Model (Original Title Text)
model = tf.keras.Sequential()
model.add(hub_layer)           #pre-trained text embedding layer
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(4))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 20)                400020    
                                                                 
 dense (Dense)               (None, 16)                336       
                                                                 
 dense_1 (Dense)             (None, 4)                 68        
                                                                 
Total params: 400,424
Trainable params: 404
Non-trainable params: 400,020
_________________________________________________________________


In [7]:
adam = Adam(learning_rate=0.01)
model.compile(optimizer=adam,
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy']
              )

In [8]:
EPOCHS = 25
BATCH_SIZE = 16

tf.config.run_functions_eagerly(True)

history = model.fit(X_train, y_train, batch_size = BATCH_SIZE,
                    epochs = EPOCHS, validation_split = 0.1,
                    verbose=1)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [9]:
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.17784957587718964
Test accuracy: 0.9082969427108765


In [10]:
y_true = [np.argmax(row, axis = 0) for row in y_test.to_numpy()]
y_pred = [np.argmax(row, axis = 0) for row in model.predict(X_test)]
target_names = ["Daily Routine",	"Passive Rest",	"Physical Activity",
                "Work-study"]
print(classification_report(y_true, y_pred, target_names=target_names))

                   precision    recall  f1-score   support

    Daily Routine       0.82      0.84      0.83        32
     Passive Rest       0.93      0.76      0.84        17
Physical Activity       0.85      0.97      0.91        64
       Work-study       0.97      0.91      0.94       116

         accuracy                           0.91       229
        macro avg       0.89      0.87      0.88       229
     weighted avg       0.91      0.91      0.91       229



In [11]:
dictionary = {0:"Daily Routine",
              1:"Passive Rest",
              2:"Physical Activity",
              3:"Work-study"}

example = 2

idx = np.argmax(model.predict(X_test)[example], axis = 0)
max_idx = np.argmax(y_test.iloc[example], axis = 0)
print("X:", X_test.iloc[example], "- predicted label:",
      dictionary[idx], "- true label:", dictionary[max_idx])
model.predict(X_test)[example]

X: science - predicted label: Work-study - true label: Work-study


array([-20.690817, -18.869396, -26.726114,  20.696804], dtype=float32)