### Checkpoints
* Modelling - Done
* config file and .py files for test and train - Done
* Multi GPU training - Done
* CPU inference that outputs the F1 score - Done
* Logging of training - Done
* Documentation of project - PPT Done, Report Done

# RUNNING CODE FROM .PY AND CONFIG FILES

In [18]:
# Install the `transformers` package; --quiet suppresses pip's progress output.
!pip install transformers --quiet

In [19]:
# Run the standalone training script, passing external.json through its --config option.
!python3 saarthi_train.py --config external.json

Running on TPU  grpc://10.104.183.170:8470
2021-11-16 04:13:43.960829: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
Using 8 TPUs
Validation data consists of same sentences as train data if the sound path is ignored.
New train shape- (200, 4) , New val shape- (48, 4)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
2021-11-16 04:17:36.142738: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 154414080 exceeds 10% of free system memory.


In [20]:
# Run the standalone test script with the same external.json config; it prints the F1 scores below.
!python3 saarthi_test.py --config 'external.json'  

Running on TPU  grpc://10.104.183.170:8470
2021-11-16 04:17:54.049937: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
Using 8 TPUs
Test shape is (248, 4)
F1 score for action--> 0.7862903225806451
F1 score for object--> 0.6653225806451613
F1 score for location--> 0.7661290322580645


# RUNNING IN COLAB

In [2]:
!pip install transformers --quiet
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn
from sklearn import metrics
import transformers
from transformers import AutoTokenizer
from sklearn.preprocessing import LabelEncoder
from transformers import TFRobertaModel
from keras.callbacks import CSVLogger
import json
import matplotlib.pyplot as plt
import random
import seaborn as sns
from sklearn.metrics import f1_score
import warnings
import logging, sys
import os
from datetime import datetime
# Disable logging calls at every severity up to sys.maxsize (i.e. all of them) and
# ignore Python warnings -- presumably to keep the notebook output readable.
logging.disable(sys.maxsize)
warnings.filterwarnings('ignore')

In [3]:
# Detect the available hardware and pick the matching tf.distribute strategy.
# TPUs are used when present, otherwise GPUs; with neither, computation runs on the CPU.
try:
    # TPU detection. No parameters are necessary if the TPU_NAME environment variable is set.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # No TPU could be resolved: fall through to the GPU / CPU branches below.
    tpu = None

if tpu:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    print('Using {} TPUs'.format(strategy.num_replicas_in_sync))

elif tf.config.list_physical_devices('GPU'):
    # Distribution strategy for one or more GPUs (replicates the model on each of them).
    strategy = tf.distribute.MirroredStrategy()
    print('Using {} GPUs'.format(strategy.num_replicas_in_sync))

else:
    # Default distribution strategy in TensorFlow (a single replica, no mirroring).
    # get_strategy() returns that default when called outside any strategy scope, which is
    # what the CPU-only fallback wants; MirroredStrategy is reserved for the GPU branch.
    strategy = tf.distribute.get_strategy()
    print('No GPU nor TPU. Running on CPU')

# Lets tf.data choose the prefetch buffer size at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

Running on TPU  grpc://10.104.183.170:8470
Using 8 TPUs


In [10]:
BATCH_SIZE = 32  # batch size given to .batch() when building the tf.data datasets
MAX_LEN = 13  # tokenizer max_length (pad/truncate) and width of the model's input layers
EPOCHS = 20  # number of epochs passed to model.fit
NUM_ACTION = 6  # units of the Dense head named 'action'
NUM_OBJECT = 14  # units of the Dense head named 'object'
NUM_LOCATION = 4  # units of the Dense head named 'location'
FOLDER_PATH = '/content/drive/MyDrive/Saarthi/'  # prefix for the CSVs, training log, weights and encoder files

# TRAINING (IGNORE IF NOT NEEDED)

In [17]:
# Number of de-duplicated rows kept for training; the remainder becomes the validation split.
TRAIN_ROWS = 200
# Seed for the shuffle so that the train/validation split is the same on every run.
SPLIT_SEED = 42

train_df = pd.read_csv(FOLDER_PATH+'train_data.csv')
val_df = pd.read_csv(FOLDER_PATH+'valid_data.csv')

# valid_data.csv is only inspected here: check whether its sentences all occur in the
# train file before reporting it, instead of printing the claim unconditionally.
set1 = set(train_df['transcription'])
set2 = set(val_df['transcription'])
if set2 <= set1:
    print('Validation data consists of same sentences as train data if the sound path is ignored.')
else:
    print('Validation data contains {} sentences that are not in train data.'.format(len(set2 - set1)))

# Drop the audio path and de-duplicate into a new frame (no in-place mutation), then
# shuffle with a fixed seed and carve the validation split out of the train file itself.
unique_df = train_df.drop(columns='path').drop_duplicates()
full = unique_df.sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)
train_df = full.iloc[:TRAIN_ROWS].copy()
val_df = full.iloc[TRAIN_ROWS:].copy()
print("New train shape-",train_df.shape,", New val shape-",val_df.shape)

Validation data consists of same sentences as train data if the sound path is ignored.
New train shape- (200, 4) , New val shape- (48, 4)


In [6]:
# Fit every label encoder on the labels of BOTH splits. The split is a random shuffle, so a
# rare class can end up only in the validation rows; an encoder fitted on the train rows alone
# would then raise ValueError (unseen labels) in transform(), here or later at prediction time.
all_labels_df = pd.concat([train_df, val_df])

action_enc = LabelEncoder().fit(all_labels_df['action'])
action_train = action_enc.transform(train_df['action'])
action_val = action_enc.transform(val_df['action'])

object_enc = LabelEncoder().fit(all_labels_df['object'])
object_train = object_enc.transform(train_df['object'])
object_val = object_enc.transform(val_df['object'])

location_enc = LabelEncoder().fit(all_labels_df['location'])
location_train = location_enc.transform(train_df['location'])
location_val = location_enc.transform(val_df['location'])

# The tokenizer is called with plain Python lists of sentences.
texts = train_df['transcription'].tolist()
val_texts = val_df['transcription'].tolist()

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
# Pad or truncate every sentence to exactly MAX_LEN tokens and return TensorFlow tensors.
train_data = tokenizer(texts, max_length=MAX_LEN, padding='max_length', truncation=True, return_tensors='tf')
val_data = tokenizer(val_texts, max_length=MAX_LEN, padding='max_length', truncation=True, return_tensors='tf')

In [7]:
# Targets are dictionaries keyed 'action' / 'object' / 'location', the same names the
# model's three Dense output layers carry.
y_train = {
    'action': action_train,
    'object':object_train,
    'location': location_train,
}
y_val = {
    'action': action_val,
    'object':object_val,
    'location': location_val,
}

# (features, targets) pairs in batches of BATCH_SIZE; only the training pipeline prefetches.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((dict(train_data), y_train))
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)
val_dataset = (
    tf.data.Dataset
    .from_tensor_slices((dict(val_data), y_val))
    .batch(BATCH_SIZE)
)

In [11]:
%%time
def build_model():
    """Build and compile the three-headed RoBERTa classifier.

    The model takes two int32 inputs of width MAX_LEN, named 'input_ids' and
    'attention_mask', runs them through a pretrained "roberta-base" encoder and
    feeds element [1] of the encoder output to three independent Dense heads
    named 'action', 'object' and 'location' (NUM_ACTION, NUM_OBJECT and
    NUM_LOCATION units respectively). The heads have no activation, so they
    emit logits; the loss is configured with from_logits=True accordingly.

    Returns:
        A compiled tf.keras Model whose outputs are [action, object, location].
    """
    input_ids = tf.keras.layers.Input((MAX_LEN,), dtype=tf.int32, name='input_ids')
    attention_mask = tf.keras.layers.Input((MAX_LEN,), dtype=tf.int32, name='attention_mask')

    encoder = TFRobertaModel.from_pretrained("roberta-base")
    encoder_outputs = encoder(input_ids, attention_mask=attention_mask)

    # One linear head per target, created in the order action -> object -> location.
    # NOTE(review): encoder_outputs[1] is presumably the pooled sentence vector -- confirm
    # against the transformers version in use.
    head_specs = (
        ('action', NUM_ACTION),
        ('object', NUM_OBJECT),
        ('location', NUM_LOCATION),
    )
    head_outputs = []
    for head_name, num_classes in head_specs:
        features = tf.keras.layers.Flatten()(encoder_outputs[1])
        head_outputs.append(tf.keras.layers.Dense(num_classes, name=head_name)(features))

    classifier = tf.keras.models.Model(inputs=[input_ids, attention_mask], outputs=head_outputs)

    # The single loss and the metric list below are applied to each of the three outputs.
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5, clipnorm=1.),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[
            tf.metrics.SparseCategoricalAccuracy(),
            tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name='Top_3'),
        ],
    )
    return classifier

# Build and compile the model inside the strategy scope so that its variables are
# created under the selected distribution strategy.
with strategy.scope():
    model = build_model()

CPU times: user 13 s, sys: 6.37 s, total: 19.4 s
Wall time: 43.5 s


In [13]:
# Log per-epoch metrics to log.csv under FOLDER_PATH (';'-separated, appended to on re-runs).
csv_logger = CSVLogger(
    FOLDER_PATH+'log.csv',
    separator=';',
    append=True,
)
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=val_dataset,
    callbacks=[csv_logger],
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
# Persist the trained weights, then the class list of each label encoder, under FOLDER_PATH
# so the prediction section can rebuild them without retraining.
model.save_weights(FOLDER_PATH+'model.h5')
for encoder_file, fitted_encoder in (
    ('action_encoder.npy', action_enc),
    ('object_encoder.npy', object_enc),
    ('location_encoder.npy', location_enc),
):
    np.save(FOLDER_PATH+encoder_file, fitted_encoder.classes_)

# PREDICTION

In [15]:
# Folder containing the saved model weights and the label encoders
FOLDER_PATH = '/content/drive/MyDrive/Saarthi/'
# Path to the dataset to evaluate on
# NOTE(review): this is valid_data.csv, which the training cell reports as containing the
# same sentences as the train data -- scores measured on it are likely optimistic.
TEST_PATH = FOLDER_PATH+'valid_data.csv'


def _restore_encoder(classes_file):
    """Return a LabelEncoder whose classes_ are loaded from `classes_file` under FOLDER_PATH."""
    encoder = LabelEncoder()
    # NOTE(review): allow_pickle=True unpickles the file's contents on load; only use it on
    # encoder files produced by this notebook, never on files from an untrusted source.
    encoder.classes_ = np.load(FOLDER_PATH+classes_file, allow_pickle=True)
    return encoder


# Read the CSV, drop the audio path column and remove duplicate rows.
test_df = pd.read_csv(TEST_PATH).drop('path', axis='columns').drop_duplicates()
print("Test shape is", test_df.shape)

# Rebuild each encoder from its saved classes and encode the ground-truth labels.
action_enc = _restore_encoder('action_encoder.npy')
action_test = action_enc.transform(test_df['action'])

object_enc = _restore_encoder('object_encoder.npy')
object_test = object_enc.transform(test_df['object'])

location_enc = _restore_encoder('location_encoder.npy')
location_test = location_enc.transform(test_df['location'])

test_texts = test_df['transcription'].tolist()
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

# Must equal the MAX_LEN used for training: it fixes the width of the model's inputs.
MAX_LEN = 13
test_data = tokenizer(
    test_texts,
    max_length=MAX_LEN,
    padding='max_length',
    truncation=True,
    return_tensors='tf',
)
y_test = {
    'action': action_test,
    'object':object_test,
    'location': location_test,
}
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices((dict(test_data), y_test))
    .batch(BATCH_SIZE)
)

def build_model():
    """Rebuild the three-headed RoBERTa classifier so saved weights can be loaded into it.

    The architecture is the one used for training: two int32 inputs of width
    MAX_LEN named 'input_ids' and 'attention_mask', a pretrained "roberta-base"
    encoder, and three Dense heads named 'action', 'object' and 'location'
    (NUM_ACTION, NUM_OBJECT and NUM_LOCATION units) that read element [1] of the
    encoder output. The heads have no activation, so they emit logits.

    Returns:
        A compiled tf.keras Model whose outputs are [action, object, location].
    """
    input_ids = tf.keras.layers.Input((MAX_LEN,), dtype=tf.int32, name='input_ids')
    attention_mask = tf.keras.layers.Input((MAX_LEN,), dtype=tf.int32, name='attention_mask')

    encoder = TFRobertaModel.from_pretrained("roberta-base")
    encoder_outputs = encoder(input_ids, attention_mask=attention_mask)

    # Heads are created in the order action -> object -> location, the same order in which
    # the prediction code indexes the model's outputs.
    # NOTE(review): encoder_outputs[1] is presumably the pooled sentence vector -- confirm
    # against the transformers version in use.
    head_specs = (
        ('action', NUM_ACTION),
        ('object', NUM_OBJECT),
        ('location', NUM_LOCATION),
    )
    head_outputs = []
    for head_name, num_classes in head_specs:
        features = tf.keras.layers.Flatten()(encoder_outputs[1])
        head_outputs.append(tf.keras.layers.Dense(num_classes, name=head_name)(features))

    classifier = tf.keras.models.Model(inputs=[input_ids, attention_mask], outputs=head_outputs)

    # Compiled with the training configuration; the loss and metrics apply to every output.
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5, clipnorm=1.),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[
            tf.metrics.SparseCategoricalAccuracy(),
            tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name='Top_3'),
        ],
    )
    return classifier

# Rebuild the network under the active distribution strategy and restore the trained weights.
with strategy.scope():
    model = build_model()

model.load_weights(FOLDER_PATH+'model.h5')
# `ans` is indexed per head below: [0] action, [1] object, [2] location.
ans = model.predict(test_dataset)

# The highest-scoring class index of each head is mapped back to its original label.
action_preds = action_enc.inverse_transform(np.argmax(ans[0],axis=1))
object_preds = object_enc.inverse_transform(np.argmax(ans[1],axis=1))
location_preds = location_enc.inverse_transform(np.argmax(ans[2],axis=1))

# Ground truth and predictions side by side; the frame keeps test_df's index.
pred_df = pd.DataFrame({'input':test_df['transcription'],'action':test_df['action'],'object':test_df['object'],'location':test_df['location'],
                        'action_preds':action_preds, 'object_preds':object_preds, 'location_preds':location_preds})

# micro f1 score
# scikit-learn's signature is f1_score(y_true, y_pred): the ground truth goes first. The
# micro average happens to be symmetric, but any other `average` would silently be wrong
# with the arguments swapped.
action_f1 = f1_score(pred_df['action'],pred_df['action_preds'], average='micro')
object_f1 = f1_score(pred_df['object'],pred_df['object_preds'], average='micro')
location_f1 = f1_score(pred_df['location'],pred_df['location_preds'], average='micro')
print('F1 score for action-->',action_f1)
print('F1 score for object-->',object_f1)
print('F1 score for location-->',location_f1)
pred_df.to_csv(FOLDER_PATH+'predictions.csv',index=False)

Test shape is (248, 4)
F1 score for action--> 1.0
F1 score for object--> 1.0
F1 score for location--> 1.0


In [16]:
# Print the predictions table as plain text (pandas abbreviates long/wide frames with '...').
print(pred_df)

                                          input  ... location_preds
0                            Turn on the lights  ...           none
1                           Turn off the lights  ...           none
2                               Change language  ...           none
3                               Pause the music  ...           none
4                                        Resume  ...           none
..                                          ...  ...            ...
243                Turn the washroom lights off  ...       washroom
244                                       Pause  ...           none
326                  Lights off in the washroom  ...       washroom
418                           Bedroom heat down  ...        bedroom
478  OK now switch the main language to Chinese  ...           none

[248 rows x 7 columns]
