# Doing the NLP side

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.feature_extraction.text import CountVectorizer
from tools import get_data, load_data
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm



## Load the data

In [2]:
# import data
train_data = get_data("./dataset/train.csv")
test_data = get_data("./dataset/test.csv")

# perform text cleaning and get the pandas' dataframe
train_data = load_data(train_data)
test_data = load_data(test_data, has_label=False)

# join the data together
for_nlp_data = pd.concat((train_data['caption'], test_data['caption']), ignore_index=True)

In [3]:
train_data['caption']

0          woman in swim suit holding parasol on sunny day
1        a couple of men riding horses on top of a gree...
2        they are brave for riding in the jungle on tho...
3        a black and silver clock tower at an intersect...
4          a train coming to a stop on the tracks out side
                               ...                        
29995    a picture of a truck that is in the middle of ...
29996    a plate topped with a pizza being cut with a s...
29997              a man riding a snowboard on top of snow
29998      this photo shows people skiing in the mountains
29999    two young men playing soccer and fighting for ...
Name: caption, Length: 30000, dtype: object

In [4]:
test_data['caption']

0       a little girl waring a krispy kreme hat holdin...
1       a beautiful young woman holding an orange frisbee
2       a group of people sitting on couch next to a c...
3               a person on a snowboard rides on the hill
4       a man riding a skateboard with a helmet on in ...
                              ...                        
9995    a group of men riding surfboards riding a mass...
9996    a motorcycle parked next to a car in a parking...
9997              a little boy that is playing with a wii
9998    group of kids play frisbee golf in the middle ...
9999      a man in a gray jacket standing next to a woman
Name: caption, Length: 10000, dtype: object

In [5]:
lengths = [len(sentence) for sentence in train_data['caption'].to_list()]
print(max(lengths))

lengths = [len(sentence) for sentence in test_data['caption'].to_list()]
print(max(lengths))

237
230


## Count Vector

In [6]:
vectorizer = CountVectorizer(lowercase=True)
vectorizer.fit(for_nlp_data)

X_train_vec = vectorizer.transform(train_data['caption'])
X_test_vec = vectorizer.transform(test_data['caption'])

In [7]:
# the size of vocabulary is 8075
print(X_train_vec.shape)
print(X_test_vec.shape)

(30000, 8075)
(10000, 8075)


In [8]:
X_train_tensor = torch.from_numpy(X_train_vec.todense()).int()
X_test_tensor = torch.from_numpy(X_test_vec.todense()).int()
y_train_tensor = torch.from_numpy(np.array(train_data.iloc[:, 2:]))
y_test_tensor = torch.from_numpy(np.array(test_data.iloc[:, 2:]))

In [9]:
train_data = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(
    dataset=train_data, 
    batch_size=16, 
    shuffle=True
)

## Create the Model Here

In [10]:
class RNN_LSTM(nn.Module):
    
	def __init__(self, num_embeddings: int, embedding_dim: int, hidden_size: int, output_size: int) -> None:
		super(RNN_LSTM, self).__init__()

		# Add the word embedding layer
		self.embedding_layer = nn.Embedding(num_embeddings=num_embeddings+1, embedding_dim=embedding_dim)

		# Add the LSTM Layer
		self.lstm_layer = nn.LSTM(input_size=embedding_dim*num_embeddings, hidden_size=hidden_size)

		# Add the Output Layer
		self.fc_layer1 = nn.Linear(in_features=hidden_size, out_features=output_size)
		
	def forward(self, x):
		embeds = self.embedding_layer(x)
		lstm_out, _ = self.lstm_layer(embeds.view(len(x), -1))
		tag_space = self.fc_layer1(lstm_out)		
		tag_scores = F.tanh(tag_space)
		return tag_scores

In [11]:
epochs = 5
threshold = 0.5

model = RNN_LSTM(
    num_embeddings=8075,
    embedding_dim=3,
    hidden_size=1000,
    output_size=19,
)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.01,
)

# utilise GPU
if torch.cuda.is_available():
    print('using GPU')
    model = model.to('cuda')

using GPU


In [12]:
train_losses = []
train_accs = []
for epoch in range(epochs):

	n_total = 0
	n_correct = 0
	train_loss = 0.
	model.train()
	for captions, labels in tqdm(train_loader, desc=f"Epoch {epoch+1} Training: "):

		if torch.cuda.is_available():
			captions = captions.to('cuda')
			labels = labels.to('cuda')

		y_pred = model(captions)

		# backward
		loss = loss_fn(y_pred, labels)
		loss.backward()

		# update
		optimizer.step()

		# compare
		predicted = (y_pred > threshold).int()

		train_loss += loss.item()
		n_correct += torch.all(torch.eq(predicted, labels), dim=1).sum()
		n_total += labels.shape[0]

	train_losses.append(train_loss / len(train_loader))
	train_accs.append(n_correct / n_total)

	print("Epoch {:d}, Train Loss: {:.7f}, Train Accuracy: {:.3f}%".format(epoch+1, train_losses[-1], train_accs[-1]*100))

Epoch 1 Training: 100%|██████████| 1875/1875 [02:00<00:00, 15.62it/s]


Epoch 1, Train Loss: 0.3677232, Train Accuracy: 46.897%


Epoch 2 Training: 100%|██████████| 1875/1875 [01:59<00:00, 15.64it/s]


Epoch 2, Train Loss: 0.3675495, Train Accuracy: 46.917%


Epoch 3 Training: 100%|██████████| 1875/1875 [01:59<00:00, 15.63it/s]


Epoch 3, Train Loss: 0.3675495, Train Accuracy: 46.917%


Epoch 4 Training: 100%|██████████| 1875/1875 [01:59<00:00, 15.64it/s]


Epoch 4, Train Loss: 0.3675495, Train Accuracy: 46.917%


Epoch 5 Training: 100%|██████████| 1875/1875 [01:59<00:00, 15.63it/s]

Epoch 5, Train Loss: 0.3675495, Train Accuracy: 46.917%



