In [1]:
import sys
import os
import copy
sys.path.append('../')
from model import NextWordPredictor, train, test

In [2]:
import pandas as pd
import torch
import torchtext.functional as F

In [3]:
# Read the train / test / validation CSV splits stored under
# ../data/stack_overflow (paths are relative to the notebook's working dir).
train_d, test_d, val_d = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/stack_overflow/train.csv',
        '../data/stack_overflow/test.csv',
        '../data/stack_overflow/val.csv',
    )
)

In [4]:
# Use the `text` field of the row labelled 0 in each split as the working sample.
train_sentence = train_d.loc[0, 'text']
test_sentence = test_d.loc[0, 'text']
val_sentence = val_d.loc[0, 'text']

In [5]:
# Whitespace-tokenise the training and test samples.
words1, words2 = (sample.split() for sample in (train_sentence, test_sentence))

In [6]:
# Vocabulary: every distinct token occurring in either sample.
vocab = {*words1, *words2}

In [7]:
# Build the token <-> integer-id lookup tables.
# Enumerate the *sorted* vocabulary: iterating a set of strings directly yields
# an order that can change between interpreter sessions (string hashing is
# randomised per process). That would silently remap every token id, so weights
# saved in one session would no longer line up with the ids in another.
word_to_index = {word: i for i, word in enumerate(sorted(vocab))}
index_to_word = {i: word for word, i in word_to_index.items()}

In [8]:
# Number of distinct tokens (one id was assigned per vocabulary entry);
# the bare expression displays it as the cell output.
vocab_size = len(word_to_index)
vocab_size

7843

In [9]:
# Translate each token sequence into the matching sequence of integer ids.
indices1 = list(map(word_to_index.__getitem__, words1))
indices2 = list(map(word_to_index.__getitem__, words2))

In [10]:
train_data = torch.tensor(indices1 + indices2, dtype=torch.long).view(1, -1)  # Combine both sentences for training
train_targets = torch.tensor(indices1 + indices2, dtype=torch.long).view(1, -1)  # Targets are next words

In [11]:
test_data = torch.tensor(indices2, dtype=torch.long).view(1, -1)  # Test sequence
test_targets = torch.tensor(indices2, dtype=torch.long).view(1, -1)  # Test targets

In [12]:
# Pair each input tensor with its targets in a dataset, then serve the
# datasets one sample per batch.
train_dataset = torch.utils.data.TensorDataset(train_data, train_targets)
test_dataset = torch.utils.data.TensorDataset(test_data, test_targets)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1)

In [13]:
# Hyperparameters shared by the cells below
embedding_dim, hidden_dim = 100, 128  # handed to the NextWordPredictor constructor
epochs = 10  # handed to train()

In [14]:
import torch.nn as nn
import torch.optim as optim

In [18]:
# Initialize model, criterion, and optimizer.
# The first constructor argument is `vocab_size` rather than a hard-coded
# number, matching how `another_model` is built further down: this model's
# saved state dict is later loaded into a copy of that one, so both must be
# constructed with the same arguments.
# NOTE(review): presumably the first argument is the vocabulary size — confirm
# against NextWordPredictor in model.py.
model = NextWordPredictor(vocab_size, embedding_dim, hidden_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [19]:
# Fit the model with the project's `train` helper (imported from `model`),
# passing the loader, loss, optimizer and epoch count defined above.
# The captured output shows one loss line per epoch, and the final loss is
# displayed as the cell's value.
train(model, train_loader, criterion, optimizer, epochs)

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch 1/10, Loss: 9.21760082244873
Epoch 2/10, Loss: 8.915006637573242
Epoch 3/10, Loss: 8.49959945678711
Epoch 4/10, Loss: 7.621667861938477
Epoch 5/10, Loss: 6.866685390472412
Epoch 6/10, Loss: 6.191314697265625
Epoch 7/10, Loss: 5.710676670074463
Epoch 8/10, Loss: 5.361766338348389
Epoch 9/10, Loss: 5.057620525360107
Epoch 10/10, Loss: 4.764069080352783


4.764069080352783

In [20]:
# Evaluate the trained model on the test loader with the project's `test`
# helper. The displayed value is whatever it returns (presumably the loss
# under `criterion` — confirm in model.py).
test(model, test_loader, criterion)

4.433952331542969

In [21]:
# Persist the trained weights (the state dict only, not the whole module).
path = '../results/test'
# exist_ok=True creates the directory tree when needed and is a no-op when it
# already exists, avoiding the check-then-create race of a separate exists() test.
os.makedirs(path, exist_ok=True)

torch.save(model.state_dict(), f'{path}/model.pth')

In [24]:
# A second, freshly constructed instance that will receive the saved weights.
# It is built from the shared hyperparameters (embedding_dim, hidden_dim)
# instead of repeating the literals 100 and 128, so it cannot drift out of
# sync with the trained model whose state dict is loaded into a copy of it.
another_model = NextWordPredictor(vocab_size, embedding_dim, hidden_dim)
another_model

NextWordPredictor(
  (embedding): Embedding(10000, 100)
  (rnn): LSTM(100, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=10000, bias=True)
)

In [23]:
# Display the trained model's repr (its module structure).
model

NextWordPredictor(
  (embedding): Embedding(10000, 100)
  (rnn): LSTM(100, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=10000, bias=True)
)

In [25]:
model_path = f'{path}/model.pth'

# Fail loudly when the checkpoint is missing. Silently skipping the load would
# leave `new_model` undefined (or, in a long-lived kernel, bound to a stale
# object from an earlier run), and the evaluation cell that follows would then
# either crash with a NameError or score the wrong model.
if not os.path.exists(model_path):
    raise FileNotFoundError(f'No saved model found at {model_path}')

# Load into a deep copy so `another_model` itself keeps its original weights.
new_model = copy.deepcopy(another_model)
new_model.load_state_dict(torch.load(model_path))
print(new_model)

NextWordPredictor(
  (embedding): Embedding(10000, 100)
  (rnn): LSTM(100, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=10000, bias=True)
)


In [26]:
# Evaluate the reloaded model on the same test loader. The captured output
# equals the value shown for `model` earlier, which is what identical weights
# should produce.
test(new_model, test_loader, criterion)

4.433952331542969