In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, Subset
from torch.utils.data.sampler import SubsetRandomSampler, SequentialSampler
from torch.optim import Adam
from torch.nn import BCELoss
from torchmetrics import Accuracy

from layers import GRULayer, AttnLayer, BilinearLayer, LSTMLayer, TemporalAttn
from model import SMEncoder, PriceEncoder, ConcatProcessor

import pickle
import numpy as np
from modeldata import ModelData



In [2]:
with open('text_tensor.pkl', 'rb') as f:
    text = pickle.load(f)

In [3]:
with open('price_tensor.pkl', 'rb') as f:
    prices = pickle.load(f)

In [4]:
with open('targets_onehot.pkl', 'rb') as f:
    targets = pickle.load(f)

In [5]:
len(text)

534

In [6]:
len(targets)

534

In [7]:
dataset = ModelData(text, prices, targets)

In [8]:
sample = dataset[0]

In [9]:
s_text, s_price, s_target = sample

In [10]:
s_text.shape

torch.Size([5, 30, 512])

In [11]:
s_price.shape

torch.Size([5, 3])

In [12]:
s_target.shape

torch.Size([2])

In [13]:
train_split = 0.8
test_split = 0
val_split = 0.2

dataset_size = len(dataset)
indices = list(range(dataset_size))
split1 = int(dataset_size * train_split)
split2 = int(dataset_size * (train_split + test_split))
split3 = int(dataset_size * (train_split + test_split + val_split))

train_indices = indices[:split1]
test_indices = indices[split1:split2]
val_indices = indices[split2:split3]


In [14]:
split1, split2, split3

(427, 427, 534)

In [15]:
train = Subset(dataset, train_indices)
test = Subset(dataset, test_indices)
val = Subset(dataset, val_indices)

In [16]:
len(train), len(test), len(val)

(427, 0, 107)

In [17]:
s_text1, s_price1, s_target1 = train[0]

In [18]:
train_data = DataLoader(train, batch_size=4, shuffle=True)
# test_data = DataLoader(test, batch_size=5, shuffle=False)
val_data = DataLoader(val, batch_size=4, shuffle=False)

# Training

In [19]:
model = ConcatProcessor(512, 3, 64)

In [20]:
criterion = BCELoss()
optimizer = Adam(model.parameters(), lr=0.1)

In [21]:
# for text, price, target in train_data:
#     print(text.shape, price.shape, target.shape)
#     # print(text.dtype, price.dtype, target.dtype)

In [25]:
for epoch in range(10):
    for text, price, target in train_data:
        optimizer.zero_grad()
        output = model(text, price)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

In [26]:
acc = Accuracy(task='binary')

In [27]:
model.eval()
with torch.no_grad():
    for text, price, target in val_data:
        output = model(text, price)
        print(acc(output, target))
accuracy = acc.compute()
print(f"Accuracy: {accuracy}")        

tensor(0.7500)
tensor(0.5000)
tensor(0.5000)
tensor(0.5000)
tensor(0.2500)
tensor(0.2500)
tensor(0.7500)
tensor(0.5000)
tensor(0.2500)
tensor(0.2500)
tensor(0.)
tensor(0.2500)
tensor(0.5000)
tensor(0.2500)
tensor(0.7500)
tensor(0.2500)
tensor(0.5000)
tensor(0.5000)
tensor(0.7500)
tensor(0.2500)
tensor(0.5000)
tensor(0.7500)
tensor(0.7500)
tensor(0.5000)
tensor(1.)
tensor(0.5000)
tensor(0.3333)
Accuracy: 0.47663551568984985
