In [None]:
# Root directory for this notebook's data files (relative to the working
# directory the kernel was started in).
base_data_path = "data/mymethod/"

In [None]:
!ls data/mymethod

In [None]:
import torch
from torch.utils.data import Dataset

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
# from torch.utils.tensorboard import SummaryWriter
# from tqdm import tqdm
from tqdm.notebook import tqdm

from matplotlib import pyplot as plt

In [None]:
class EmbeddingDataset(Dataset):
    """Map-style dataset pairing one embedding vector with one scalar label.

    Args:
        embeddings: A tensor, a nested list / array of numbers, or a list of
            equally-shaped tensors. Stored as a float32 tensor in ``self.X``.
        labels: A tensor, a list of scalars, or a list of 0-d tensors.
            Stored in ``self.y``.
        label_dtype: dtype used for ``self.y``. Defaults to float32 so that
            regression targets keep full precision (float16 overflows above
            65504 and rounds aggressively); pass ``torch.long`` for
            classification targets.
    """

    def __init__(self, embeddings, labels, label_dtype=torch.float32):
        self.X = self._to_tensor(embeddings, torch.float32)
        self.y = self._to_tensor(labels, label_dtype)

    @staticmethod
    def _to_tensor(values, dtype):
        """Copy ``values`` into a new tensor of the requested dtype."""
        if torch.is_tensor(values):
            return values.detach().clone().to(dtype)
        # torch.tensor() cannot build from a list of multi-element tensors,
        # so stack those explicitly.
        if len(values) > 0 and torch.is_tensor(values[0]):
            return torch.stack([item.detach() for item in values]).to(dtype)
        return torch.tensor(values, dtype=dtype)

    def __len__(self):
        """Number of samples, defined by the number of labels."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(embedding, label)`` pair at position ``idx``."""
        return self.X[idx], self.y[idx]

In [None]:
# Load the three pre-built splits from the shared data directory.
# The files are presumably pickled EmbeddingDataset objects (later cells read
# their .X / .y attributes), so full unpickling is required: recent PyTorch
# releases default to weights_only=True, which rejects custom classes.
# SECURITY: unpickling can execute arbitrary code - only load trusted files.
_data_dir = base_data_path.rstrip("/")
training_dataset = torch.load(f"{_data_dir}/training.pt", weights_only=False)
testing_dataset = torch.load(f"{_data_dir}/testing.pt", weights_only=False)
val_dataset = torch.load(f"{_data_dir}/val.pt", weights_only=False)

In [None]:
# Sample counts, in the order (validation, testing, training).
tuple(len(split) for split in (val_dataset, testing_dataset, training_dataset))

In [None]:
def convert_dataset_to_float32(dataset):
    """Cast the targets stored in ``dataset.y`` to float32.

    The dataset is modified in place and also returned, so the call can be
    used either as a statement or as an expression.
    """
    targets_fp32 = dataset.y.float()
    dataset.y = targets_fp32
    return dataset

# Cast the targets of every split to float32 (each dataset is updated in place).
training_dataset, testing_dataset, val_dataset = (
    convert_dataset_to_float32(split)
    for split in (training_dataset, testing_dataset, val_dataset)
)


In [None]:
class SimpleNeuralNet(nn.Module):
    """Two-layer MLP: Linear -> LayerNorm -> ReLU -> Dropout -> Linear.

    Args:
        input_dim: Size of each input vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Size of each output vector.
        dropout: Dropout probability applied after the ReLU. Defaults to 0.3,
            the value that used to be hard-coded.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.3):
        super().__init__()
        # Attribute names and the Sequential layout are kept as-is so that
        # state_dict keys of previously saved checkpoints still match.
        self.layer1 = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
        )
        self.layer2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the hidden block, then project to ``output_dim``."""
        hidden = self.layer1(x)
        return self.layer2(hidden)


In [None]:
# Derive the network sizes from the length of one embedding vector:
# the hidden layer is half as wide as the input (rounded down).
input_dim = int(testing_dataset.X[0].shape[0])
hidden_dim = input_dim // 2

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise,
# instead of failing outright on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleNeuralNet(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=1)
model = model.to(device)

In [None]:
BASE_LR = 1e-3   # learning rate at epoch 0 (also the optimizer's initial lr)
FINAL_LR = 1e-6  # learning rate once the schedule has fully decayed


def linear_decay(total_epochs, lr_start=BASE_LR, lr_end=FINAL_LR):
    """Build a LambdaLR multiplier that decays linearly over ``total_epochs``.

    The returned function maps an epoch index to the factor applied to the
    optimizer's initial lr: 1.0 at epoch 0, ``lr_end / lr_start`` at
    ``total_epochs``. Epochs beyond ``total_epochs`` are clamped, so stepping
    the scheduler too often holds ``lr_end`` instead of producing a negative
    learning rate.
    """
    def factor(epoch):
        remaining = 1 - min(epoch, total_epochs) / total_epochs
        return lr_end / lr_start + remaining * (lr_start - lr_end) / lr_start

    return factor


optimizer = optim.AdamW(model.parameters(), lr=BASE_LR)

# Two alternative schedules share the same optimizer; step only ONE of them
# in the training loop, matching the number of epochs actually trained.
num_epochs_10 = 10
scheduler_10 = optim.lr_scheduler.LambdaLR(
    optimizer,
    lr_lambda=linear_decay(num_epochs_10),
)

num_epochs_5 = 5
scheduler_5 = optim.lr_scheduler.LambdaLR(
    optimizer,
    lr_lambda=linear_decay(num_epochs_5),
)

In [None]:
# Mean-squared error, averaged over the elements of each batch.
criterion = nn.MSELoss(reduction="mean")

In [None]:
# Train for exactly as many epochs as the 10-epoch LR schedule was built for,
# so the two values cannot drift apart.
num_epochs = num_epochs_10

In [None]:
# Batch the training and testing splits. Only the training loader shuffles;
# the testing loader keeps a fixed order.
batch_size = 512
train_loader = DataLoader(dataset=training_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=testing_dataset, batch_size=batch_size, shuffle=False)


In [None]:
train_losses = []
test_losses = []

# Send batches to whichever device the model lives on rather than a
# hard-coded "cuda", so the loop follows the model wherever it was placed.
device = next(model.parameters()).device

for epoch_num in range(num_epochs):

    # --- Evaluate on the test split BEFORE this epoch's weight updates ---
    # test_losses[i] is therefore the loss after i completed training epochs.
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(-1), labels)
            # criterion yields a per-batch mean; weight it by the batch size
            # so the (possibly smaller) last batch is not over-counted.
            test_loss += loss.item() * inputs.size(0)
    avg_loss = test_loss / len(testing_dataset)
    test_losses.append(avg_loss)
    print(f"TEST LOSS: PRE Epoch-{epoch_num}, we have {avg_loss:.4f}")

    # --- Train for one epoch ---
    model.train()
    train_loss = 0.0

    for inputs, labels in tqdm(train_loader, desc=f"Training Epoch {epoch_num}"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        # Drop the trailing size-1 output dimension before computing the loss.
        # Labels are expected to be 1-D; comparing (batch, 1) outputs against
        # (batch,) labels would broadcast to (batch, batch) and silently
        # compute the wrong loss.
        loss = criterion(outputs.squeeze(-1), labels)

        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)

    # Batch losses were weighted by batch size above, so dividing by the
    # number of samples gives the true per-sample mean for the epoch.
    avg_loss = train_loss / len(training_dataset)
    train_losses.append(avg_loss)
    print(f"TRAIN LOSS: On Epoch-{epoch_num}, we have {avg_loss:.4f}")

    # One scheduler step per epoch. Use scheduler_5 instead when training
    # for 5 epochs; never step both.
    scheduler_10.step()
        
        

In [None]:
# Per-epoch training loss; the trailing semicolon hides the matplotlib repr.
fig, ax = plt.subplots()
ax.plot(train_losses)
ax.set(title="Training loss", xlabel="Epoch", ylabel="Mean MSE loss");

In [None]:
# Test loss is measured before each epoch's updates, so index i is the loss
# after i completed training epochs.
fig, ax = plt.subplots()
ax.plot(test_losses)
ax.set(title="Test loss (measured before each epoch)",
       xlabel="Completed training epochs", ylabel="Mean MSE loss");

In [None]:
# Fixed-order loader over the validation split.
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Final evaluation on the validation split.
# This must iterate val_loader and normalise by len(val_dataset); using the
# test loader here would just re-report the test loss under a "VAL" label.
device = next(model.parameters()).device  # follow the model's device

val_loss = 0.0
model.eval()
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(-1), labels)
        # Weight each batch mean by its size to get a true per-sample average.
        val_loss += loss.item() * inputs.size(0)
avg_loss = val_loss / len(val_dataset)
print(f"ON VAL SET, ACHIEVED {avg_loss:.4f}")