In [10]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy as np
from sentence_transformers import SentenceTransformer

In [None]:

# Read the train / validation / test splits (in that order) from CSV files
# located in the current working directory.
train_df, val_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ("train.csv", "val.csv", "test.csv")
)

In [30]:
# Sentence encoder used only to turn query text into fixed-size embeddings.
# It has its own name so that re-running this cell never replaces the `model`
# regressor that later cells train and use for inference.
encoder = SentenceTransformer("all-MiniLM-L6-v2")


def embed_queries(frame):
    """Encode the "query" column of `frame` into a (rows, 1, embedding_dim) tensor.

    Every query becomes a sequence of length 1 (the `unsqueeze(1)` adds the
    time axis) so it can be fed to a `batch_first=True` recurrent layer.

    Args:
        frame: DataFrame with a "query" column of text.

    Returns:
        torch.Tensor on the CPU with a singleton sequence axis at dim 1.
    """
    embeddings = encoder.encode(frame["query"].tolist(), convert_to_tensor=True)
    # encode() may hand back tensors on the encoder's device (e.g. CUDA); the
    # targets below and the downstream regressor are created on the CPU, so
    # bring the features there as well. `.cpu()` is a no-op for CPU tensors.
    return embeddings.cpu().unsqueeze(1)


X_train = embed_queries(train_df)
X_val = embed_queries(val_df)
X_test = embed_queries(test_df)

# Regression targets as float32 column vectors of shape (rows, 1), matching
# the (batch, 1) output of a single-unit linear head.
y_train = torch.tensor(train_df["carb"].values, dtype=torch.float32).unsqueeze(1)
y_val = torch.tensor(val_df["carb"].values, dtype=torch.float32).unsqueeze(1)

# Only the training loader shuffles; validation order is kept fixed.
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=32)

In [24]:
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: sqrt(MSE(yhat, y) + eps).

    Args:
        eps: Small constant added under the square root. The derivative of
            sqrt is infinite at 0, so without it a perfect prediction
            (MSE == 0) produces NaN gradients in backward().
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions `yhat` and targets `y`."""
        # eps keeps the gradient finite when the mean squared error is exactly 0.
        return torch.sqrt(self.mse(yhat, y) + self.eps)

In [None]:
import torch.nn as nn

class RNN(nn.Module):
    """Recurrent regressor: an `nn.RNN` encoder followed by a one-unit linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units per direction.
        activation: 'ReLU' or 'Tanh' (case-insensitive); these are the only
            nonlinearities `nn.RNN` supports.
        num_layers: Number of stacked recurrent layers.
        dropout: Dropout probability between stacked layers. Forced to 0 when
            `num_layers == 1`, because there is no inter-layer connection to
            apply it to (and `nn.RNN` warns in that case).
        bidrectional: If true, run the RNN in both directions. The spelling of
            this keyword is kept as-is so existing call sites keep working.

    Raises:
        ValueError: If `activation` is not 'relu' or 'tanh'.
    """

    def __init__(self, input_size=384, hidden_size=64, activation='ReLU', num_layers=2, dropout=0.2, bidrectional=False):
        super(RNN, self).__init__()
        nonlinearity = str(activation).lower()
        if nonlinearity not in ('relu', 'tanh'):
            raise ValueError(
                f"activation must be 'ReLU' or 'Tanh', got {activation!r}"
            )
        is_bidirectional = bool(bidrectional)
        self.rnn = nn.RNN(
            input_size,
            hidden_size,
            num_layers=num_layers,
            nonlinearity=nonlinearity,
            dropout=dropout if num_layers > 1 else 0.0,
            bidirectional=is_bidirectional,
            batch_first=True,
        )
        # A bidirectional RNN yields one hidden vector per direction.
        self.fc = nn.Linear(hidden_size * (2 if is_bidirectional else 1), 1)

    def forward(self, x):
        """Map a batch shaped (batch, seq_len, input_size) to predictions shaped (batch, 1)."""
        # h_n holds the final hidden state of every layer/direction, ordered
        # layer by layer with the backward direction after the forward one.
        _, h_n = self.rnn(x)
        if self.rnn.bidirectional:
            # Top layer: forward state (after the last step) + backward state
            # (after the first step), each having seen the whole sequence.
            summary = torch.cat((h_n[-2], h_n[-1]), dim=-1)
        else:
            # Equals the top-layer output at the last time step.
            summary = h_n[-1]
        return self.fc(summary)


In [None]:
model = RNN()
# Adam with a small L2 penalty; the keyword must be spelled `weight_decay`,
# any other spelling makes the optimizer constructor raise TypeError.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
criterion = nn.MSELoss()

for epoch in range(100):
    # ---- training pass ----
    model.train()
    total_loss = 0
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad()
        preds = model(x_batch)
        loss = criterion(preds, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Note: this is the SUM of per-batch MSE values over the epoch, so it
    # scales with the number of batches and is not an RMSE.
    print(f"Epoch {epoch+1}, Train Loss: {total_loss:.4f}")

    # ---- validation pass ----
    # Accumulate squared error over the whole validation set so the RMSE is
    # exact even when the last batch is smaller than the others.
    model.eval()
    squared_error = 0.0
    n_targets = 0
    with torch.no_grad():
        for x_batch, y_batch in val_loader:
            residual = model(x_batch) - y_batch
            squared_error += residual.pow(2).sum().item()
            n_targets += y_batch.numel()
    # max(..., 1) avoids a ZeroDivisionError on an empty validation set.
    val_rmse = (squared_error / max(n_targets, 1)) ** 0.5
    print(f"           Val RMSE: {val_rmse:.4f}")

Epoch 1, Train Loss: 35.5696
           Val RMSE: 39.0734
Epoch 2, Train Loss: 32.7550
           Val RMSE: 37.3609
Epoch 3, Train Loss: 31.4990
           Val RMSE: 36.6106
Epoch 4, Train Loss: 30.5363
           Val RMSE: 36.1287
Epoch 5, Train Loss: 30.2295
           Val RMSE: 35.6607
Epoch 6, Train Loss: 29.5854
           Val RMSE: 35.4028
Epoch 7, Train Loss: 29.2821
           Val RMSE: 35.1561
Epoch 8, Train Loss: 29.1696
           Val RMSE: 35.0504
Epoch 9, Train Loss: 28.6743
           Val RMSE: 34.7867
Epoch 10, Train Loss: 28.3511
           Val RMSE: 34.8167
Epoch 11, Train Loss: 28.6085
           Val RMSE: 34.3892
Epoch 12, Train Loss: 28.1759
           Val RMSE: 34.3301
Epoch 13, Train Loss: 28.4782
           Val RMSE: 34.2525
Epoch 14, Train Loss: 27.8323
           Val RMSE: 34.2801
Epoch 15, Train Loss: 28.0569
           Val RMSE: 34.2781
Epoch 16, Train Loss: 28.0413
           Val RMSE: 34.1549
Epoch 17, Train Loss: 27.9108
           Val RMSE: 33.9168
Epoch 

In [33]:
# Inference on the test split: eval mode disables dropout, no_grad skips
# autograd bookkeeping.
model.eval()
# Feed the inputs on whatever device the model's weights live on.
device = next(model.parameters()).device
with torch.no_grad():
    # squeeze(-1) drops only the trailing size-1 output axis, so a single-row
    # test set still yields a 1-D array; .cpu() is required before .numpy()
    # if the prediction tensor is not on the CPU.
    preds = model(X_test.to(device)).squeeze(-1).cpu().numpy()

# Add prediction column and save
test_df["carb"] = preds
test_df.to_csv("RNNtesting.csv", index=False)