In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy as np
from sentence_transformers import SentenceTransformer

In [2]:

# Load the three dataset splits from CSV files in the working directory.
train_df, val_df, test_df = map(
    pd.read_csv,
    ("train.csv", "val.csv", "test.csv"),
)

In [7]:
# Sentence encoder used to turn each free-text query into a fixed-size embedding.
model_st = SentenceTransformer("all-MiniLM-L6-v2")

# With convert_to_tensor=True, encode() hands back tensors on the encoder's own
# device (a GPU when one is available). The regressor and the targets below are
# created on the CPU, so pin the embeddings to the CPU as well; otherwise the
# training loop fails with a device mismatch on GPU machines. On a CPU-only
# machine .cpu() is a no-op.
X_train = model_st.encode(train_df["query"].tolist(), convert_to_tensor=True).cpu()
X_val = model_st.encode(val_df["query"].tolist(), convert_to_tensor=True).cpu()

# Regression targets as float32 column vectors of shape (N, 1).
y_train = torch.tensor(train_df["carb"].values, dtype=torch.float32).unsqueeze(1)
y_val = torch.tensor(val_df["carb"].values, dtype=torch.float32).unsqueeze(1)

# Insert a length-1 sequence axis so the batch_first LSTM receives
# (batch, seq_len, features); the recorded run shows (8000, 1, 384) for train.
X_train = X_train.unsqueeze(1)
X_val = X_val.unsqueeze(1)

# Fail early if features and targets ever get out of step.
assert X_train.shape[0] == y_train.shape[0], "train features/targets row mismatch"
assert X_val.shape[0] == y_val.shape[0], "val features/targets row mismatch"

train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

# Only the training split is shuffled; validation keeps file order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

X_train.shape

torch.Size([8000, 1, 384])

In [None]:
class LSTMModel(nn.Module):
    """LSTM regressor that maps a sequence of feature vectors to one scalar.

    The input is passed through a (possibly stacked) batch-first LSTM, and the
    LSTM output at the final time step is projected to a single value by a
    linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability handed to ``nn.LSTM``.
    """

    def __init__(self, input_size=384, hidden_size=128, num_layers=2, dropout=0.4):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return a ``(batch, 1)`` prediction for ``x`` of shape ``(batch, seq_len, input_size)``."""
        sequence_out, _state = self.lstm(x)
        # Only the representation at the last time step feeds the regression head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)


In [63]:
import math
import torch.nn.functional as F

# Fresh network, objective and optimiser for this training run.
model = LSTMModel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

for epoch in range(150):
    model.train()
    total_loss = 0  # running sum of the per-batch mean losses (not an epoch mean)
    train_preds, train_targets = [], []

    for features, labels in train_loader:
        optimizer.zero_grad()
        pred = model(features)
        loss = criterion(pred, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        # Keep the predictions made during this pass (detached from the graph)
        # so an epoch-level RMSE can be computed once the loop finishes.
        train_preds.append(pred.detach())
        train_targets.append(labels)

    train_preds = torch.cat(train_preds)
    train_targets = torch.cat(train_targets)
    epoch_mse = F.mse_loss(train_preds, train_targets).item()
    train_rmse = math.sqrt(epoch_mse)

    print(f"Epoch {epoch+1}, Train Loss: {total_loss:.4f}, Train RMSE: {train_rmse:.2f}")

Epoch 1, Train Loss: 481870.2575, Train RMSE: 43.90
Epoch 2, Train Loss: 416338.1710, Train RMSE: 40.81
Epoch 3, Train Loss: 387644.6107, Train RMSE: 39.38
Epoch 4, Train Loss: 370881.0496, Train RMSE: 38.52
Epoch 5, Train Loss: 357836.3816, Train RMSE: 37.83
Epoch 6, Train Loss: 344904.0132, Train RMSE: 37.14
Epoch 7, Train Loss: 334811.2111, Train RMSE: 36.60
Epoch 8, Train Loss: 324500.1559, Train RMSE: 36.03
Epoch 9, Train Loss: 316150.6721, Train RMSE: 35.56
Epoch 10, Train Loss: 304870.2886, Train RMSE: 34.92
Epoch 11, Train Loss: 295413.9996, Train RMSE: 34.38
Epoch 12, Train Loss: 286553.0310, Train RMSE: 33.86
Epoch 13, Train Loss: 279394.9477, Train RMSE: 33.43
Epoch 14, Train Loss: 270100.1623, Train RMSE: 32.87
Epoch 15, Train Loss: 262113.8207, Train RMSE: 32.38
Epoch 16, Train Loss: 259974.2054, Train RMSE: 32.25
Epoch 17, Train Loss: 248913.1646, Train RMSE: 31.55
Epoch 18, Train Loss: 242563.6898, Train RMSE: 31.15
Epoch 19, Train Loss: 238391.4393, Train RMSE: 30.88
Ep

In [64]:
import torch.nn.functional as F
import math

# Score the trained model on the validation split with dropout disabled
# and gradient tracking turned off.
model.eval()
with torch.no_grad():
    batch_outputs = [(model(inputs), labels) for inputs, labels in val_loader]

    preds = torch.cat([output for output, _ in batch_outputs])
    targets = torch.cat([labels for _, labels in batch_outputs])

    val_mse = F.mse_loss(preds, targets).item()
    rmse = math.sqrt(val_mse)
    print(f"Validation RMSE: {rmse:.2f}")

    # Experiment note: 18.44 (presumably the validation RMSE) was recorded with
    # hidden_size=128, num_layers=3, dropout=0.5.


Validation RMSE: 16.37


In [65]:
# Predict the target for the *test* split and export it.
#
# The earlier version ran the model on X_val and stored the result in test_df,
# which either fails on a row-count mismatch or silently writes validation
# predictions as the test output. The test queries are now embedded the same
# way as the train/val queries before predicting.
# NOTE(review): assumes test.csv has the same "query" column as train/val - confirm.
model.eval()
device = next(model.parameters()).device  # run the input wherever the model lives

with torch.no_grad():
    X_test = model_st.encode(test_df["query"].tolist(), convert_to_tensor=True)
    X_test = X_test.unsqueeze(1).to(device)  # add the length-1 sequence axis
    # squeeze(-1) drops only the output axis, so a single-row test set still
    # yields a 1-D array; .cpu() keeps .numpy() valid if the model is on a GPU.
    preds = model(X_test).squeeze(-1).cpu().numpy()

if len(preds) != len(test_df):
    raise ValueError(
        f"Got {len(preds)} predictions for {len(test_df)} test rows"
    )

# Add prediction column and save
test_df["carb"] = preds
test_df.to_csv("test_with_predictions_transformer_lstm.csv", index=False)