In [None]:
import pickle
from joblib import load
import numpy as np
import pandas as pd

# fetch training data (thank you keegan)
df = load('../DL_data/results_dataframe/results_dataframe.pkl')
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [2]:
print(f'DF {df.shape}')
print(df.columns)


DF (10980480, 5)
Index(['n', 'k', 'm', 'result', 'P'], dtype='object')


In [3]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt
from tqdm import tqdm

class MyDataset(Dataset):
    def __init__(self, df):
        self.X_numeric = torch.tensor(df['m'].values, dtype=torch.float32).unsqueeze(1)
        self.n_rows = df['k'].astype(int).values
        self.n_cols = df['n'].astype(int).values - df['k'].astype(int).values
        series = df['P'].apply(lambda s: torch.tensor(s, dtype=torch.float32))
        
        padded_matrices = []
        for i, flat in enumerate(series):
            rows, cols = self.n_rows[i], self.n_cols[i]
            mat = flat.reshape(rows, cols)
            # Pad to 6×6
            pad_r = 6 - rows
            pad_c = 6 - cols
            padded = nn.functional.pad(mat, (0, pad_c, 0, pad_r))
            padded_matrices.append(padded.flatten())
        
        self.X = torch.cat([self.X_numeric, torch.stack(padded_matrices)], dim=1)
        self.y = torch.tensor(df['result'].values, dtype=torch.float32)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if device.type == "cuda":
    data = df
else: # <3 laptop
    data = df.head(200000)
# Hyperparameters
INPUT_DIM = 1 + 36
HIDDEN_DIM = 64
OUTPUT_DIM = 1
LR = 1e-3
BATCH_SIZE = 128
EPOCHS = 25

# Data
num_train_samples = int(0.65 * len(data))
num_val_samples = int(0.25 * len(data))
num_test_samples = len(data) - num_train_samples - num_val_samples
print("num_train_samples:", num_train_samples)
print("num_val_samples:", num_val_samples)
print("num_test_samples:", num_test_samples)
dataset = MyDataset(data)
train_data, val_data, test_data = random_split(dataset, [num_train_samples, num_val_samples, num_test_samples])
# loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True)


Using device: cuda
num_train_samples: 7137312
num_val_samples: 2745120
num_test_samples: 1098048


In [None]:
# Model 1
class DenseNet(nn.Module):
    def __init__(self):
        super().__init__()
        # consider ELU ELU
        self.net = nn.Sequential(
            nn.Linear(INPUT_DIM, 128),
            nn.LeakyReLU(0.01),
            nn.Linear(128, 256),
            nn.LeakyReLU(0.01),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.01),
            nn.Linear(128, 32),
            nn.LeakyReLU(0.01),
            nn.Linear(32, OUTPUT_DIM)
        )

    def forward(self, x):
        return self.net(x)


class Reshape1D(nn.Module):
    def forward(self, x):
        # Split numeric (first col) and matrix (next 36 cols)
        numeric = x[:, :1]                       # shape (batch, 1)
        matrix = x[:, 1:].reshape(x.size(0), 1, 36)# shape (batch, 1, 36)
        return matrix, numeric
# Model 2
class Conv1DenseNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(1, 32, kernel_size=36),  # input length=36
            nn.ELU(0.01),
            nn.Conv1d(32, 64, kernel_size=1),
            nn.ELU(0.01),
            nn.Flatten(start_dim=1)            # → (batch, 64)
        )

        self.fc = nn.Sequential(
            nn.Linear(64 + 1, 128),
            nn.ELU(0.01),
            nn.Linear(128, 256),
            nn.ELU(0.01),
            nn.Linear(256, 128),
            nn.ELU(0.01),
            nn.Linear(128, 32),
            nn.ELU(0.01),
            nn.Linear(32, OUTPUT_DIM)
        )

    def forward(self, x):
        matrix, numeric = Reshape1D()(x)
        conv_out = self.conv(matrix)   # should be (batch, 64)
        # print("conv_out:", conv_out.shape, "numeric:", numeric.shape)
        combined = torch.cat([conv_out, numeric], dim=1)  # shape (128,65)
        return self.fc(combined)



def ratio_loss(preds, targets, eps=1e-6):
    preds = torch.clamp(preds, min=eps)
    targets = torch.clamp(targets, min=eps)
    return torch.mean((torch.log2(targets) - torch.log2(preds))**2)
criterion = ratio_loss

def train(model, train_loader, val_loader):
    train_losses, val_losses = [], []
    for epoch in range(EPOCHS):
        model.train()
        epoch_train_loss = 0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            preds = model(X)
            loss = criterion(preds, y)
            loss.backward(); optimizer.step()
            epoch_train_loss += loss.item()

        model.eval()
        epoch_val_loss = 0
        with torch.no_grad():
            for X, y in val_loader:
                X, y = X.to(device), y.to(device)
                epoch_val_loss += criterion(model(X), y).item()

        train_losses.append(epoch_train_loss/len(train_loader))
        val_losses.append(epoch_val_loss/len(val_loader))
        print(f"Epoch {epoch+1}/{EPOCHS} — train: {train_losses[-1]:.4f}, val: {val_losses[-1]:.4f}")

    # Plot losses
    plt.figure()
    plt.plot(train_losses, label="Train loss")
    plt.plot(val_losses, label="Val loss")
    plt.xlabel("Epoch"); plt.ylabel("Loss"); plt.legend()
    plt.show()

    return model

In [17]:
def test_and_plot(model, test_loader):
    model.eval()

    all_preds, all_targets, all_features = [], [], []
    total_loss = 0
    criterion = ratio_loss

    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device), y.to(device)
            preds = model(X).reshape(-1)
            total_loss += criterion(preds, y).item() * X.size(0)

            all_preds.append(preds.cpu())
            all_targets.append(y.cpu())
            all_features.append(X[:, 0].cpu())    # column 0 = “m”

    all_preds = torch.cat(all_preds).numpy()
    all_targets = torch.cat(all_targets).numpy()
    feature_vals = torch.cat(all_features).numpy()

    loss = total_loss / len(test_loader.dataset)
    print(f"Test Ratio Loss: {loss:.6f}")

    # 95% bounds
    combined = np.concatenate([all_targets, all_preds])
    low, high = np.percentile(combined, [0, 99])

    plt.figure()
    sc = plt.scatter(all_targets, all_preds, c=feature_vals, alpha=0.7)
    plt.plot([low, high], [low, high], linestyle='--')
    plt.xlim(low, high); plt.ylim(low, high)
    plt.xlabel("True target"); plt.ylabel("Predicted")
    plt.title("Test: True vs Predicted (colored by m)")
    plt.colorbar(sc, label="m")
    plt.show()


model = Conv1DenseNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
model = train(model, train_loader, val_loader)
test_and_plot(model, test_loader)

Epoch 1/25 — train: 10.9135, val: 10.8513
Epoch 2/25 — train: 10.8423, val: 10.8512
Epoch 3/25 — train: 10.8419, val: 10.8509


KeyboardInterrupt: 

In [None]:
# Model 3
class Conv1to2DNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.reshape = Reshape1D()
        self.conv1d = nn.Sequential(
            nn.Conv1d(1, 32, kernel_size=3, padding=1),
            nn.ELU(0.01),
        )
        self.conv2d = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=(3,3), padding=1),
            nn.ELU(0.01),
            nn.Flatten(start_dim=1)
        )
        self.fc = nn.Sequential(
            nn.Linear(64*6*6 + 1, 128),
            nn.ELU(0.01),
            nn.Linear(128, 256),
            nn.ELU(0.01),
            nn.Linear(256, 128),
            nn.ELU(0.01),
            nn.Linear(128, 32),
            nn.ELU(0.01),
            nn.Linear(32, OUTPUT_DIM)
        )

    def forward(self, x):
        matrix, numeric = Reshape1D()(x)
        x1 = self.conv1d(matrix)         # → (batch,32,1)
        x2 = x1.reshape(x1.size(0), 32, 6, 6)  # reshape channels into 6×6 spatial map
        conv2 = self.conv2d(x2)
        return self.fc(torch.cat([conv2, numeric], dim=1))
    

model = Conv1to2DNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
model = train(model, train_loader, val_loader)
test_and_plot(model, test_loader)

Epoch 1/25 — train: 10.8685, val: 10.8603
Epoch 2/25 — train: 10.8413, val: 10.8618
Epoch 3/25 — train: 10.8405, val: 10.8525
Epoch 4/25 — train: 10.8403, val: 10.8510


KeyboardInterrupt: 

In [None]:
# Model 4
class Reshape2D(nn.Module):
    def forward(self, x):
        # x is assumed to have shape (batch, 37): 1 numeric feature + 36 matrix elements.
        numeric = x[:, :1]                       # (batch, 1)
        batch = x.size(0)
        # Reshape the next 36 elements into a 6x6 matrix.
        # We'll treat this as a single-channel image.
        matrix = x[:, 1:].reshape(batch, 1, 6, 6)  # (batch, 1, 6, 6)
        return matrix, numeric

class Conv2DNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv2d = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(6, 1)),      # (batch, 32, 1, 6)
            nn.ELU(0.01),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(1, 3), padding=(0, 1)),     # (batch, 64, 1, 6)
            nn.ELU(0.01),
            nn.Flatten(start_dim=1)  # Flattens to (batch, 64*6) = (batch, 384)
        )
        self.fc = nn.Sequential(
            nn.Linear(384 + 1, 128),
            nn.ELU(0.01),
            nn.Linear(128, 256),
            nn.ELU(0.01),
            nn.Linear(256, 128),
            nn.ELU(0.01),
            nn.Linear(128, 32),
            nn.ELU(0.01),
            nn.Linear(32, OUTPUT_DIM)
        )

    def forward(self, x):
        matrix, numeric = Reshape2D()(x)
        conv_out = self.conv2d(matrix)
        combined = torch.cat([conv_out, numeric], dim=1)
        return self.fc(combined)

    

model = Conv2DNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
model = train(model, train_loader, val_loader)
test_and_plot(model, test_loader)

Epoch 1/25 — train: 10.8972, val: 10.8530
Epoch 2/25 — train: 10.8445, val: 10.8512
Epoch 3/25 — train: 10.8420, val: 10.8607


KeyboardInterrupt: 