In [308]:
import random
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from dataclasses import dataclass

In [309]:
seed = 42

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed);

In [310]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device {device}')

Using device cuda


### Prepare the data

In [311]:
class ImpurityDataset(Dataset):
    
    def __init__(self, dataframe, fixed_features, labels, feature_scaler=None, label_r_scaler=None, label_i_scaler=None, device=None):
        assert len(labels) % 2 == 0
        
        self.fixed_features = fixed_features
        self.labels = labels
        self.n_samples = len(dataframe)
        
        self.output_length = 2
        self.input_length = len(fixed_features) + len(labels) - self.output_length
        self.sequence_length = len(labels) // self.output_length # how many auto regressive steps we have to take; not SEQ_LEN of the transformer!

        df_features = dataframe[fixed_features]
        df_labels = dataframe[labels]
        
        if feature_scaler is not None and label_r_scaler is not None and label_i_scaler is not None:
            xs = feature_scaler.transform(df_features)
            
            ys_r = label_r_scaler.transform(df_labels[df_labels.columns[::2]])
            ys_i = label_i_scaler.transform(df_labels[df_labels.columns[1::2]])
            
            ys = np.empty((ys_r.shape[0], ys_r.shape[1] * 2), dtype=ys_r.dtype)
            ys[:, ::2] = ys_r  # fill even indices with real parts
            ys[:, 1::2] = ys_i  # fill odd indices with imaginary parts
        else:
            xs = df_features.values
            ys = df_labels.values
        
        xss = np.zeros((self.n_samples * self.sequence_length, self.input_length))
        yss = np.zeros((self.n_samples * self.sequence_length, self.input_length, self.output_length))

        ff_len = len(self.fixed_features)
        
        for i in range(self.n_samples * self.sequence_length):
            j = i // self.sequence_length
            end = (i % self.sequence_length) * self.output_length
            
            xss[i, :ff_len] = xs[j, :]
            xss[i, ff_len:ff_len+end] = ys[j, :end]

            yss[i, ff_len+end-1:ff_len+end, :] = ys[j, end:end + self.output_length].reshape(self.output_length)
       
        xss = xss.reshape(self.n_samples * self.sequence_length, self.input_length, 1)

        self.feature_data = torch.tensor(xss, dtype=torch.float).to(device)
        self.label_data = torch.tensor(yss, dtype=torch.float).to(device)

    def __len__(self):
        return self.n_samples * self.sequence_length

    def __getitem__(self, idx):
        return self.feature_data[idx], self.label_data[idx], (len(self.fixed_features) - 1 + (idx % self.sequence_length) * self.output_length) 

In [312]:
def compute_scalers(dataframe, fixed_features, labels, test_size=0.1, random_state=None):
    train_df, _ = train_test_split(dataframe, test_size=test_size, random_state=random_state)
    df_features = train_df[fixed_features]
    df_labels = train_df[labels]
    
    feature_scaler = StandardScaler()
    label_r_scaler = StandardScaler()
    label_i_scaler = StandardScaler()

    feature_scaler.fit(df_features)
    label_r_scaler.fit(df_labels[df_labels.columns[::2]]) # get only real columns
    label_i_scaler.fit(df_labels[df_labels.columns[1::2]]) # get only imaginary columns

    return feature_scaler, label_r_scaler, label_i_scaler

In [313]:
file_path = '../data/20230825_144318_10k_EVDoubExp-TExp-wmax5-sparse-hyb_with_perturbation.csv'

#fixed_features = ['beta', 'U', 'Eimp', 'E1', 'E2', 'E3', 'V1', 'V2', 'V3']
#fixed_features = ['beta', 'E1', 'E2', 'E3', 'V1', 'V2', 'V3']
fixed_features = ['beta', 'E1', 'E2', 'E3', 'V1', 'V2', 'V3', 'ReFso1', 'ImFso1', 'ReFso3', 'ImFso3', 'ReFso5', 'ImFso5', 'ReFso7', 'ImFso7', 'ReFso9', 'ImFso9', 'ReFso11', 'ImFso11', 'ReFso13', 'ImFso13', 'ReFso15', 'ImFso15', 'ReFso17', 'ImFso17', 'ReFso19', 'ImFso19', 'ReFso21', 'ImFso21', 'ReFso23', 'ImFso23', 'ReFso25', 'ImFso25', 'ReFso29', 'ImFso29', 'ReFso33', 'ImFso33', 'ReFso37', 'ImFso37', 'ReFso43', 'ImFso43', 'ReFso49', 'ImFso49', 'ReFso57', 'ImFso57', 'ReFso69', 'ImFso69', 'ReFso83', 'ImFso83', 'ReFso101', 'ImFso101', 'ReFso127', 'ImFso127', 'ReFso165', 'ImFso165', 'ReFso237', 'ImFso237', 'ReFso399', 'ImFso399', 'ReFso1207', 'ImFso1207']
labels = ['ReSf1', 'ImSf1', 'ReSf3', 'ImSf3', 'ReSf5', 'ImSf5', 'ReSf7', 'ImSf7', 'ReSf9', 'ImSf9', 'ReSf11', 'ImSf11', 'ReSf13', 'ImSf13', 'ReSf15', 'ImSf15', 'ReSf17', 'ImSf17', 'ReSf19', 'ImSf19', 'ReSf21', 'ImSf21', 'ReSf23', 'ImSf23', 'ReSf25', 'ImSf25', 'ReSf29', 'ImSf29', 'ReSf33', 'ImSf33', 'ReSf37', 'ImSf37', 'ReSf43', 'ImSf43', 'ReSf49', 'ImSf49', 'ReSf57', 'ImSf57', 'ReSf69', 'ImSf69', 'ReSf83', 'ImSf83', 'ReSf101', 'ImSf101', 'ReSf127', 'ImSf127', 'ReSf165', 'ImSf165', 'ReSf237', 'ImSf237', 'ReSf399', 'ImSf399', 'ReSf1207', 'ImSf1207']

df = pd.read_csv(file_path, skiprows=4) # we skip the first four lines, because they are just metadata
df = df[fixed_features + labels]

validation_size = 0.1 # 90% training, 10% for validation

feature_scaler, label_r_scaler, label_i_scaler = compute_scalers(df, fixed_features, labels, validation_size, seed) # make sure we use the same seed, otherwise the two splits differ!
#dataset = ImpurityDataset(df, fixed_features, labels, device=device)
dataset = ImpurityDataset(df, fixed_features, labels, feature_scaler, label_r_scaler, label_i_scaler, device)  

indices = list(range(len(dataset)))
train_indices, val_indices = train_test_split(indices, test_size=validation_size, random_state=seed)  # make sure we use the same seed, otherwise the two splits differ!

train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0)

In [314]:
dataset.__getitem__(0);

### Define the model

In [315]:
class PositionalEncodingL(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=27):
        super(PositionalEncodingL, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.positional_embedding = nn.Parameter(torch.zeros(max_len, d_model))
    
    def forward(self, x):
        batch_size, seq_len, d_model = x.shape
        position_encoded = self.positional_embedding[:, :].unsqueeze(0).expand(batch_size, -1, -1)
        x = x + position_encoded
        return self.dropout(x)

In [316]:
class PositionalEncodingF(torch.nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=27):
        super(PositionalEncodingF, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

In [317]:
@dataclass
class ModelConfig:
    fixed_features: int
    labels: int

    input_dim: int
    output_dim: int
    
    d_model: int
    nhead: int
    num_layers: int
    dim_feedforward: int
    
    dropout: float
    activation: str
    bias: bool

class AutoregressiveTransformer(nn.Module):
    
    def __init__(self, config, device):
        super(AutoregressiveTransformer, self).__init__()

        self.config = config
        self.sequence_length = config.fixed_features + config.labels - config.output_dim
        
        self.input_projection = nn.Linear(config.input_dim, config.d_model)
        
        self.positional_encoding = PositionalEncodingL(config.d_model, dropout=config.dropout, max_len=self.sequence_length)
        
        decoder_layer = nn.TransformerDecoderLayer(
            d_model=config.d_model, 
            nhead=config.nhead, 
            dim_feedforward=config.dim_feedforward, 
            dropout=config.dropout,
            activation=config.activation, 
            batch_first=True, 
            norm_first=True, 
            bias=config.bias
        )
        
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=config.num_layers)
        
        self.output_layer = nn.Linear(config.d_model, config.output_dim)
        
        self.att_mask = self.generate_mask(self.sequence_length, self.config.fixed_features, self.config.output_dim, device)
        
    def forward(self, x):        
        x = self.input_projection(x)        
        x = self.positional_encoding(x)
        
        output = self.transformer_decoder(x, x, tgt_mask=self.att_mask)
        output = self.output_layer(output)
        
        return output

    def generate_mask(self, sequence_length, fixed_features, output_dim, device):
        assert (sequence_length - fixed_features) % output_dim == 0
        
        mask = torch.full((sequence_length, sequence_length), float('-inf'), device=device)
        mask[:, :fixed_features] = 0 
        
        for i in range(fixed_features, sequence_length, output_dim):
            mask[i:, i] = torch.tensor([0] * (sequence_length - i), device=device)
            mask[i:, i+1] = torch.tensor([0] * (sequence_length - i), device=device)
            
        return mask

### Initialize the model

In [322]:
config = ModelConfig(
    fixed_features = len(fixed_features),
    labels = len(labels),
    
    input_dim = 1,
    output_dim = 2,
    
    d_model = 128,
    nhead = 4,
    num_layers = 2,
    dim_feedforward = 128 * 4,
    
    dropout = 0.1,
    activation = 'gelu',
    bias = True
)

model = AutoregressiveTransformer(config, device).to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [323]:
def train(model, train_loader, optimizer, criterion, device):
    model.train()
    total_loss = 0.0
    
    for inputs, targets, idx in train_loader:
        
        optimizer.zero_grad()

        outputs = model(inputs)

        masked_outputs = torch.zeros(outputs.shape, device=device)

        for row, col in enumerate(idx):
            masked_outputs[row, col] = outputs[row, col]
        
        loss = criterion(masked_outputs, targets)
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()

    return total_loss / len(train_loader)

In [324]:
def validate(model, val_loader, criterion, device):
    model.eval()
    total_loss = 0.0
    
    with torch.no_grad():
        
        for inputs, targets, idx in val_loader:

            outputs = model(inputs)
            masked_outputs = torch.zeros(outputs.shape, device=device)
    
            for row, col in enumerate(idx):
                masked_outputs[row, col] = outputs[row, col]
            
            loss = criterion(masked_outputs, targets)
            
            total_loss += loss.item()
    
    return total_loss / len(val_loader)

### Train the model

In [None]:
num_epochs = 100

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    val_loss = validate(model, val_loader, criterion, device)
    val_mape = validate_mape(model, len(fixed_features), val_loader, label_r_scaler, label_i_scaler, device)
    print(f"Epoch {(epoch+1):3d}: Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}, Val MAPE: {val_mape:.6f}")

Epoch   1: Train Loss: 0.000493, Val Loss: 0.000253, Val MAPE: 27.547171
Epoch   2: Train Loss: 0.000268, Val Loss: 0.000086, Val MAPE: 10.645461
Epoch   3: Train Loss: 0.000238, Val Loss: 0.000094, Val MAPE: 12.192848
Epoch   4: Train Loss: 0.000263, Val Loss: 0.000124, Val MAPE: 19.510052
Epoch   5: Train Loss: 0.000182, Val Loss: 0.000094, Val MAPE: 12.316251
Epoch   6: Train Loss: 0.000164, Val Loss: 0.000056, Val MAPE: 7.868703


### Manual model validation

In [None]:
def validate_mape(model, ff_len, val_loader, scaler_r, scaler_i, device, epsilon=1e-8):
    model.eval()
    total_loss = 0.0
    
    with torch.no_grad():
        
        for inputs, targets, idx in val_loader:

            outputs = model(inputs)

            outputs = [(ii, outputs[i, ii]) for i, ii in enumerate(idx)]
            targets = [(ii, targets[i, ii]) for i, ii in enumerate(idx)]
            
            outputs = torch.tensor(np.array([reverse_tranform_output(o, idx, ff_len, scaler_r, scaler_i) for idx, o in outputs]))
            targets = torch.tensor(np.array([reverse_tranform_output(t, idx, ff_len, scaler_r, scaler_i) for idx, t in targets]))
            
            ape = torch.abs((targets - outputs) / (targets + epsilon))
            mape = torch.mean(ape) * 100
                    
            total_loss += mape.item()
            
    return total_loss / len(val_loader)

In [None]:
def reverse_tranform_output(data, index, ff_len, scaler_r, scaler_i):
    data = data.cpu().numpy()
    
    data = np.tile(data, (27, 1))
    i_seq = (index - (ff_len - 1)) // 2
    
    r = scaler_r.inverse_transform(data[:, ::2].reshape(1, -1)).reshape(-1, 1)[i_seq]
    i = scaler_i.inverse_transform(data[:, 1::2].reshape(1, -1)).reshape(-1, 1)[i_seq]
    
    return np.concatenate((r, i), axis=0)

In [None]:
validate_mape(model, len(fixed_features), val_loader, label_r_scaler, label_i_scaler, device)

### Sample from the model

In [294]:
def sample_from_model(model, input, idx, sequence_length, ff_len, dim, device):
    model.eval()

    niters = (sequence_length - (ff_len - 1)) // 2
    fixed_features = input[:ff_len]
    
    initial_input = torch.zeros(1, sequence_length, dim)
    initial_input[0, :ff_len] = fixed_features
    current_input = initial_input.to(device)

    outputs = torch.zeros(niters + 1, 2, device = device)
    
    with torch.no_grad():
    
        for i in range(niters + 1):
            
            output = model(current_input)
            predictions = output[0, -1, -2:]

            print(output)
            
            outputs[i, :] = predictions

            break
            
            if i == niters:
                break

            pos = i * 2 + ff_len
            
            current_input[0, pos] = predictions[0].reshape(1, 1)
            current_input[0, pos + 1] = predictions[1].reshape(1, 1)

    return outputs

In [295]:
def convert(data):
    data = data.cpu().numpy()

    r = label_r_scaler.inverse_transform(data[:, 0].reshape(1, -1)).reshape(-1, 1)
    i = label_i_scaler.inverse_transform(data[:, 1].reshape(1, -1)).reshape(-1, 1)
            
    return np.concatenate((r, i), axis=1)

In [296]:
def calculate_mape(true_values, predictions, epsilon=1e-8):
    true_values = np.array(true_values)
    predictions = np.array(predictions)
    
    mape = np.mean(np.abs((true_values - predictions) / (true_values + epsilon))) * 100
    return mape

In [297]:
val_loader.dataset[i];

In [300]:
n_samples = 100 #train_loader.dataset.dataset.n_samples 
sequence_length = 59
ff_len = len(fixed_features)
dim = 1

mapes = []

for i in range(1):
    input, target, idx = dataset.__getitem__((i + 1) * 26)

    print(input)
    
    targets = torch.zeros(27, 2)
    targets[:-1] = input[ff_len:].reshape(26, 2)
    targets[-1:] = target[-1]
    

    outputs = sample_from_model(model, input, idx, sequence_length, ff_len, dim, device)

    print(targets)
    print(output)
    
    targets = convert(targets)
    outputs = convert(outputs)
    
    print(targets)
    print(output)
    
    mapes.append(calculate_mape(targets, outputs))

    if (i+1) % 100 == 0:
        print(i+1)

np.mean(mapes)

tensor([[ 1.6405e+00],
        [-2.9219e-01],
        [-1.9585e-01],
        [-2.6310e-01],
        [-1.3628e+00],
        [ 8.0899e-02],
        [ 8.5878e-01],
        [ 1.5760e-01],
        [ 5.9940e-01],
        [ 6.6298e-02],
        [ 7.8556e-01],
        [ 2.5548e-02],
        [ 8.2903e-01],
        [ 3.3141e-03],
        [ 8.0128e-01],
        [-1.0240e-02],
        [ 7.2251e-01],
        [-1.8943e-02],
        [ 6.0766e-01],
        [-2.4607e-02],
        [ 4.6878e-01],
        [-2.8217e-02],
        [ 3.1550e-01],
        [-3.0367e-02],
        [ 1.5529e-01],
        [-3.1446e-02],
        [-6.1983e-03],
        [-3.1726e-02],
        [-1.6482e-01],
        [-3.1406e-02],
        [-3.1765e-01],
        [-3.0636e-02],
        [-4.6276e-01],
        [-2.8191e-02],
        [-7.2559e-01],
        [-2.5055e-02],
        [-9.4980e-01],
        [-2.1640e-02],
        [-1.1370e+00],
        [-1.6517e-02],
        [-1.3573e+00],
        [-1.1758e-02],
        [-1.5189e+00],
        [-6

134.5969319343567

In [301]:
n_samples = 1000
sequence_length = 27
fixed_features_len = len(fixed_features)
max_features = fixed_features_len + len(labels) - 2

total_mapes = []

for j in range(sequence_length):
    fixed_labels_len = j
    
    mapes = []
    
    for idx in range(n_samples):
        input = val_loader.dataset[idx][0][fixed_labels_len]
        output = sample_from_model(model, input, sequence_length, fixed_features_len, fixed_labels_len, max_features, device)
        
        target = convert(val_loader.dataset[idx][1].reshape(-1))
        output = convert(output.reshape(-1))
        
        mapes.append(calculate_mape(target, output))
    
        if (idx+1) % 1000 == 0:
            print(idx+1)

    total_mapes.append(np.mean(mapes))

print(total_mapes)

RuntimeError: The expanded size of the tensor (59) must match the existing size (0) at non-singleton dimension 1.  Target sizes: [0, 59].  Tensor sizes: [0]

### Others

In [92]:
feat, label, i = dataset.__getitem__(1)
real = label[i].reshape(-1, 1).repeat(27, 1)[::2].reshape(1, -1).cpu()
img = label[i].reshape(-1, 1).repeat(27, 1)[1::2].reshape(1, -1).cpu()
real, img

(tensor([[0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576,
          0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576,
          0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576, 0.1576]]),
 tensor([[0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994,
          0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994,
          0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994, 0.5994]]))

In [93]:
(i - (len(fixed_features) - 1)) // 2

0

In [94]:
real_t = label_r_scaler.inverse_transform(real).reshape(-1, 1)
img_t = label_i_scaler.inverse_transform(img).reshape(-1, 1)
real_t[(i - (len(fixed_features) - 1)) // 2], img_t[(i - (len(fixed_features) - 1)) // 2]

(array([0.37865343]), array([-0.00128612]))

In [97]:
df[['ReSf1', 'ImSf1']]

Unnamed: 0,ReSf1,ImSf1
0,0.378653,-0.001286
1,0.445733,-0.059607
2,0.622064,-0.039583
3,0.635753,-0.013387
4,0.334830,-0.000671
...,...,...
9995,0.375442,-0.002151
9996,0.419402,-0.046064
9997,0.456471,-0.002609
9998,0.559925,-0.058278


In [23]:
_, train_df = train_test_split(df, test_size=0.1, random_state=seed)

In [24]:
df_labels = train_df[labels]
df_labels[df_labels.columns[::2]]

Unnamed: 0,ReSf1,ReSf3,ReSf5,ReSf7,ReSf9,ReSf11,ReSf13,ReSf15,ReSf17,ReSf19,...,ReSf49,ReSf57,ReSf69,ReSf83,ReSf101,ReSf127,ReSf165,ReSf237,ReSf399,ReSf1207
6252,0.283896,0.283963,0.284096,0.284291,0.284544,0.284850,0.285203,0.285597,0.286025,0.286482,...,0.293903,0.295546,0.297626,0.299535,0.301349,0.303100,0.304595,0.305908,0.306809,0.307278
4684,0.395039,0.396192,0.397776,0.399200,0.400289,0.401078,0.401643,0.402053,0.402356,0.402583,...,0.403473,0.403519,0.403561,0.403589,0.403609,0.403625,0.403636,0.403644,0.403649,0.403652
1731,0.231242,0.323789,0.336674,0.340689,0.342421,0.343318,0.343841,0.344172,0.344394,0.344550,...,0.345086,0.345111,0.345133,0.345148,0.345159,0.345167,0.345173,0.345177,0.345180,0.345181
4742,0.227485,0.265939,0.281392,0.288057,0.291425,0.293341,0.294527,0.295309,0.295849,0.296237,...,0.297627,0.297694,0.297754,0.297794,0.297824,0.297846,0.297862,0.297873,0.297881,0.297884
4521,0.569832,0.569755,0.569608,0.569401,0.569149,0.568867,0.568570,0.568270,0.567977,0.567699,...,0.565689,0.565535,0.565399,0.565312,0.565252,0.565209,0.565181,0.565161,0.565150,0.565144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3921,0.385395,0.393142,0.401404,0.405372,0.407445,0.408634,0.409369,0.409852,0.410184,0.410423,...,0.411269,0.411310,0.411346,0.411371,0.411388,0.411402,0.411411,0.411418,0.411423,0.411425
6685,0.299184,0.311765,0.318668,0.322133,0.323979,0.325045,0.325705,0.326140,0.326439,0.326654,...,0.327415,0.327452,0.327484,0.327506,0.327522,0.327534,0.327543,0.327549,0.327553,0.327555
3194,0.174371,0.210826,0.224599,0.230770,0.233986,0.235840,0.236993,0.237753,0.238279,0.238657,...,0.240005,0.240069,0.240128,0.240167,0.240195,0.240216,0.240232,0.240243,0.240250,0.240253
1941,0.259463,0.271048,0.275675,0.277917,0.279126,0.279831,0.280271,0.280561,0.280762,0.280905,...,0.281415,0.281440,0.281462,0.281476,0.281487,0.281495,0.281501,0.281505,0.281508,0.281509
