In [5]:
import pandas as pd
from pathlib import Path
import numpy as np

In [6]:
# Treat the kernel's current working directory as the project root; load_data
# looks for the dataset at <model_path>/data/100k.h5, so the result depends on
# the directory the notebook is started from.
model_path = Path.cwd()

In [7]:
def load_data(model_path: Path, split: str = 'train', *, train_frac: float = 0.7, val_frac: float = 0.1):
    """Load one contiguous split of the dataset in ``<model_path>/data/100k.h5``.

    The HDF5 store is opened read-only, the object stored under the key
    ``'data'`` is converted to a NumPy array, and its rows are partitioned in
    stored order (no shuffling) into three consecutive ranges:

    * ``'train'`` -- the first ``int(n * train_frac)`` rows,
    * ``'val'``   -- the next ``int(n * val_frac)`` rows,
    * ``'test'``  -- every remaining row.

    Parameters
    ----------
    model_path : Path
        Directory that contains the ``data`` sub-directory.
    split : str, default 'train'
        Which range to return: ``'train'``, ``'val'`` or ``'test'``.
    train_frac : float, default 0.7
        Fraction of rows assigned to the training range (keyword-only).
    val_frac : float, default 0.1
        Fraction of rows assigned to the validation range (keyword-only).

    Returns
    -------
    tuple
        ``(rows, metadata)``: ``rows`` is a NumPy array holding a copy of the
        selected rows, ``metadata`` is the value of the ``metadata`` attribute
        saved on the ``'data'`` storer.

    Raises
    ------
    ValueError
        If ``split`` is not a recognised name, or if the fractions are
        negative or sum to more than 1.
    """
    # Validate the cheap arguments first so a typo fails before any file I/O.
    if split not in ('train', 'val', 'test'):
        raise ValueError(f"split must be one of ['train', 'val', 'test'], got {split}")
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1:
        raise ValueError(
            "train_frac and val_frac must be non-negative and sum to at most 1, "
            f"got train_frac={train_frac}, val_frac={val_frac}"
        )

    data_file = model_path / 'data' / '100k.h5'
    # mode='r': HDFStore defaults to append mode, which would create an empty
    # file when the path is missing and needs write access just to read.
    with pd.HDFStore(data_file, mode='r') as storedata:
        data = storedata['data'].to_numpy()
        metadata = storedata.get_storer('data').attrs.metadata

    length = data.shape[0]
    train_end = int(length * train_frac)
    val_end = train_end + int(length * val_frac)
    bounds = {
        'train': (0, train_end),
        'val': (train_end, val_end),
        'test': (val_end, length),
    }
    start, stop = bounds[split]
    # Copy so the caller gets an independent, writable array rather than a
    # view that keeps the full dataset alive.
    return data[start:stop].copy(), metadata

In [9]:
# No split is given, so this loads the default 'train' split together with the
# metadata object returned alongside it.
data, meta = load_data(model_path)

In [12]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    """Fully connected feed-forward network defined by a sequence of widths.

    Each pair of consecutive entries in ``layer_widths`` becomes the input and
    output size of one ``nn.Linear`` layer, giving ``len(layer_widths) - 1``
    linear layers. The activation is applied after every layer except the
    last, so the network output is the raw result of the final linear layer.

    Parameters
    ----------
    layer_widths : sequence of int
        Feature sizes, from the input width to the output width.
    activation : nn.Module, optional
        Non-linearity placed between layers. Defaults to a fresh ``nn.CELU()``
        per instance when omitted.
    """

    def __init__(self, layer_widths, activation=None):
        super().__init__()
        self.layer_widths = layer_widths
        self.layers = nn.ModuleList([
            nn.Linear(n_in, n_out)
            for n_in, n_out in zip(layer_widths[:-1], layer_widths[1:])
        ])
        # None sentinel instead of a module default argument, so separate
        # networks never share one activation instance.
        self.activation = nn.CELU() if activation is None else activation

    def forward(self, x):
        """Run ``x`` through every linear layer, activating all but the last."""
        last = len(self.layers) - 1
        for idx, layer in enumerate(self.layers):
            x = layer(x)
            if idx < last:
                x = self.activation(x)
        return x
