# Task B: Meta-Learning Performance Prediction

In this task, you will use information on training parameters and metadata from multiple OpenML datasets to train a performance predictor that performs well even on unseen datasets. You are provided with config parameters and metafeatures for six datasets. The datasets are split into training datasets and test datasets, and you should train only on the training datasets.

For questions, you can contact zimmerl@informatik.uni-freiburg.de

__Note: Please use the dataloading and splits you are provided with in this notebook.__

## Specifications:

* Data: six_datasets_lw.json
* Number of datasets: 6
* Training datasets: higgs, vehicle, adult, volkert
* Test datasets: Fashion-MNIST, jasmine
* Number of configurations: 2000
* Available data: architecture parameters and hyperparameters, metafeatures 
* Target: final validation accuracy
* Evaluation metric: MSE

## Importing

Note: There are 51 steps logged: 50 epochs plus the 0th epoch, which is recorded prior to any weight updates.

In [None]:
%%capture
%cd ..
#external
import numpy as np
import json
import matplotlib.pyplot as plt

#pytorch
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

#local
from func.api import Benchmark
from func.preprocess import delete_constant_features_X,reshape_op, scale_features, create_metafeatures_2d, concatenate_metafeatures_features
from func.networks.FNN_WO_HPO import FNN_WO_HPO
from func.networks.FNN_WO_HPO_BN import FNN_WO_HPO_BN
from func.networks.FNN_meta_WO_HPO import FNN_meta_WO_HPO
from func.train_eval import train_model, eval_model
from func.parse_metafeatures import parse_metafeatures_dict, remove_nan_metafeatures, create_metafeatures_array
from func.load_data import read_data, TrainValSplitter

## Cuda config

In [None]:
# Ask PyTorch to release its cached CUDA memory, then pick the compute
# device: the first GPU when CUDA is available, otherwise the CPU.
torch.cuda.empty_cache()
device = (
    torch.device("cuda:0")
    if torch.cuda.is_available()
    else torch.device("cpu")
)

## Load data

In [None]:
# Path of the cached benchmark JSON, relative to the working directory.
bench_dir = "cached/six_datasets_lw.json"

# Open the benchmark; `cache=False` is forwarded unchanged (its exact effect
# is defined by func.api.Benchmark).
bench = Benchmark(
    bench_dir,
    cache=False,
)

In [None]:
# Fixed train/test partition of the benchmark datasets.
train_datasets = ["adult", "higgs", "vehicle", "volkert"]
test_datasets = ["Fashion-MNIST", "jasmine"]

# Print every dataset name the benchmark reports, for comparison with the
# hard-coded partition above.
dataset_names_all = bench.get_dataset_names()
print(dataset_names_all)

## Meta feature handling

In [None]:
# Load the raw metafeatures from the cached JSON file.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# platform's locale default.
with open("cached/metafeatures_6.json", "r", encoding="utf-8") as f:
    metafeatures = json.load(f)

# Presumably removes metafeatures that contain NaN (per the helper's name);
# the exact rule lives in func.parse_metafeatures.
metafeatures_without_nan = remove_nan_metafeatures(metafeatures)

## Prepare data

In [None]:
# Build the arrays for the train/validation pool (the "TV" suffix) and for
# the held-out test datasets.
X, y, dataset_names_TV = read_data(bench, train_datasets)
X_test, y_test, dataset_names_test = read_data(bench, test_datasets)

# Metafeature arrays for the same two groups of datasets.
X_metafeatures_TV = create_metafeatures_array(
    metafeatures_without_nan,
    train_datasets,
)
X_metafeatures_test = create_metafeatures_array(
    metafeatures_without_nan,
    test_datasets,
)

In [None]:
# One splitter instance is reused for every array so that features, targets,
# dataset names and metafeatures all go through the same splitter. How it
# partitions the rows is defined in func.load_data.TrainValSplitter.
tv_splitter = TrainValSplitter(dataset_names=dataset_names_TV)

X_train, X_val = tv_splitter.split(X)
y_train, y_val = tv_splitter.split(y)
dataset_names_train, dataset_names_val = tv_splitter.split(dataset_names_TV)
X_metafeatures_train, X_metafeatures_val = tv_splitter.split(X_metafeatures_TV)

# Shape report: features, then targets, then metafeatures.
print(f"X_train: {X_train.shape}")
print(f"X_val: {X_val.shape}")
print(f"X_test: {X_test.shape}")
print()
print(f"Y_Train: {y_train.shape}")
print(f"Y_val: {y_val.shape}")
print(f"Y_Test: {y_test.shape}")
print()
print(f"X_metafeatures_train: {X_metafeatures_train.shape}")
print(f"X_metafeatures_val: {X_metafeatures_val.shape}")
print(f"X_metafeatures_test: {X_metafeatures_test.shape}")

## Preprocess data

In [None]:
# --- Configuration features: drop constant columns -------------------------
# NOTE(review): the helper is called on each split separately. If it decides
# per input which columns are constant, the splits could end up with
# different columns - confirm against func.preprocess.
X_train_transformed = delete_constant_features_X(X_train)
X_val_transformed = delete_constant_features_X(X_val)
X_test_transformed = delete_constant_features_X(X_test)

print(f"X_train_transformed: {X_train_transformed.shape}")
print(f"X_val_transformed: {X_val_transformed.shape}")
print(f"X_test_transformed: {X_test_transformed.shape}")
print()

# --- Targets: reshape -------------------------------------------------------
# A later cell unpacks the target tensor's shape into two values, so the
# result is expected to be 2-D.
y_train_transformed = reshape_op(y_train)
y_val_transformed = reshape_op(y_val)
y_test_transformed = reshape_op(y_test)

print(f"y_train_transformed:  {y_train_transformed.shape}")
print(f"y_val_transformed:  {y_val_transformed.shape}")
print(f"y_test_transformed:  {y_test_transformed.shape}")
print()

# --- Metafeatures: convert to 2-D (per the helper's name) -------------------
X_metafeatures_train_transformed = create_metafeatures_2d(X_metafeatures_train)
X_metafeatures_val_transformed = create_metafeatures_2d(X_metafeatures_val)
X_metafeatures_test_transformed = create_metafeatures_2d(X_metafeatures_test)

print(f"X_metafeatures_train_transformed: {X_metafeatures_train_transformed.shape}")
print(f"X_metafeatures_val_transformed: {X_metafeatures_val_transformed.shape}")
print(f"X_metafeatures_test_transformed: {X_metafeatures_test_transformed.shape}")
print()

In [None]:
# Min-max scaling of every array that feeds the model.
# NOTE(review): each split is passed to scale_features on its own. If the
# helper derives min/max from its input, train/val/test (targets included)
# are scaled with different statistics - confirm against func.preprocess.

# --- Configuration features --------------------------------------------------
X_train_scaled = scale_features(X_train_transformed, method="minmax")
X_val_scaled = scale_features(X_val_transformed, method="minmax")
X_test_scaled = scale_features(X_test_transformed, method="minmax")

print(f"X_train_scaled: {X_train_scaled.shape}")
print(f"X_val_scaled: {X_val_scaled.shape}")
print(f"X_test_scaled: {X_test_scaled.shape}")
print()

# --- Targets -----------------------------------------------------------------
y_train_scaled = scale_features(y_train_transformed, method="minmax")
y_val_scaled = scale_features(y_val_transformed, method="minmax")
y_test_scaled = scale_features(y_test_transformed, method="minmax")

print(f"y_train_scaled: {y_train_scaled.shape}")
print(f"y_val_scaled: {y_val_scaled.shape}")
print(f"y_test_scaled: {y_test_scaled.shape}")
print()

# --- Metafeatures ------------------------------------------------------------
X_metafeatures_train_scaled = scale_features(
    X_metafeatures_train_transformed, method="minmax"
)
X_metafeatures_val_scaled = scale_features(
    X_metafeatures_val_transformed, method="minmax"
)
X_metafeatures_test_scaled = scale_features(
    X_metafeatures_test_transformed, method="minmax"
)

print(f"X_metafeatures_train_scaled: {X_metafeatures_train_scaled.shape}")
print(f"X_metafeatures_val_scaled: {X_metafeatures_val_scaled.shape}")
print(f"X_metafeatures_test_scaled: {X_metafeatures_test_scaled.shape}")
print()

## Hyperparameters + Metafeatures  -> New feature matrix

In [None]:
# Combine the scaled configuration features with the scaled metafeatures into
# one feature matrix per split (the joining rule is defined in func.preprocess).
X_features_train = concatenate_metafeatures_features(
    X_train_scaled,
    X_metafeatures_train_scaled,
)
X_features_val = concatenate_metafeatures_features(
    X_val_scaled,
    X_metafeatures_val_scaled,
)
X_features_test = concatenate_metafeatures_features(
    X_test_scaled,
    X_metafeatures_test_scaled,
)

print(f"X_features_train: {X_features_train.shape}")
print(f"X_features_val: {X_features_val.shape}")
print(f"X_features_test: {X_features_test.shape}")
print()

## Convert all numpy arrays to pytorch tensors

In [None]:
def _as_float32_tensor(array):
    """Cast *array* to float32 with ``astype`` and wrap it via ``torch.from_numpy``."""
    return torch.from_numpy(array.astype(np.float32))


# Feature matrices -> float32 tensors.
X_train_tensor = _as_float32_tensor(X_features_train)
X_val_tensor = _as_float32_tensor(X_features_val)
X_test_tensor = _as_float32_tensor(X_features_test)

print(f"X_train_tensor:  {X_train_tensor.shape}")
print(f"X_val_tensor:  {X_val_tensor.shape}")
print(f"X_test_tensor:  {X_test_tensor.shape}")
print()

# Scaled targets -> float32 tensors.
y_train_tensor = _as_float32_tensor(y_train_scaled)
y_val_tensor = _as_float32_tensor(y_val_scaled)
y_test_tensor = _as_float32_tensor(y_test_scaled)

print(f"y_train_tensor:  {y_train_tensor.shape}")
print(f"y_val_tensor:  {y_val_tensor.shape}")
print(f"y_test_tensor:  {y_test_tensor.shape}")
print()

## Settings for baseline FNN

In [None]:
# Hand-picked training constants.
batch_size = 4
epochs = 70
# NOTE(review): hidden_units is not referenced by any other visible cell.
hidden_units = 7

# Sizes derived from the data; both tensors must be 2-D for the unpacking
# below to succeed.
n_samples, n_features = X_train_tensor.shape
_, output_size = y_train_tensor.shape

print(f"# Training samples: {n_samples}")
print(f"# Features per sample or inputs for 1st layer: {n_features}")
print(f"output_size:  {output_size}")

## DataLoading

In [None]:
# Training data: reshuffled every epoch so minibatches differ between epochs.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Validation and test data are only evaluated, never trained on, so they are
# iterated in a fixed order (shuffle=False). This keeps evaluation
# deterministic from run to run.
validation_dataset = TensorDataset(X_val_tensor, y_val_tensor)
validation_dataloader = DataLoader(
    validation_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# The test loader yields one sample per batch.
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)

## Check the data in the tensors

In [None]:
# Print the first training minibatch as a sanity check.
# The loop variables are deliberately NOT named `x`/`y`: at notebook (module)
# scope a loop variable called `y` would overwrite the target array `y`
# produced by read_data, which the train/validation split cell reads.
for x_batch, y_batch in train_dataloader:
    print("X- minibatched: ", x_batch)
    print("y- minibatched: ", y_batch)
    break

## Training and scoring

In [None]:
# Instantiate the metafeature-aware network from the input width and the
# target width derived from the training tensors.
# NOTE(review): the model is not moved to `device` in this cell - confirm
# whether train_model handles device placement.
model = FNN_meta_WO_HPO(n_features, output_size)

# Heading and model summary, one per line.
print("Model:", model, sep="\n")

In [None]:
# Mean-squared error is the evaluation metric stated for this task.
criterion = nn.MSELoss()

# Adam with its default hyperparameters.
optimizer = optim.Adam(model.parameters())

# NOTE(review): T_0=100 is larger than epochs=70; if train_model steps the
# scheduler once per epoch, no warm restart happens during training - confirm
# the stepping granularity in func.train_eval.
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=100)

train_model(
    train_dataloader,
    validation_dataloader,
    epochs,
    model,
    optimizer,
    scheduler,
    criterion,
)
# Test-set evaluation is left disabled here:
#eval_model(test_dataloader, model, criterion)