In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from utils import rnmse
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from skorch import NeuralNet
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import ExtraTreesRegressor
import pandas as pd
import seaborn as sns
# Global seaborn appearance for the figures in this notebook: white background
# with grid lines, the "notebook" scaling context, and 7 colours taken from
# the 'hot' palette.
sns.set_style('whitegrid')
sns.set_context('notebook')
sns.set_palette('hot', n_colors=7)

In [2]:
# Each training quantity is stored as two files; load both parts and join them
# along the leading (sample) dimension.
x_parts = (torch.load("dataset/x_train.pt"), torch.load("dataset/x_train_2.pt"))
x = torch.cat(x_parts, dim=0)

y_parts = (torch.load("dataset/y_train.pt"), torch.load("dataset/y_train_2.pt"))
y = torch.cat(y_parts, dim=0)

c_parts = (torch.load("dataset/c_train.pt"), torch.load("dataset/c_train_2.pt"))
c = torch.cat(c_parts, dim=0)

# The held-out test split is stored as a single file per quantity.
x_test = torch.load("dataset/x_test.pt")
y_test = torch.load("dataset/y_test.pt")
c_test = torch.load("dataset/c_test.pt")

In [3]:
# Keep the first sample of each tensor around for quick inspection.
x0 = x[0]
y0 = y[0]
c0 = c[0]

In [5]:
# Per-sample shapes of the first x, y and c entries.
print(x0.shape, y0.shape, c0.shape)

torch.Size([256, 128]) torch.Size([256, 128]) torch.Size([128, 128])


In [6]:
# --- Training split -------------------------------------------------------
# Convert the tensors to NumPy arrays for scikit-learn.
x_np = x.numpy()
y_np = y.numpy()
c_np = c.numpy()

# Flatten every sample to a single row: (n_samples, n_features).
x_np_reshaped = x_np.reshape(x_np.shape[0], -1)
y_np_reshaped = y_np.reshape(y_np.shape[0], -1)
c_np_reshaped = c_np.reshape(c_np.shape[0], -1)

# Model input = flattened x followed by flattened c, column-wise.
x_np_reshaped_with_c = np.concatenate((x_np_reshaped, c_np_reshaped), axis=1)

# --- Test split (same treatment) -----------------------------------------
x_test_np = x_test.numpy()
y_test_np = y_test.numpy()
c_test_np = c_test.numpy()

x_test_np_reshaped = x_test_np.reshape(x_test_np.shape[0], -1)
y_test_np_reshaped = y_test_np.reshape(y_test_np.shape[0], -1)
c_test_np_reshaped = c_test_np.reshape(c_test_np.shape[0], -1)

x_test_np_reshaped_with_c = np.concatenate((x_test_np_reshaped, c_test_np_reshaped), axis=1)

In [7]:
# Sanity check: (training samples, flattened x features + flattened c features).
x_np_reshaped_with_c.shape

(27000, 49152)

In [8]:
# Number of principal components kept by every PCA step below (inputs and
# targets alike); also the default input/output width of the MLP and CNN.
n_components = 1024

In [9]:
class MLP(nn.Module):
    """Fully connected regressor: input_dim -> 128 -> 64 -> 128 -> output_dim.

    Every hidden layer is followed by a ReLU; the final linear layer has no
    activation.

    Args:
        input_dim: Width of the input vectors (defaults to `n_components`).
        output_dim: Width of the predicted vectors (defaults to `n_components`).
    """

    def __init__(self, input_dim=n_components, output_dim=n_components):
        super().__init__()

        hidden_widths = (128, 64, 128)

        # Build Linear/ReLU pairs in order so the Sequential indices (0, 2, 4
        # for the hidden Linear layers, 6 for the output layer) stay the same.
        layers = []
        in_width = input_dim
        for out_width in hidden_widths:
            layers.append(nn.Linear(in_width, out_width))
            layers.append(nn.ReLU())
            in_width = out_width
        layers.append(nn.Linear(in_width, output_dim))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to `x` and return the result."""
        return self.net(x)

In [11]:
class CNN(nn.Module):
    """1-D convolutional regressor over a vector treated as a 1-channel signal.

    Three Conv1d + ReLU stages (1 -> 32 -> 64 -> 64 channels) are followed by
    a flatten and a two-layer fully connected head.

    Args:
        input_dim: Length of the input vectors (defaults to `n_components`).
        output_dim: Length of the predicted vectors (defaults to `n_components`).
    """

    def __init__(self, input_dim=n_components, output_dim=n_components):
        super().__init__()

        # kernel 5 / padding 2 and kernel 3 / padding 1 (default stride 1) keep
        # the sequence length equal to input_dim through every convolution.
        conv_stack = [
            nn.Conv1d(1, 32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.Conv1d(32, 64, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.Conv1d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        self.features = nn.Sequential(*conv_stack)

        # 64 output channels, each of length input_dim, once flattened.
        flat_width = 64 * input_dim
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_width, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim),
        )

    def forward(self, x):
        """Insert a channel axis at position 1, then run features and head."""
        single_channel = x.unsqueeze(1)
        feature_maps = self.features(single_channel)
        return self.head(feature_maps)

Next, we run cross-validation for each of the models.

In [13]:
def scorer_rnmse(estimator, x, y):
    """Score a fitted estimator with `rnmse` for use as a `scoring=` callable.

    Args:
        estimator: Fitted object exposing `predict`.
        x: Inputs to predict on.
        y: Reference targets for `x`.

    Returns:
        Whatever `rnmse(prediction, y)` returns.
        NOTE(review): `rnmse` comes from `utils`; presumably lower is better --
        confirm before using this scorer for model selection.
    """
    predictions = estimator.predict(x)
    return rnmse(predictions, y)

def get_rnmse():
    """Return the `rnmse` function itself (not a value computed by it).

    Passed as the `criterion=` argument of the skorch networks below.
    NOTE(review): presumably this indirection exists because skorch calls the
    `criterion` object to instantiate it and then calls the result on
    (prediction, target) -- confirm against the skorch documentation.
    """
    return rnmse

In [14]:
# Pick the compute device: Apple's Metal backend when available (the original
# setting), otherwise CUDA, otherwise the CPU, so the notebook also runs on
# machines that have no MPS device.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Seed the global NumPy and PyTorch generators so that a fresh
# Restart & Run All starts every stochastic step from the same state.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

n_points = x_np.shape[0]  # number of training samples fed to cross-validation
n_epochs = 64             # max_epochs for the skorch networks
scores = {}               # model label -> per-fold scores from cross_val_score

In [15]:
def _with_pca(step_name, estimator):
    """Wrap `estimator` so that inputs and targets are both PCA-reduced.

    Inputs pass through a PCA step before reaching `estimator`; targets are
    transformed by a separate PCA, and predictions are mapped back through it
    by TransformedTargetRegressor. `check_inverse=False` skips the round-trip
    check of the target transformer.
    """
    regressor = Pipeline([
        ("pca", PCA(n_components=n_components)),
        (step_name, estimator),
    ])
    return TransformedTargetRegressor(
        regressor=regressor,
        transformer=PCA(n_components=n_components),
        check_inverse=False,
    )


def _make_net(module):
    """Build the skorch wrapper shared by the neural models.

    NOTE(review): `get_rnmse` (a factory) is passed as the criterion rather
    than `rnmse` itself, presumably because skorch calls the criterion object
    to instantiate it -- confirm against the skorch documentation.
    """
    return NeuralNet(
        module,
        max_epochs=n_epochs,
        criterion=get_rnmse,
        optimizer=optim.Adam,
        lr=1e-3,
        iterator_train__shuffle=False,
        device=device,
    )


# Linear baseline and the two neural models, all operating in PCA space.
pipe_lm = _with_pca("lm", LinearRegression(n_jobs=-1))
pipe_mlp = _with_pca("mlp", _make_net(MLP))
pipe_cnn = _with_pca("cnn", _make_net(CNN))

In [16]:
# 5-fold cross-validation of the linear baseline; `scorer_rnmse` is evaluated
# on each held-out fold, so one score per fold is stored.
scores["LM"] = cross_val_score(
    estimator=pipe_lm,
    X=x_np_reshaped_with_c[:n_points],
    y=y_np_reshaped[:n_points],
    scoring=scorer_rnmse,
    cv=5,
)

In [17]:
# 5-fold cross-validation of the MLP pipeline; `scorer_rnmse` is evaluated on
# each held-out fold, so one score per fold is stored.
scores["MLP on PCA"] = cross_val_score(
    estimator=pipe_mlp,
    X=x_np_reshaped_with_c[:n_points],
    y=y_np_reshaped[:n_points],
    scoring=scorer_rnmse,
    cv=5,
)

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.9540[0m        [32m0.8307[0m  1.1510
      2        [36m0.8232[0m        [32m0.8189[0m  0.5300
      3        [36m0.8162[0m        [32m0.8130[0m  0.6426
      4        [36m0.7797[0m        [32m0.6928[0m  0.9914
      5        [36m0.6499[0m        [32m0.6255[0m  1.0745
      6        [36m0.6059[0m        [32m0.5905[0m  1.1019
      7        [36m0.5777[0m        [32m0.5748[0m  1.0187
      8        [36m0.5648[0m        [32m0.5632[0m  0.9664
      9        [36m0.5551[0m        [32m0.5550[0m  1.0812
     10        [36m0.5460[0m        [32m0.5456[0m  1.1181
     11        [36m0.5374[0m        [32m0.5378[0m  1.1249
     12        [36m0.5312[0m        [32m0.5323[0m  0.9821
     13        [36m0.5264[0m        [32m0.5285[0m  1.1154
     14        [36m0.5226[0m        [32m0.5248[0m  1.0429
     15        [36m0.5188[0m        [32m0

In [18]:
# 5-fold cross-validation of the CNN pipeline; `scorer_rnmse` is evaluated on
# each held-out fold, so one score per fold is stored.
scores["CNN on PCA"] = cross_val_score(
    estimator=pipe_cnn,
    X=x_np_reshaped_with_c[:n_points],
    y=y_np_reshaped[:n_points],
    scoring=scorer_rnmse,
    cv=5,
)

KeyboardInterrupt: 

In [None]:
# Display the per-model cross-validation scores collected so far.
scores

In [None]:
# Box plot of the cross-validation scores: one box per model, i.e. one per
# column of the DataFrame built from `scores`.
fig, ax = plt.subplots(figsize=(7, 7))

# Pass the frame by keyword so the call does not depend on the positional
# parameter order of sns.boxplot.
sns.boxplot(data=pd.DataFrame(scores), ax=ax)

ax.grid(True)
ax.set_ylabel("5-fold CV RNMSE")
ax.tick_params(axis="x", labelrotation=45)

# bbox_inches="tight" keeps the rotated tick labels inside the saved image.
fig.savefig("5cvrnmse.jpg", dpi=150, bbox_inches="tight")
plt.show()