In [24]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from transformers import ViTFeatureExtractor, ViTModel, ViTConfig
from tqdm.notebook import tqdm

In [25]:
# Directory path constant. Not referenced by any of the cells visible here.
MODEL_PATH = './data/'

In [26]:
# Raw inputs: two .npz archives (train / test features) and one CSV of training targets.
# np.load on an .npz returns a lazy, dict-like archive; arrays are read on key access.
X_train = np.load('./data/X_train_surge_new.npz')
X_test = np.load('./data/X_test_surge_new.npz')

# Training targets; the surge1_t0..surge1_t9 columns are read from this frame later on.
Y_train = pd.read_csv('./data/Y_train_surge.csv')

In [27]:
# Converting the loaded .npz object to an array lists the names of the arrays it
# contains (see the recorded output of this cell).
np.array(X_train)

array(['id_sequence', 't_slp', 'slp', 't_surge1_input', 'surge1_input',
       't_surge2_input', 'surge2_input', 't_surge1_output',
       't_surge2_output'], dtype='<U15')

In [28]:
# Number of training sequences, taken from the leading axis of 't_slp'.
datalen = X_train['t_slp'].shape[0]

In [32]:
# Pressure fields: one (40, 41, 41) stack per sequence.
X_pressure = X_train['slp'].reshape((datalen, 40, 41, 41))

# Auxiliary inputs for station 1, concatenated column-wise: input timestamps,
# input surge values, and the timestamps at which the output is requested.
surge_keys = ('t_surge1_input', 'surge1_input', 't_surge1_output')
X_surge = np.concatenate([X_train[key] for key in surge_keys], axis = 1)

# Targets: the ten 'surge1_t<i>' columns of the label frame, one column each.
target_columns = [f'surge1_t{i}' for i in range(10)]
Y = np.concatenate(
    [np.array(Y_train[column]).reshape((datalen, -1)) for column in target_columns],
    axis = 1,
)

In [33]:
# Global (scalar) statistics over every element of each input array.
# NOTE(review): X_surge mixes timestamp columns with surge values, yet a single
# scalar mean/std is applied to all of them; per-column statistics may be more
# appropriate. Confirm against the value ranges of the 't_*' arrays.
pressure_mean = X_pressure.mean()
pressure_std = X_pressure.std()
surge_mean = X_surge.mean()
surge_std = X_surge.std()

# Standardize the inputs.
# NOTE(review): this rebinds X_pressure / X_surge, so re-running the cell recomputes
# the statistics from already-normalized data and overwrites the saved mean/std.
X_pressure = (X_pressure - pressure_mean) / pressure_std
X_surge = (X_surge - surge_mean) / surge_std

# Target normalization is disabled; Y keeps its original scale.
#Y = (Y - surge_mean)/surge_std

In [34]:
# Sanity check: shapes of the pressure input, surge input and target arrays.
X_pressure.shape, X_surge.shape, Y.shape

((5599, 40, 41, 41), (5599, 30), (5599, 10))

In [35]:
class Model(nn.Module):
    """ViT encoder over the pressure stack followed by a linear regression head.

    The pressure tensor is encoded by a ViT built from a fresh ``ViTConfig``;
    all output tokens are flattened, concatenated with the auxiliary surge
    features and mapped to the outputs by one linear layer.

    Args:
        image_size: Height/width of the (square) pressure maps.
        patch_size: Side length of a ViT patch.
        num_channels: Number of channels in the pressure input.
        encoder_stride: Forwarded to ``ViTConfig`` unchanged.
        surge_features: Width of the auxiliary feature vector (default 30).
        num_outputs: Number of regression targets (default 10).
    """

    def __init__(self, image_size = 41, patch_size = 4, num_channels = 40, encoder_stride = 4,
                 surge_features = 30, num_outputs = 10):
        super(Model, self).__init__()
        # Forward the constructor arguments instead of hard-coded literals so that
        # non-default values actually reach the encoder.
        config = ViTConfig(
            image_size = image_size,
            patch_size = patch_size,
            num_channels = num_channels,
            encoder_stride = encoder_stride,
        )
        # Sequence length of the encoder output: one token per patch (the patch
        # grid is governed by patch_size, not encoder_stride) plus the [CLS] token.
        num_tokens = (image_size // patch_size) ** 2 + 1
        # Width of the flattened encoder output for one sample.
        self.hidden_size = num_tokens * config.hidden_size
        self.ViT = ViTModel(config)
        self.linear = nn.Linear(self.hidden_size + surge_features, num_outputs)

    def forward(self, x):
        """Run the model on a ``(pressure, surge)`` pair.

        Args:
            x: Tuple ``(pressure, surge)``; ``pressure`` is fed to the ViT and
                ``surge`` is a ``(batch, surge_features)`` tensor.

        Returns:
            Tensor of shape ``(batch, num_outputs)``.
        """
        pressure, surge = x
        # flatten(start_dim=1) keeps the batch axis intact; a width mismatch then
        # fails loudly in the linear layer instead of being folded into the batch.
        hidden = self.ViT(pressure).last_hidden_state.flatten(start_dim = 1)
        x = torch.cat([hidden, surge], dim = 1)
        x = self.linear(x)
        return x

In [36]:
# Instantiate the network with its default constructor arguments.
model = Model()

In [37]:
# 90 % / 10 % train / validation split over the sample indices.
trainlen = int(0.9*datalen)
vallen = datalen - trainlen

# Seed the split so that Restart & Run All reproduces the same partition.
split_generator = torch.Generator().manual_seed(0)
train_subset, val_subset = torch.utils.data.random_split(
    np.arange(datalen), [trainlen, vallen], generator = split_generator
)

# Expose plain integer index arrays: random_split returns Subset objects, and using
# those directly as NumPy fancy indices only works through the sequence protocol.
train_idx = np.asarray(train_subset.indices)
val_idx = np.asarray(val_subset.indices)

In [38]:
# Select the training rows of every array ...
X_pressure_train = X_pressure[train_idx]
X_surge_train = X_surge[train_idx]
# NOTE(review): this rebinds Y_train, which previously held the label DataFrame;
# cells that read label columns from Y_train cannot be re-run after this one.
Y_train = Y[train_idx]

# ... and the validation rows.
X_pressure_val = X_pressure[val_idx]
X_surge_val = X_surge[val_idx]
Y_val = Y[val_idx]

In [39]:
# Sanity check: shapes of the training inputs after the split.
X_pressure_train.shape, X_surge_train.shape

((5039, 40, 41, 41), (5039, 30))

In [40]:
# Materialize one (pressure, surge, target) tuple per sample; a plain list of
# tuples is what the DataLoader consumes as a map-style dataset.
train_data = [
    (pressure, surge, target)
    for pressure, surge, target in zip(X_pressure_train, X_surge_train, Y_train)
]
val_data = [
    (pressure, surge, target)
    for pressure, surge, target in zip(X_pressure_val, X_surge_val, Y_val)
]

In [41]:
# Sanity check: shape of the training targets after the split.
Y_train.shape

(5039, 10)

In [42]:
# Mini-batch size shared by both loaders.
batch_size = 8

# Training batches are reshuffled every epoch; validation keeps a fixed order.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

In [43]:
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [44]:
# Move the model's parameters and buffers onto the selected device.
model = model.to(device)

In [45]:
# Mean-squared-error objective (mean reduction is the MSELoss default).
criterion = nn.MSELoss(reduction='mean')
# Adam over all model parameters with a fixed learning rate of 1e-4.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [47]:
epochs = 15


def _to_float_on_device(*tensors):
    """Move each tensor to `device` and cast it to float32.

    The batches originate from NumPy arrays and may arrive as float64, while the
    model's parameters are float32. Both the training and the validation loop
    must cast, otherwise the forward pass raises a dtype mismatch.
    """
    return tuple(t.to(device=device, dtype=torch.float32) for t in tensors)


for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    for x1, x2, y in tqdm(train_loader, total = len(train_loader), leave=False):
        x1, x2, y = _to_float_on_device(x1, x2, y)
        optimizer.zero_grad()
        pred = model((x1, x2))
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()

    # ---- validation pass ----
    model.eval()
    val_loss_sum = 0.0
    val_samples = 0
    with torch.no_grad():
        for x1, x2, y in tqdm(val_loader, total = len(val_loader), leave = False):
            x1, x2, y = _to_float_on_device(x1, x2, y)
            pred = model((x1, x2))
            loss = criterion(pred, y)
            # Weight each batch mean by its size so a short final batch does not
            # skew the epoch average.
            val_loss_sum += loss.item() * y.shape[0]
            val_samples += y.shape[0]
    # Guard against an empty validation loader.
    val_loss = val_loss_sum / val_samples if val_samples else float('nan')
    print(f'Epoch {epoch+1}: Validation Loss = {val_loss}')
        

  0%|          | 0/630 [00:00<?, ?it/s]