In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader, SubsetRandomSampler

from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import StratifiedKFold, KFold

from torch_lr_finder import LRFinder

from data import preprocessing, postprocessing
from modelling import LSTMModel_base, train_epoch, valid_epoch, L1Loss_masked
from func import get_timestamp

import timeit, copy

In [None]:
# Read the raw train and test tables from the local data directory.
train, test = (
    pd.read_csv('data/train.csv'),
    pd.read_csv('data/test.csv'),
)

In [None]:
# Feature switches and scaler forwarded as keyword arguments to preprocessing().
data_kwargs = dict(
    u_in_cumsum=True,
    # forward lags
    u_in_lag12=True,
    u_in_lag34=False,
    # backward lags
    u_in_lag_back12=True,
    u_in_lag_back34=False,
    # forward differences
    u_in_diff12=True,
    u_in_diff34=False,
    # backward differences
    u_in_diff_back12=True,
    u_in_diff_back34=False,
    # per-sequence aggregates (currently switched off)
    u_in_last=False,
    u_in_max=False,
    scaler=RobustScaler(),
)

# preprocessing() hands back the processed frames together with `features`;
# the raw `train`/`test` names are rebound to the processed versions.
train, test, features = preprocessing(train, test, **data_kwargs)

In [None]:
# Pull the target column out of the training frame; pop() removes it in place.
target = train.pop('pressure')

# Remove the id/breath_id columns in place so they are not counted in input_size.
for frame in (train, test):
    frame.drop(columns=['id', 'breath_id'], inplace=True)

# Number of remaining columns = width of each model input step.
input_size = train.shape[1]

# Displayed as the cell output for a quick sanity check.
features, train.columns, input_size

In [None]:
def _as_sequences(frame, width):
    """Convert a pandas object into a float32 tensor of shape (-1, 80, width)."""
    values = frame.to_numpy()
    return torch.tensor(values).reshape(-1, 80, width).float()


# Fold the flat per-row tables into blocks of 80 consecutive rows.
train = _as_sequences(train, input_size)
test = _as_sequences(test, input_size)
target = _as_sequences(target, 1)

In [None]:
# Inference: run each saved checkpoint over the test tensor, store its flat
# predictions as one column per fold, then build and write the submission.
modelnames = ["kaggle_fold_1.pth"]

# NOTE(review): `timestamp` is not used in this cell; kept in case later cells rely on it.
timestamp = get_timestamp()
df = pd.read_csv('data/sample_submission.csv')

# Use the GPU when present, otherwise fall back to CPU so the cell still runs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Placeholder targets so each batch unpacks as an (inputs, targets) pair;
# the values are never read.
test_target = torch.zeros(test.shape[0],80,1).float()
test_dataset = TensorDataset(test, test_target)
test_loader = DataLoader(test_dataset, batch_size=256, num_workers = 4)

print(f"Total folds: {len(modelnames)}")

for fold, modelname in enumerate(modelnames):
    model = LSTMModel_base(input_size)
    model.to(device)
    filename = f'models/{modelname}'
    # map_location lets a checkpoint saved on GPU load on whichever device is in use.
    model.load_state_dict(torch.load(filename, map_location=device))
    model.eval()

    y_preds = []
    with torch.no_grad():
        for x, _ in test_loader:
            y_pred = model(x.to(device))
            # Flatten every batch to 1-D before collecting. A bare squeeze()
            # would also drop the batch axis when the final batch holds a
            # single sequence, making the concatenate below fail on
            # mismatched dimensions.
            y_preds.append(y_pred.reshape(-1).cpu().numpy())

    # The loader does not shuffle, so concatenating the per-batch vectors
    # keeps predictions in the original row order.
    y_preds = np.concatenate(y_preds, axis=0)
    df[fold] = y_preds

    print(f"Fold {fold} done")

# postprocessing() receives the frame with one prediction column per fold
# plus the number of folds, and returns the final submission frame.
submission = postprocessing(df, len(modelnames))
submission.to_csv('submission.csv', index = False)
print('Done')