In [2]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import os
import sys
sys.path.append('../src')
from torch import nn
from omegaconf import OmegaConf
from torch.utils.data import DataLoader
import torch
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler
import numpy as np
from tqdm import tqdm
pd.options.display.max_columns=100

In [3]:
from datalib import VentilatorDataClassification
import modellib
from utils import fc
import datalib
from torch.utils.data import Dataset
from train_classification import get_group_dict,map_dataset
import joblib
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import GroupKFold
from litmodellib import ClassifcationModel
import pytorch_lightning as pl
import copy

In [4]:
class VentilatorDataClassification(Dataset):
    """Map-style dataset that serves one breath per item.

    Parameters
    ----------
    group_dict : mapping
        Lookup indexed as ``group_dict[breath_id][column]``.
    breath_df : pandas object supporting ``.iloc``
        The breath ids to serve; item ``idx`` uses ``breath_df.iloc[idx]``.
    categorical_columns, numerical_columns : sequence of str
        Column names stacked into the ``"cat"`` and ``"num"`` tensors.
    target_column : str, optional
        When given, each item also carries a ``"target"`` tensor.

    Note: this definition shadows the class of the same name imported from
    ``datalib`` earlier in the notebook.
    """

    def __init__(
        self, group_dict, breath_df,categorical_columns, numerical_columns, target_column=None
    ):
        self.breath_df = breath_df
        self.group_dict = group_dict
        self.target_column = target_column
        self.numerical_columns = numerical_columns
        self.categorical_columns = categorical_columns

    def __len__(self):
        """Number of breaths, i.e. the length of ``breath_df``."""
        return len(self.breath_df)

    def __getitem__(self, idx):
        """Return the tensors of the ``idx``-th breath as a dict.

        Keys are ``"num"`` (float32), ``"cat"`` (long), optionally ``"target"``
        (long) and ``"u_out"`` (long). Assuming every column of the record is a
        1-D sequence, ``"num"`` / ``"cat"`` come out as (steps, n_columns)
        because the stacked columns are transposed.
        """
        record = self.group_dict[self.breath_df.iloc[idx]]

        # Columns are stacked row-wise first, then transposed so that the
        # column axis comes last.
        cat_matrix = np.array([record[name] for name in self.categorical_columns]).T
        cat_tensor = torch.tensor(cat_matrix, dtype=torch.long)
        num_matrix = np.array([record[name] for name in self.numerical_columns]).T
        num_tensor = torch.tensor(num_matrix, dtype=torch.float32)
        u_out_tensor = torch.tensor(np.array(record["u_out"]), dtype=torch.long)

        sample = {"num": num_tensor, "cat": cat_tensor}
        if self.target_column is not None:
            sample["target"] = torch.tensor(
                np.array(record[self.target_column]), dtype=torch.long
            )
        sample["u_out"] = u_out_tensor
        return sample

In [5]:
# Hard-coded absolute path of the competition data directory; train.csv,
# test.csv and sample_submission.csv are read from here.
DATA_DIR = '/mnt/disks/extra_data/kaggle/ventilator_prediction/'
# Raw R / C value -> integer category index. Neither map is referenced again in
# the cells of this notebook. NOTE(review): presumably they mirror the encoding
# applied by the imported `map_dataset` (the train.head() outputs show
# R 20 -> 2 and C 50 -> 1, which agrees with these tables) — confirm.
R_MAP = {5: 0, 50: 1, 20: 2}
C_MAP = {20: 0, 50: 1, 10: 2}

In [6]:
import os
# Debugging aid: asks CUDA to run kernel launches synchronously so that
# device-side errors are reported at the call that caused them. It slows GPU
# work down and can be removed once the notebook runs cleanly.
# NOTE(review): torch is already imported at this point — presumably the setting
# still applies because no CUDA work has happened yet; confirm.
os.environ['CUDA_LAUNCH_BLOCKING']='1'

In [7]:
# train = pd.read_csv(os.path.join(DATA_DIR,'train.csv'))
# test = pd.read_csv(os.path.join(DATA_DIR,'test.csv'))
# train.breath_id = train.breath_id.map(dict(zip(train.breath_id.unique().tolist(),range(train.breath_id.nunique()))))
# test.breath_id = train.breath_id.map(dict(zip(train.breath_id.unique().tolist(),range(train.breath_id.nunique()))))

In [35]:
def preprocess(config):
    """Load the raw train/test CSVs and turn them into model-ready frames.

    Steps, in order:
      1. Read ``train.csv`` and ``test.csv`` from ``DATA_DIR``.
      2. Replace every distinct train ``pressure`` value by an integer class
         index and dump the reverse table (index -> pressure) to
         ``../pressure_mapper.pkl``.
      3. Run the imported ``fc`` on both frames.
      4. Keep only the first ``config.seq_len`` rows of each ``breath_id``.
      5. Write the number of pressure classes to
         ``config.model.kwargs["output_dim"]``.
      6. If ``config.normalization.is_norm`` is truthy, scale every column in
         ``config.dataset.train.kwargs.numerical_columns`` with a
         ``RobustScaler`` fitted on train and applied to test.

    Parameters
    ----------
    config : OmegaConf-style config
        Must provide ``seq_len``, ``model.kwargs``, ``normalization.is_norm``
        and ``dataset.train.kwargs.numerical_columns``.

    Returns
    -------
    (train, test) : tuple of pandas.DataFrame

    Side effects
    ------------
    Mutates ``config`` (``output_dim``), overwrites ``../pressure_mapper.pkl``
    and prints the list of numerical columns when normalising.
    """
    # os.path.join instead of string concatenation: works whether or not
    # DATA_DIR ends with a path separator.
    train = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))
    test = pd.read_csv(os.path.join(DATA_DIR, "test.csv"))

    # One class per distinct pressure value; indices follow the order in which
    # the values first appear in train.csv.
    pressure_dict = dict(
        zip(train["pressure"].unique().tolist(), range(train["pressure"].nunique()))
    )
    pressure_reverse_dict = {v: k for k, v in pressure_dict.items()}
    # NOTE(review): this overwrites the mapping file on every call. The saved
    # checkpoints are presumably tied to the class order stored in it — confirm
    # that regenerating it here always reproduces the training-time mapping.
    joblib.dump(pressure_reverse_dict, "../pressure_mapper.pkl")
    train["pressure"] = train["pressure"].map(pressure_dict)

    train = fc(train)
    test = fc(test)
    train = train.groupby("breath_id").head(config.seq_len)
    test = test.groupby("breath_id").head(config.seq_len)

    # NOTE(review): classes are counted AFTER truncating to seq_len rows, so a
    # pressure value that only occurs in dropped rows would make this smaller
    # than len(pressure_dict). Left unchanged so output_dim keeps matching the
    # existing checkpoints — confirm.
    num_classes = train["pressure"].nunique()
    config.model.kwargs["output_dim"] = num_classes

    if config.normalization.is_norm:
        scl = RobustScaler()
        print(config.dataset.train.kwargs.numerical_columns)
        # The single scaler object is re-fitted for each column, so test[col]
        # must be transformed inside the same iteration.
        for col in config.dataset.train.kwargs.numerical_columns:
            train[col] = scl.fit_transform(train[[col]])
            test[col] = scl.transform(test[[col]])
    return train,test

def create_path(path):
    """Make sure the directory ``path`` exists.

    Missing parent directories are created as well, and calling this on a
    directory that already exists is a no-op. Using ``exist_ok=True`` avoids
    the check-then-create race of ``os.path.exists`` followed by ``os.mkdir``.

    Raises
    ------
    FileExistsError
        If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
        
def predict(model,dl,data,device,is_test=False):
    """Run ``model`` over ``dl`` and return a copy of ``data`` with a ``preds`` column.

    Parameters
    ----------
    model : LightningModule passed to ``Trainer.test``.
    dl : dataloader handed to ``Trainer.test``.
    data : frame whose rows the predictions are attached to; it is deep-copied,
        the caller's object is not modified.
    device : GPU index, forwarded as ``gpus=[device]``.
    is_test : when False, the ``pressure`` column is mapped back through the
        table stored in ``../pressure_mapper.pkl`` before returning.

    NOTE(review): the predictions are read from ``prediction.pt`` in the current
    working directory after ``Trainer.test`` returns — presumably the model's
    test hooks write that file; if they ever do not, a stale file from an earlier
    call would be picked up silently. Confirm against ``litmodellib``.
    """
    result = copy.deepcopy(data)
    if is_test is False or not is_test:
        # Class index -> original pressure value.
        result['pressure'] = result['pressure'].map(joblib.load('../pressure_mapper.pkl'))
    pl.Trainer(gpus=[device]).test(model = model,test_dataloaders=dl)
    # One 'preds' entry per element stored in prediction.pt; the assignment
    # below requires their count to equal the number of rows in `data`.
    stored_outputs = torch.load('prediction.pt')
    result['preds'] = [entry['preds'] for entry in stored_outputs]
    return result

In [9]:
config = OmegaConf.load('../experiments/RNN-classification-top3-5-folds/config.yaml')

In [10]:
%%time
train,test = preprocess(config)

5
10
5
10
5
10
5
10
['time_step', 'u_in', 'mean_u_in_last_5', 'min_u_in_last_5', 'max_u_in_last_5', 'std_u_in_last_5', 'mean_u_in_last_10', 'min_u_in_last_10', 'max_u_in_last_10', 'std_u_in_last_10', 'mean_u_in_next_5', 'min_u_in_next_5', 'max_u_in_next_5', 'std_u_in_next_5', 'mean_u_in_next_10', 'min_u_in_next_10', 'max_u_in_next_10', 'std_u_in_next_10', 'u_in_cumsum', 'u_in_cummean', 'u_in_cummax', 'R+C', 'R/C', 'u_in/C', 'u_in/R', 'u_in_cumsum/C', 'u_in_cumsum/R', 'lag_u_in_1', 'lead_u_in_1', 'lag_u_in_2', 'lead_u_in_2', 'lag_u_in_3', 'lead_u_in_3', 'lag_u_in_4', 'lead_u_in_4', 'auc', 'lag_auc_1', 'lead_auc_1', 'lag_auc_2', 'lead_auc_2', 'per_change_u_in_lag_u_in_1', 'per_change_u_in_lead_u_in_1', 'per_change_u_in_lag_u_in_2', 'per_change_u_in_lead_u_in_2', 'per_change_u_in_lag_u_in_3', 'per_change_u_in_lead_u_in_3', 'per_change_u_in_lag_u_in_4', 'per_change_u_in_lead_u_in_4', 'per_change_auc_lag_auc_1', 'per_change_auc_lead_auc_1', 'per_change_auc_lag_auc_2', 'per_change_auc_lead_a

In [11]:
train.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure,mean_u_in_last_5,min_u_in_last_5,max_u_in_last_5,std_u_in_last_5,mean_u_in_last_10,min_u_in_last_10,max_u_in_last_10,std_u_in_last_10,mean_u_in_next_5,min_u_in_next_5,max_u_in_next_5,std_u_in_next_5,mean_u_in_next_10,min_u_in_next_10,max_u_in_next_10,std_u_in_next_10,u_in_cumsum,u_in_cummean,u_in_cummax,R+C,R/C,u_in/C,u_in/R,u_in_cumsum/C,u_in_cumsum/R,lag_u_in_1,lead_u_in_1,lag_u_in_2,lead_u_in_2,lag_u_in_3,lead_u_in_3,lag_u_in_4,lead_u_in_4,auc,lag_auc_1,lead_auc_1,lag_auc_2,lead_auc_2,per_change_u_in_lag_u_in_1,per_change_u_in_lead_u_in_1,per_change_u_in_lag_u_in_2,per_change_u_in_lead_u_in_2,per_change_u_in_lag_u_in_3,per_change_u_in_lead_u_in_3,per_change_u_in_lag_u_in_4,per_change_u_in_lead_u_in_4,per_change_auc_lag_auc_1,per_change_auc_lead_auc_1,per_change_auc_lag_auc_2,per_change_auc_lead_auc_2
79,1,1,20,50,-0.970859,-0.297619,0,0,-0.384505,-0.171127,-0.397614,-0.263337,-0.458142,-0.042493,-0.471967,-0.399774,1.065953,-0.031305,1.068894,3.439557,1.765548,0.018408,1.204426,1.819364,-0.594637,-0.595067,-0.524132,0.25,-0.285714,-0.298032,-0.146694,-0.540842,-0.260392,-0.302266,1.119839,-0.300506,1.565188,-0.298822,1.727055,-0.297051,2.089275,-0.321506,-0.319732,0.374058,-0.318002,1.317381,11.434298,-2903.481331,5.421276,-1995.974173,3.376025,-1409.210554,2.382032,-1166.670665,0.0,-3781913.0,0.0,-4231453.0
78,2,1,20,50,-0.92004,1.070559,0,1,0.256369,-0.171127,0.53402,3.539037,0.17424,-0.042493,0.305697,1.830429,1.492825,2.278508,1.175318,0.973563,2.013322,4.060737,1.204426,0.490097,-0.536858,-0.049312,-0.076967,0.25,-0.285714,0.280513,0.917149,-0.519957,-0.230466,-0.296024,1.437315,-0.300506,1.589683,-0.298822,1.948038,-0.297051,2.263045,0.377165,-0.319732,1.230518,-0.318002,1.502362,11.3826,-2.96783,5.421341,-1.785682,3.376065,-1.96011,2.382061,-1.857665,11.581411,-14.94934,4.868676,-9.054313
77,3,1,20,50,-0.868904,1.379057,0,2,0.56633,-0.171127,0.744086,3.244096,0.480095,-0.042493,0.481046,1.657438,1.630177,2.799327,1.175318,0.588612,2.090843,4.972205,1.204426,0.205902,-0.46611,0.214644,0.023861,0.25,-0.285714,0.410964,1.157025,-0.494385,-0.193823,1.074675,1.460363,-0.294247,1.797963,-0.298822,2.113232,-0.297051,2.250963,1.235291,0.380455,1.407109,-0.318002,1.626717,2.096077,-0.175955,5.40127,-0.937977,3.376065,-1.090626,2.382061,-0.789295,6.383831,-1.38333,4.868685,-1.161575
76,4,1,20,50,-0.817518,1.401453,0,3,0.726556,-0.171127,0.759335,2.900334,0.638198,-0.042493,0.493775,1.455811,1.697694,2.837136,1.175318,0.44314,2.141576,5.038373,1.204426,0.106237,-0.394421,0.351089,0.03118,0.25,-0.285714,0.420434,1.174439,-0.468473,-0.156693,1.383742,1.656333,1.080204,1.953662,-0.292548,2.101746,-0.297051,2.221781,1.412225,1.240442,1.525824,0.383828,1.788949,0.150167,-1.476499,1.051947,-1.447407,3.36373,-0.978445,2.382061,-0.674475,1.181936,-0.8350467,2.90667,-0.9663265
75,5,1,20,50,-0.765851,1.591881,0,4,0.858372,-0.171127,0.889004,2.742609,0.768266,-0.042493,0.602014,1.363301,1.777109,3.158625,1.209125,0.107245,2.178405,5.601001,1.204426,-0.084066,-0.314727,0.46334,0.093419,0.25,-0.285714,0.500958,1.322509,-0.439666,-0.115416,1.406178,1.80283,1.390116,1.942836,1.085013,2.074004,-0.29076,2.318246,1.531171,1.417761,1.680698,1.245835,1.863975,1.148604,-0.992875,0.608626,-0.518236,0.928411,-0.295899,2.374232,-0.38065,0.743551,-1.019456,0.77755,-0.7486339


In [12]:
# Re-encode both frames with the imported `map_dataset`. Comparing the
# train.head() outputs before and after this cell: breath_id 1 -> 0,
# R 20 -> 2, C 50 -> 1.
# NOTE(review): the inputs are rebound to the outputs, so running this cell a
# second time would feed already-encoded values back in — presumably not safe
# to re-run; confirm against train_classification.map_dataset.
train = map_dataset(train)
test = map_dataset(test)

In [13]:
train.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure,mean_u_in_last_5,min_u_in_last_5,max_u_in_last_5,std_u_in_last_5,mean_u_in_last_10,min_u_in_last_10,max_u_in_last_10,std_u_in_last_10,mean_u_in_next_5,min_u_in_next_5,max_u_in_next_5,std_u_in_next_5,mean_u_in_next_10,min_u_in_next_10,max_u_in_next_10,std_u_in_next_10,u_in_cumsum,u_in_cummean,u_in_cummax,R+C,R/C,u_in/C,u_in/R,u_in_cumsum/C,u_in_cumsum/R,lag_u_in_1,lead_u_in_1,lag_u_in_2,lead_u_in_2,lag_u_in_3,lead_u_in_3,lag_u_in_4,lead_u_in_4,auc,lag_auc_1,lead_auc_1,lag_auc_2,lead_auc_2,per_change_u_in_lag_u_in_1,per_change_u_in_lead_u_in_1,per_change_u_in_lag_u_in_2,per_change_u_in_lead_u_in_2,per_change_u_in_lag_u_in_3,per_change_u_in_lead_u_in_3,per_change_u_in_lag_u_in_4,per_change_u_in_lead_u_in_4,per_change_auc_lag_auc_1,per_change_auc_lead_auc_1,per_change_auc_lag_auc_2,per_change_auc_lead_auc_2
79,1,0,2,1,-0.970859,-0.297619,0,0,-0.384505,-0.171127,-0.397614,-0.263337,-0.458142,-0.042493,-0.471967,-0.399774,1.065953,-0.031305,1.068894,3.439557,1.765548,0.018408,1.204426,1.819364,-0.594637,-0.595067,-0.524132,0.25,-0.285714,-0.298032,-0.146694,-0.540842,-0.260392,-0.302266,1.119839,-0.300506,1.565188,-0.298822,1.727055,-0.297051,2.089275,-0.321506,-0.319732,0.374058,-0.318002,1.317381,11.434298,-2903.481331,5.421276,-1995.974173,3.376025,-1409.210554,2.382032,-1166.670665,0.0,-3781913.0,0.0,-4231453.0
78,2,0,2,1,-0.92004,1.070559,0,1,0.256369,-0.171127,0.53402,3.539037,0.17424,-0.042493,0.305697,1.830429,1.492825,2.278508,1.175318,0.973563,2.013322,4.060737,1.204426,0.490097,-0.536858,-0.049312,-0.076967,0.25,-0.285714,0.280513,0.917149,-0.519957,-0.230466,-0.296024,1.437315,-0.300506,1.589683,-0.298822,1.948038,-0.297051,2.263045,0.377165,-0.319732,1.230518,-0.318002,1.502362,11.3826,-2.96783,5.421341,-1.785682,3.376065,-1.96011,2.382061,-1.857665,11.581411,-14.94934,4.868676,-9.054313
77,3,0,2,1,-0.868904,1.379057,0,2,0.56633,-0.171127,0.744086,3.244096,0.480095,-0.042493,0.481046,1.657438,1.630177,2.799327,1.175318,0.588612,2.090843,4.972205,1.204426,0.205902,-0.46611,0.214644,0.023861,0.25,-0.285714,0.410964,1.157025,-0.494385,-0.193823,1.074675,1.460363,-0.294247,1.797963,-0.298822,2.113232,-0.297051,2.250963,1.235291,0.380455,1.407109,-0.318002,1.626717,2.096077,-0.175955,5.40127,-0.937977,3.376065,-1.090626,2.382061,-0.789295,6.383831,-1.38333,4.868685,-1.161575
76,4,0,2,1,-0.817518,1.401453,0,3,0.726556,-0.171127,0.759335,2.900334,0.638198,-0.042493,0.493775,1.455811,1.697694,2.837136,1.175318,0.44314,2.141576,5.038373,1.204426,0.106237,-0.394421,0.351089,0.03118,0.25,-0.285714,0.420434,1.174439,-0.468473,-0.156693,1.383742,1.656333,1.080204,1.953662,-0.292548,2.101746,-0.297051,2.221781,1.412225,1.240442,1.525824,0.383828,1.788949,0.150167,-1.476499,1.051947,-1.447407,3.36373,-0.978445,2.382061,-0.674475,1.181936,-0.8350467,2.90667,-0.9663265
75,5,0,2,1,-0.765851,1.591881,0,4,0.858372,-0.171127,0.889004,2.742609,0.768266,-0.042493,0.602014,1.363301,1.777109,3.158625,1.209125,0.107245,2.178405,5.601001,1.204426,-0.084066,-0.314727,0.46334,0.093419,0.25,-0.285714,0.500958,1.322509,-0.439666,-0.115416,1.406178,1.80283,1.390116,1.942836,1.085013,2.074004,-0.29076,2.318246,1.531171,1.417761,1.680698,1.245835,1.863975,1.148604,-0.992875,0.608626,-0.518236,0.928411,-0.295899,2.374232,-0.38065,0.743551,-1.019456,0.77755,-0.7486339


In [14]:
%%time
# Per-breath lookup consumed by VentilatorDataClassification, which indexes it
# as group_dict[breath_id][column]. Slow: the recorded run needed 5min 52s for
# 75450 breaths.
train_grp_dict = get_group_dict(train)

100%|██████████| 75450/75450 [05:52<00:00, 214.14it/s]

CPU times: user 5min 53s, sys: 6.1 s, total: 5min 59s
Wall time: 5min 52s





In [15]:
%%time
# Same lookup for the test breaths; the recorded run needed 3min 49s for
# 50300 breaths.
test_grp_dict = get_group_dict(test)

100%|██████████| 50300/50300 [03:49<00:00, 219.30it/s]

CPU times: user 3min 48s, sys: 4 s, total: 3min 52s
Wall time: 3min 49s





In [16]:
len(train_grp_dict),len(test_grp_dict)

(75450, 50300)

In [17]:
# Materialise the 5 (train_idx, val_idx) splits, grouped by breath so that all
# rows of one breath land in the same fold. The indices are positional and are
# used with train.iloc further down.
splitter = GroupKFold(n_splits=5)
folds = [
    (train_idx, val_idx)
    for train_idx, val_idx in splitter.split(train, groups=train["breath_id"])
]

In [24]:
# One checkpoint per fold, in fold order: models[i] is evaluated on the
# validation split folds[i] in the loop below. The val_MAE embedded in each file
# name agrees with the MAE that loop printed for the same fold (e.g. 0.1722 vs
# 0.17217 for fold 0), which is a useful sanity check that the splits line up.
models = [
    '../experiments/RNN-classification-top3-5-folds/fold_0/model-epoch=96-val_MAE=0.1722-val_loss=0.0000.ckpt',
    '../experiments/RNN-classification-top3-5-folds/fold_1/model-epoch=97-val_MAE=0.1653-val_loss=0.0000.ckpt',
    '../experiments/RNN-classification-top3-5-folds/fold_2/model-epoch=98-val_MAE=0.1731-val_loss=0.0000.ckpt',
    '../experiments/RNN-classification-top3-5-folds/fold_3/model-epoch=90-val_MAE=0.1671-val_loss=0.0000.ckpt',
    '../experiments/RNN-classification-top3-5-folds/fold_4/model-epoch=84-val_MAE=0.1719-val_loss=0.0000.ckpt',    
]

In [26]:
from sklearn.metrics import mean_absolute_error

In [25]:
# Accumulators for the per-fold loop: one out-of-fold prediction frame and one
# test prediction frame is appended per checkpoint.
oof_preds=[]
test_preds=[]

In [30]:
# Dataset and loader over every distinct test breath. No target column is
# passed, and shuffle=False keeps the batches in breath order.
dataset_cfg = config.dataset.train.kwargs
test_df = VentilatorDataClassification(
    group_dict=test_grp_dict,
    breath_df=test['breath_id'].drop_duplicates(),
    categorical_columns=dataset_cfg.categorical_columns,
    numerical_columns=dataset_cfg.numerical_columns,
)
test_dl = DataLoader(
    dataset=test_df,
    batch_size=128,
    num_workers=8,
    pin_memory=True,
    shuffle=False,
)

In [36]:
# Start from empty accumulators so that re-running this cell does not append a
# second copy of every fold's predictions.
oof_preds = []
test_preds = []

dataset_kwargs = config.dataset.train.kwargs
# Iterate over the checkpoints themselves instead of a hard-coded [0,1,2,3,4];
# checkpoint i is scored on the validation split of fold i.
for i, ckpt_path in enumerate(models):
    # folds[i][1] holds the positional row indices of fold i's validation rows.
    val = train.iloc[folds[i][1]]
    val_df = VentilatorDataClassification(
        group_dict=train_grp_dict,
        breath_df=val['breath_id'].drop_duplicates(),
        categorical_columns=dataset_kwargs.categorical_columns,
        numerical_columns=dataset_kwargs.numerical_columns,
        target_column=dataset_kwargs.target_column,
    )
    val_dl = DataLoader(dataset = val_df,batch_size = 256,num_workers = 8,pin_memory=True,shuffle = False)

    # map_location='cpu' loads the checkpoint regardless of which device it was
    # saved from; the Trainer created inside predict() moves the model to the GPU.
    wt_dict = torch.load(ckpt_path, map_location='cpu')
    lit_model = ClassifcationModel(config,mapping = '../pressure_mapper.pkl',topk = 3)
    lit_model.load_state_dict(state_dict=wt_dict['state_dict'])

    preds = predict(lit_model,val_dl,val[['pressure','id','breath_id','R','C','u_out']],0)
    oot_preds = predict(lit_model,test_dl,test[['id','breath_id','R','C','u_out']],0,is_test=True)

    # The fold MAE is reported on the rows with u_out == 0 only.
    u_out_zero = preds['u_out'] == 0
    print('MAE:  ',mean_absolute_error(preds.loc[u_out_zero, 'pressure'],preds.loc[u_out_zero, 'preds']))
    oof_preds.append(preds)
    test_preds.append(oot_preds)

{'init_type': 'yakama'}
tensor(42.5665)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


tensor(77.7641)


Testing: 0it [00:00, ?it/s]

  rank_zero_deprecation(


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------
MAE:   0.1721731916746267
{'init_type': 'yakama'}
tensor(34.0886)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


tensor(-207.7477)


Testing: 0it [00:00, ?it/s]

  rank_zero_deprecation(


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------
MAE:   0.16533733490256855
{'init_type': 'yakama'}
tensor(-199.8125)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


tensor(79.4620)


Testing: 0it [00:00, ?it/s]

  rank_zero_deprecation(


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------
MAE:   0.1730699198981462
{'init_type': 'yakama'}
tensor(-95.2080)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


tensor(56.9810)


Testing: 0it [00:00, ?it/s]

  rank_zero_deprecation(


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------
MAE:   0.16712187963936026
{'init_type': 'yakama'}
tensor(220.4322)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


tensor(139.0257)


Testing: 0it [00:00, ?it/s]

  rank_zero_deprecation(


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------
MAE:   0.17189552805151784


In [40]:
# NOTE: this cell was executed as In [40], i.e. after the `pd.concat` cell that
# follows it (In [39]). On a top-to-bottom run `oof_preds` is still a Python
# list here and `.head()` raises AttributeError — move this cell below the concat.
oof_preds.head()

Unnamed: 0,pressure,id,breath_id,R,C,u_out,preds
319,6.259305,241,3,1,1,0,6.470211
318,5.76719,242,3,1,1,0,5.626585
317,6.540513,243,3,1,1,0,6.470211
316,8.649578,244,3,1,1,0,8.508973
315,10.758642,245,3,1,1,0,10.68834


In [39]:
# Collapse the per-fold frames into one out-of-fold frame. The isinstance guard
# makes the cell safe to re-run: once `oof_preds` is already a DataFrame,
# passing it to pd.concat again would raise a TypeError.
if isinstance(oof_preds, list):
    oof_preds = pd.concat(oof_preds,axis=0)

In [41]:
mean_absolute_error(oof_preds[oof_preds.u_out==0]['pressure'],oof_preds[oof_preds.u_out==0]['preds'])

0.16991950976016562

In [77]:
# Ensemble the folds: place every fold's test predictions side by side (aligned
# on the row index), take the row-wise median, then attach the ids taken from
# the first fold's frame.
fold_columns = [fold_frame['preds'] for fold_frame in test_preds]
fold_median = pd.concat(fold_columns,axis=1).median(axis=1)
oot_preds = pd.concat([test_preds[0]['id'],fold_median],axis=1)
oot_preds.columns = ['id','pressure']

In [78]:
# Align the ensembled predictions with the ids of the sample submission.
# os.path.join works whether or not DATA_DIR ends with a path separator.
sub = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'))
sub = sub[['id']].merge(oot_preds,on='id',how='left')
# ids without a prediction get pressure 0.
# NOTE(review): presumably these are rows removed by the head(config.seq_len)
# truncation in preprocess — confirm that none of them is scored.
sub['pressure'] = sub['pressure'].fillna(0)

In [83]:
# Create the output directory first so that a missing ../subs does not make the
# write fail after the long inference run above.
sub_path = '../subs/baseline_LSTM_Classification.csv'
os.makedirs(os.path.dirname(sub_path), exist_ok=True)
sub.to_csv(sub_path,index=False)

In [92]:
# Replace the index inherited from the concatenated fold frames with a fresh
# 0..n-1 index (the old index is discarded, not kept as a column).
oof_preds = oof_preds.reset_index(drop=True)

In [93]:
oof_preds.head()

Unnamed: 0,pressure,id,breath_id,R,C,u_out,preds
0,6.259305,241,3,1,1,0,6.470211
1,5.76719,242,3,1,1,0,5.626585
2,6.540513,243,3,1,1,0,6.470211
3,8.649578,244,3,1,1,0,8.508973
4,10.758642,245,3,1,1,0,10.68834


In [94]:
# Persist the full out-of-fold frame, named after the experiment. The target
# directory is created first so a missing ../oofs does not abort the export.
oof_path = '../oofs/{}.feather'.format(config.experiment_name)
os.makedirs(os.path.dirname(oof_path), exist_ok=True)
oof_preds.to_feather(oof_path)

In [97]:
# From here on keep only the rows with u_out == 0 (the same subset the MAE
# above is computed on). This rebinds `oof_preds`; the unfiltered frame is only
# available from the feather file written earlier.
oof_preds = oof_preds.loc[oof_preds['u_out'] == 0]

In [98]:
oof_preds.head()

Unnamed: 0,pressure,id,breath_id,R,C,u_out,preds
0,6.259305,241,3,1,1,0,6.470211
1,5.76719,242,3,1,1,0,5.626585
2,6.540513,243,3,1,1,0,6.470211
3,8.649578,244,3,1,1,0,8.508973
4,10.758642,245,3,1,1,0,10.68834


(2290968, 7)