In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import os
import sys
sys.path.append('../src')
from torch import nn
from omegaconf import OmegaConf
from torch.utils.data import DataLoader
import torch
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler
import numpy as np
from tqdm import tqdm
# Let pandas render up to 100 columns; the processed frames previewed below are wide.
pd.options.display.max_columns=100

In [2]:
from datalib import VentilatorDataClassification
import modellib
from utils import fc
import datalib
from torch.utils.data import Dataset
from train_classification import get_group_dict,map_dataset
import joblib
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import GroupKFold
from litmodellib import ClassifcationModel
import pytorch_lightning as pl
import copy
import glob
import re

In [3]:
class VentilatorDataClassification(Dataset):
    """Per-breath sequence dataset used by the pressure-classification models.

    One item corresponds to one entry of ``breath_df``. The matching record in
    ``group_dict`` is expected to map column names to per-timestep sequences;
    every sequence is windowed to ``[shift, shift + seq_len)``.

    Args:
        group_dict: mapping ``breath_id -> {column name: sequence}``.
        breath_df: positional container of breath ids (accessed via ``.iloc``).
        categorical_columns: column names stacked into the ``"cat"`` tensor.
        numerical_columns: column names stacked into the ``"num"`` tensor.
        target_column: optional column returned as ``"target"``.
        shift: first timestep of the window.
        seq_len: number of timesteps in the window.
    """

    def __init__(
        self, group_dict, breath_df,categorical_columns, numerical_columns, target_column=None,shift=0,
        seq_len=40
    ):
        self.group_dict = group_dict
        self.breath_df = breath_df
        self.categorical_columns = categorical_columns
        self.numerical_columns = numerical_columns
        self.target_column = target_column
        self.shift = shift
        self.seq_len = seq_len

    def __len__(self):
        """Number of breaths available."""
        return len(self.breath_df)

    @staticmethod
    def _stack_columns(record, columns, window, dtype):
        """Stack ``columns`` of ``record`` side by side (time on the first axis) and keep ``window``."""
        time_major = np.array([record[name] for name in columns]).T
        return torch.tensor(time_major[window], dtype=dtype)

    def __getitem__(self, idx):
        """Return the windowed tensors for the breath at position ``idx``.

        The result always holds ``"num"`` (float32), ``"cat"`` (long) and
        ``"u_out"`` (long); ``"target"`` (long) is added only when a target
        column was configured.
        """
        record = self.group_dict[self.breath_df.iloc[idx]]
        window = slice(self.shift, self.shift + self.seq_len)

        cat_data = self._stack_columns(record, self.categorical_columns, window, torch.long)
        num_data = self._stack_columns(record, self.numerical_columns, window, torch.float32)
        u_out = torch.tensor(np.array(record["u_out"])[window], dtype=torch.long)

        sample = {"num": num_data, "cat": cat_data}
        if self.target_column is not None:
            sample["target"] = torch.tensor(
                np.array(record[self.target_column])[window], dtype=torch.long
            )
        sample["u_out"] = u_out
        return sample

In [4]:
# Directory holding train.csv, test.csv and sample_submission.csv (machine-specific absolute path;
# the trailing slash matters because file names are appended with `+`).
DATA_DIR = '/mnt/disks/extra_data/kaggle/ventilator_prediction/'
# NOTE(review): R_MAP / C_MAP are not referenced anywhere else in the visible notebook; presumably
# they encode the raw R and C values as category indices — confirm against map_dataset or remove.
R_MAP = {5: 0, 50: 1, 20: 2}
C_MAP = {20: 0, 50: 1, 10: 2}

In [5]:
import os
# NOTE(review): presumably a debugging aid so CUDA errors surface at the failing call. It is set
# after torch was imported above — confirm it still takes effect, and drop it for timed runs.
os.environ['CUDA_LAUNCH_BLOCKING']='1'

In [6]:
# train = pd.read_csv(os.path.join(DATA_DIR,'train.csv'))
# test = pd.read_csv(os.path.join(DATA_DIR,'test.csv'))
# train.breath_id = train.breath_id.map(dict(zip(train.breath_id.unique().tolist(),range(train.breath_id.nunique()))))
# test.breath_id = train.breath_id.map(dict(zip(train.breath_id.unique().tolist(),range(train.breath_id.nunique()))))

In [25]:
def preprocess(config):
    """Load the raw CSVs and build the train/test frames used for inference.

    Steps, in order:
      1. read ``train.csv`` / ``test.csv`` from ``DATA_DIR``;
      2. replace each training pressure by its rank among the sorted distinct
         pressures and persist the inverse mapping (rank -> pressure) to
         ``../sorted_pressure_mapper.pkl``;
      3. run ``fc`` on both frames;
      4. keep the first ``config.seq_len`` rows of every breath;
      5. optionally robust-scale each numerical column (fit on train, applied
         to test).

    Side effects: ``config.seq_len`` is overwritten with 80 and
    ``config.model.kwargs["output_dim"]`` is set to the number of distinct
    pressure classes left in the truncated training frame.

    Returns:
        ``(train, test)`` DataFrames.
    """
    train = pd.read_csv(DATA_DIR + "train.csv")
    test = pd.read_csv(DATA_DIR + "test.csv")

    # Class index == position of the pressure value in ascending order.
    ordered_pressures = sorted(train["pressure"].unique().tolist())
    pressure_dict = dict(zip(ordered_pressures, range(len(ordered_pressures))))
    pressure_reverse_dict = dict(enumerate(ordered_pressures))
    joblib.dump(pressure_reverse_dict, "../sorted_pressure_mapper.pkl")
    train["pressure"] = train["pressure"].map(pressure_dict)

    train, test = fc(train), fc(test)

    config.seq_len = 80
    train, test = (
        frame.groupby("breath_id").head(config.seq_len) for frame in (train, test)
    )
    config.model.kwargs["output_dim"] = train["pressure"].nunique()

    if not config.normalization.is_norm:
        return train, test

    scaler = RobustScaler()
    feature_names = config.dataset.train.kwargs.numerical_columns
    print(feature_names)
    for name in feature_names:
        # fit_transform refits the shared scaler, so every column gets its own statistics.
        train[name] = scaler.fit_transform(train[[name]])
        test[name] = scaler.transform(test[[name]])
    return train, test

def create_path(path):
    """Make sure the directory ``path`` exists.

    An already-existing path is left untouched. Missing parent directories are
    created as well, and a directory that appears between the existence check
    and the creation (e.g. made by a parallel fold run) is not an error.

    Args:
        path: directory to create.
    """
    if os.path.exists(path):
        return
    # makedirs handles nested paths; exist_ok closes the check-then-create race.
    os.makedirs(path, exist_ok=True)
        
def predict(model,dl,data,device,is_test=False,seq_len=40):
    """Run ``model`` over ``dl`` and attach its predictions to the matching rows of ``data``.

    Args:
        model: LightningModule handed to ``pl.Trainer.test``.
        dl: dataloader to evaluate.
        data: frame with at least ``id, breath_id, R, C, u_out`` (plus
            ``pressure`` for labelled data).
        device: GPU index passed to ``pl.Trainer(gpus=[device])``.
        is_test: when False, ``pressure`` holds class indices and is mapped
            back to real pressures via ``../sorted_pressure_mapper.pkl``.
        seq_len: number of leading rows kept per breath; must match the window
            the dataloader was built with (default 40, the previous constant).

    Returns:
        A new DataFrame (``data`` is not modified) with a ``preds`` column.

    Raises:
        ValueError: if the number of loaded predictions differs from the number
            of rows kept, e.g. because ``prediction.pt`` is stale.
    """
    columns = ['pressure','id','breath_id','R','C','u_out']
    # Unlabelled test frames carry no pressure column; selecting it used to raise KeyError.
    if is_test and 'pressure' not in data.columns:
        columns.remove('pressure')
    # Column selection already yields a new frame, so the deep copy of the whole input is
    # unnecessary; .copy() keeps the later column assignments off a groupby view.
    df = data[columns].groupby('breath_id').head(seq_len).copy()
    if not is_test:
        df['pressure'] = df['pressure'].map(joblib.load('../sorted_pressure_mapper.pkl'))
    trainer = pl.Trainer(gpus=[device])
    trainer.test(model = model,test_dataloaders=dl)
    # NOTE(review): 'prediction.pt' is presumably written by the model's test hooks (not visible
    # here); if the test loop did not run, a file from an earlier run would be read instead.
    preds = [x['preds'] for x in torch.load('prediction.pt')]
    if len(preds) != len(df):
        raise ValueError(
            'prediction.pt holds {} predictions but {} rows were expected; '
            'the file may be stale or seq_len may not match the dataloader'.format(len(preds), len(df))
        )
    df['preds'] = preds
    return df

def get_model_path(model_dir,metric_name,higher_is_better=None):
    """Return the best checkpoint among the files matching ``model_dir``.

    The score is parsed from the path, which must contain
    ``<metric_name>=<number>`` (e.g. ``val_MAE=0.1434``).

    Args:
        model_dir: glob pattern, e.g. ``'../experiments/run/fold_0/*.ckpt'``.
        metric_name: metric label embedded in the checkpoint file names.
        higher_is_better: explicit direction of the metric. When ``None`` the
            direction is inferred from the name: metrics containing ``loss``,
            ``mae``, ``mape``, ``mse`` or ``error`` are minimised, everything
            else is maximised (the previous behaviour for all metrics, which
            picked the *worst* checkpoint for error metrics such as val_MAE).

    Returns:
        Path of the selected checkpoint.

    Raises:
        ValueError: if no matching file carries the metric in its path.
    """
    # Raw string avoids the invalid-escape warning; escape keeps the metric name literal.
    pattern = re.compile(r'{}=(\d+\.?\d+)'.format(re.escape(metric_name)))
    scored = []
    for path in glob.glob(model_dir):
        found = pattern.findall(path)
        if found:  # skip files such as last.ckpt that do not encode the metric
            scored.append((float(found[0]), path))
    if not scored:
        raise ValueError(
            'no checkpoint matching {!r} contains {}=<value>'.format(model_dir, metric_name)
        )
    if higher_is_better is None:
        lowered = metric_name.lower()
        error_like = ('loss', 'mae', 'mape', 'mse', 'error')
        higher_is_better = not any(tag in lowered for tag in error_like)
    pick = max if higher_is_better else min
    return pick(scored, key=lambda pair: pair[0])[1]

In [8]:
# Experiment configuration; config.experiment_name is used below to locate the fold checkpoints
# and to name the submission / OOF files.
config = OmegaConf.load('../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4-10-folds/config.yaml')

In [9]:
# config.topk=1
# config.topk

In [10]:
%%time
# Builds both frames; also rewrites ../sorted_pressure_mapper.pkl and mutates `config`
# (seq_len and model.kwargs.output_dim).
train,test = preprocess(config)

5
10
5
10
5
10
5
10
['time_step', 'u_in', 'mean_u_in_last_5', 'min_u_in_last_5', 'max_u_in_last_5', 'std_u_in_last_5', 'mean_u_in_last_10', 'min_u_in_last_10', 'max_u_in_last_10', 'std_u_in_last_10', 'mean_u_in_next_5', 'min_u_in_next_5', 'max_u_in_next_5', 'std_u_in_next_5', 'mean_u_in_next_10', 'min_u_in_next_10', 'max_u_in_next_10', 'std_u_in_next_10', 'u_in_cumsum', 'u_in_cummean', 'u_in_cummax', 'R+C', 'R/C', 'u_in/C', 'u_in/R', 'u_in_cumsum/C', 'u_in_cumsum/R', 'lag_u_in_1', 'lead_u_in_1', 'lag_u_in_2', 'lead_u_in_2', 'lag_u_in_3', 'lead_u_in_3', 'lag_u_in_4', 'lead_u_in_4', 'auc', 'lag_auc_1', 'lead_auc_1', 'lag_auc_2', 'lead_auc_2', 'per_change_u_in_lag_u_in_1', 'per_change_u_in_lead_u_in_1', 'per_change_u_in_lag_u_in_2', 'per_change_u_in_lead_u_in_2', 'per_change_u_in_lag_u_in_3', 'per_change_u_in_lead_u_in_3', 'per_change_u_in_lag_u_in_4', 'per_change_u_in_lead_u_in_4', 'per_change_auc_lag_auc_1', 'per_change_auc_lead_auc_1', 'per_change_auc_lag_auc_2', 'per_change_auc_lead_a

In [11]:
# NOTE(review): map_dataset is imported from train_classification and not visible here;
# presumably it encodes categorical columns such as R and C — confirm.
# Not idempotent: re-running this cell feeds already-mapped frames back in.
train = map_dataset(train)
test = map_dataset(test)

In [12]:
# Preview the processed training frame.
train.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure,mean_u_in_last_5,min_u_in_last_5,max_u_in_last_5,std_u_in_last_5,mean_u_in_last_10,min_u_in_last_10,max_u_in_last_10,std_u_in_last_10,mean_u_in_next_5,min_u_in_next_5,max_u_in_next_5,std_u_in_next_5,mean_u_in_next_10,min_u_in_next_10,max_u_in_next_10,std_u_in_next_10,u_in_cumsum,u_in_cummean,u_in_cummax,R+C,R/C,u_in/C,u_in/R,u_in_cumsum/C,u_in_cumsum/R,lag_u_in_1,lead_u_in_1,lag_u_in_2,lead_u_in_2,lag_u_in_3,lead_u_in_3,lag_u_in_4,lead_u_in_4,auc,lag_auc_1,lead_auc_1,lag_auc_2,lead_auc_2,per_change_u_in_lag_u_in_1,per_change_u_in_lead_u_in_1,per_change_u_in_lag_u_in_2,per_change_u_in_lead_u_in_2,per_change_u_in_lag_u_in_3,per_change_u_in_lead_u_in_3,per_change_u_in_lag_u_in_4,per_change_u_in_lead_u_in_4,per_change_auc_lag_auc_1,per_change_auc_lead_auc_1,per_change_auc_lag_auc_2,per_change_auc_lead_auc_2
79,1,0,2,1,-0.989052,-0.937384,0,110,-0.920162,-0.633538,-0.755087,-0.257146,-0.771404,-0.295639,-0.57101,-0.301953,3.458282,-0.717374,4.772503,11.840293,5.063297,-0.532296,6.379929,6.154286,-0.725228,-0.648438,-0.549983,0.25,-0.285714,-0.37803,-0.175026,-0.648289,-0.284011,-0.864121,2.890207,-0.842527,3.664026,-0.819258,3.743915,-0.794152,4.277824,-0.957882,-0.88811,1.108032,-0.833871,3.492524,28.206025,-6620.540119,14.277427,-3573.218838,8.873772,-1918.410927,5.921071,-1428.920865,0.0,-5817646.0,-0.02556,-6775969.0
78,2,0,2,1,-0.963608,3.049278,0,111,1.015117,-0.633538,2.202065,13.158229,0.858288,-0.295639,1.600962,5.28806,4.872757,2.982761,5.214485,3.729181,5.813562,3.207547,6.379929,2.078486,-0.676829,0.162935,-0.10554,0.25,-0.285714,0.488786,1.149983,-0.631497,-0.261962,-0.84736,3.739712,-0.842527,3.724218,-0.819258,4.256189,-0.794152,4.661047,1.108032,-0.88811,3.645443,-0.833871,3.991865,28.078305,-6.726762,14.277598,-3.165421,8.873879,-2.645378,5.921142,-2.258269,20.990946,-22.99629,10.626289,-14.48784
77,3,0,2,1,-0.938006,3.948195,0,139,1.951123,-0.633538,2.868846,12.117632,1.646495,-0.295639,2.0907,4.854457,5.327884,3.817071,5.214485,2.463003,6.048299,4.05081,6.379929,1.207088,-0.617568,0.555359,-0.005326,0.25,-0.285714,0.684236,1.448747,-0.610936,-0.234965,2.83314,3.801381,-0.825784,4.236036,-0.819258,4.639136,-0.794152,4.634403,3.645443,1.067412,4.168626,-0.833871,4.32755,5.135959,-0.360668,14.224575,-1.647836,8.873879,-1.461703,5.921142,-0.949729,11.570494,-2.127951,10.626308,-1.848927
76,4,0,2,1,-0.912278,4.013452,0,194,2.434965,-0.633538,2.917252,10.904784,2.053937,-0.295639,2.126253,4.349079,5.551609,3.877638,5.214485,1.984519,6.201918,4.112027,6.379929,0.901494,-0.557517,0.758212,0.001949,0.25,-0.285714,0.698425,1.470436,-0.590101,-0.207608,3.663022,4.32576,2.850764,4.618643,-0.802501,4.612511,-0.794152,4.570046,4.168626,3.469235,4.520339,1.046161,4.765483,0.328589,-3.326197,2.734628,-2.559831,8.84129,-1.308985,5.921142,-0.809098,2.142221,-1.284537,6.333747,-1.536268
75,5,0,2,1,-0.886409,4.568332,0,201,2.833014,-0.633538,3.32884,10.348305,2.389133,-0.295639,2.428557,4.117201,5.814756,4.392638,5.35489,0.8797,6.313437,4.632553,6.379929,0.317987,-0.490761,0.925096,0.063808,0.25,-0.285714,0.819072,1.654856,-0.566939,-0.177197,3.723268,4.717757,3.679756,4.592042,2.877119,4.5482,-0.777379,4.782785,4.520339,3.964461,4.979181,3.355267,4.968007,2.795227,-2.223427,1.563474,-0.896403,2.407178,-0.379798,5.901538,-0.449221,1.347663,-1.56821,1.675589,-1.18767


In [13]:
%%time
# Lookup keyed by breath_id that VentilatorDataClassification indexes into.
# Slow: the recorded run took roughly 9 minutes.
train_grp_dict = get_group_dict(train)

100%|██████████| 75450/75450 [08:48<00:00, 142.74it/s]

CPU times: user 8min 42s, sys: 9.35 s, total: 8min 51s
Wall time: 8min 48s





In [14]:
%%time
# Same lookup for the test frame; the recorded run took roughly 6 minutes.
test_grp_dict = get_group_dict(test)

100%|██████████| 50300/50300 [05:46<00:00, 145.01it/s] 

CPU times: user 5min 42s, sys: 6.49 s, total: 5min 48s
Wall time: 5min 46s





In [15]:
# Sanity check: number of entries in each lookup.
len(train_grp_dict),len(test_grp_dict)

(75450, 50300)

In [16]:
# 10 folds grouped by breath_id, so all rows of a breath share a fold.
# NOTE(review): assumes this reproduces the split used when the fold checkpoints were
# trained — confirm, otherwise the out-of-fold scores below are optimistic.
folds = GroupKFold(n_splits=10)
# Materialise the (train_idx, val_idx) pairs so they can be indexed by fold number.
folds = list(folds.split(train, groups=train["breath_id"]))

In [17]:
# Sanity check: one (train_idx, val_idx) pair per fold.
len(folds)

10

In [18]:
# models = [
#     '../experiments/RNN-classification-top3-5-folds/fold_0/model-epoch=96-val_MAE=0.1722-val_loss=0.0000.ckpt',
#     '../experiments/RNN-classification-top3-5-folds/fold_1/model-epoch=97-val_MAE=0.1653-val_loss=0.0000.ckpt',
#     '../experiments/RNN-classification-top3-5-folds/fold_2/model-epoch=98-val_MAE=0.1731-val_loss=0.0000.ckpt',
#     '../experiments/RNN-classification-top3-5-folds/fold_3/model-epoch=90-val_MAE=0.1671-val_loss=0.0000.ckpt',
#     '../experiments/RNN-classification-top3-5-folds/fold_4/model-epoch=84-val_MAE=0.1719-val_loss=0.0000.ckpt',    
# ]
# models = [
#     '../experiments/v2-RNN-classification-top3/fold_0/model-epoch=96-val_MAE=0.1688-val_loss=0.0000.ckpt',
#     '../experiments/v2-RNN-classification-top3/fold_1/model-epoch=138-val_MAE=0.1616-val_loss=0.0000.ckpt',
#     '../experiments/v2-RNN-classification-top3/fold_2/model-epoch=146-val_MAE=0.1686-val_loss=0.0000.ckpt',
#     '../experiments/v2-RNN-classification-top3/fold_3/model-epoch=148-val_MAE=0.1669-val_loss=0.0000.ckpt',
#     '../experiments/v2-RNN-classification-top3/fold_4/model-epoch=148-val_MAE=0.1684-val_loss=0.0000.ckpt',    
# ]

# models = [
#     '../experiments/v0-LSTM-classification-top3-smooth-loss/fold_0/model-epoch=99-val_MAE=0.1631-val_loss=0.0000.ckpt',
#     '../experiments/v0-LSTM-classification-top3-smooth-loss/fold_1/model-epoch=95-val_MAE=0.1572-val_loss=0.0000.ckpt',
#     '../experiments/v0-LSTM-classification-top3-smooth-loss/fold_2/model-epoch=97-val_MAE=0.1656-val_loss=0.0000.ckpt',
#     '../experiments/v0-LSTM-classification-top3-smooth-loss/fold_3/model-epoch=99-val_MAE=0.1586-val_loss=0.0000.ckpt',
#     '../experiments/v0-LSTM-classification-top3-smooth-loss/fold_4/model-epoch=94-val_MAE=0.1660-val_loss=0.0000.ckpt',    
# ]

# models = [
#     '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4/fold_0/model-epoch=97-val_MAE=0.1532-val_loss=0.0000.ckpt',
#     '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4/fold_1/model-epoch=99-val_MAE=0.1469-val_loss=0.0000.ckpt',
#     '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4/fold_2/model-epoch=99-val_MAE=0.1550-val_loss=0.0000.ckpt',
#     '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4/fold_3/model-epoch=99-val_MAE=0.1542-val_loss=0.0000.ckpt', 
#     '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4/fold_4/model-epoch=99-val_MAE=0.1557-val_loss=0.0000.ckpt',    

# ]

# One checkpoint per fold directory (fold_0 … fold_9), chosen by get_model_path from the
# val_MAE embedded in the file name. The hard-coded 10 must match n_splits of the fold split.
models = [get_model_path('../experiments/{}/fold_{}/*.ckpt'.format(config.experiment_name,x),'val_MAE') for x in range(10)]
models

['../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4-10-folds/fold_0/model-epoch=94-val_MAE=0.1434-val_loss=0.0000.ckpt',
 '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4-10-folds/fold_1/model-epoch=97-val_MAE=0.1474-val_loss=0.0000.ckpt',
 '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4-10-folds/fold_2/model-epoch=99-val_MAE=0.1499-val_loss=0.0000.ckpt',
 '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4-10-folds/fold_3/model-epoch=97-val_MAE=0.1511-val_loss=0.0000.ckpt',
 '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4-10-folds/fold_4/model-epoch=99-val_MAE=0.1538-val_loss=0.0000.ckpt',
 '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4-10-folds/fold_5/model-epoch=98-val_MAE=0.1523-val_loss=0.0000.ckpt',
 '../experiments/LSTMDpRelu-Transformer-concat-skip-classify-smooth-CE-dp-0.4-10-folds/fold_6/model-epoch=97-val_MAE=0

In [19]:
from sklearn.metrics import mean_absolute_error

In [20]:
# Accumulators for per-fold validation and test prediction frames.
# NOTE(review): the append calls in the fold loop are currently commented out, so these stay empty.
oof_preds=[]
test_preds=[]

In [20]:
# test_df = VentilatorDataClassification(group_dict = test_grp_dict, breath_df = test[['breath_id']].drop_duplicates()['breath_id'],
#                                       categorical_columns = config.dataset.train.kwargs.categorical_columns, 
#                                       numerical_columns = config.dataset.train.kwargs.numerical_columns)
# test_dl = DataLoader(dataset = test_df,batch_size = 128,num_workers = 8,pin_memory=True,shuffle = False)

In [22]:
# len(test_dl)

In [27]:
# NOTE(review): `val` is only assigned inside the fold loop further down, so this cell
# fails on a fresh top-to-bottom run.
val.shape

(603600, 58)

In [30]:
# Preview of the column subset and per-breath truncation (first 40 rows) that predict() applies.
train[['pressure','id','breath_id','R','C','u_out']].groupby("breath_id").head(40).head()

(3018000, 6)

In [26]:
# Score the fold checkpoints on their validation split.
# NOTE(review): restricted to fold 0 for debugging; iterate over range(len(folds)) and
# restore the append calls at the bottom for a full out-of-fold run.
for i in [0]:
# for i in [0]:
    # Validation rows of fold i; the dataset is driven by the unique breath ids among them.
    val = train.iloc[folds[i][1]]
    val_df = VentilatorDataClassification(group_dict = train_grp_dict, breath_df = val[['breath_id']].drop_duplicates()['breath_id'],
                                          categorical_columns = config.dataset.train.kwargs.categorical_columns, 
                                          numerical_columns = config.dataset.train.kwargs.numerical_columns, 
                                          target_column=config.dataset.train.kwargs.target_column,shift=0,seq_len=40)
    # shuffle=False keeps batch order aligned with the row order predict() relies on.
    val_dl = DataLoader(dataset = val_df,batch_size = 256,num_workers = 8,pin_memory=True,shuffle = False)
    
    # Load the fold's checkpoint and restore its weights into a freshly built model.
    wt_dict = torch.load(models[i])
#     lit_model = ClassifcationModel(config,mapping = '../pressure_mapper.pkl',topk = 3)
    lit_model = ClassifcationModel(config,mapping = '../sorted_pressure_mapper.pkl',topk = 3)
    lit_model.load_state_dict(state_dict=wt_dict['state_dict'])    
    # Overrides the topk=3 passed to the constructor above.
    lit_model.topk=1
    # The trailing 1 is the GPU index that predict() hands to pl.Trainer(gpus=[device]).
    preds = predict(lit_model,val_dl,val[['pressure','id','breath_id','R','C','u_out']],1)
#     oot_preds = predict(lit_model,test_dl,test[['id','breath_id','R','C','u_out']],0,is_test=True)
    # MAE restricted to rows with u_out == 0.
    # NOTE(review): the recorded output shows "Testing: 0it", empty test results and an MAE of
    # 4.93 against a checkpoint val_MAE of 0.1434 — possibly the test loop did not run and an
    # old prediction.pt was scored. Investigate before trusting this number.
    print('MAE:  ',mean_absolute_error(preds[preds.u_out==0]['pressure'],preds[preds.u_out==0]['preds']))
#     oof_preds.append(preds)    
#     test_preds.append(oot_preds)        



{'init_type': 'yakama'}
LSTMDpReLu(
  (rnn): LSTM(64, 512, batch_first=True, dropout=0.4, bidirectional=True)
  (dp): Dropout(p=0.4, inplace=False)
)
LSTMDpReLu(
  (rnn): LSTM(1088, 512, batch_first=True, dropout=0.4, bidirectional=True)
  (dp): Dropout(p=0.4, inplace=False)
)
LSTMDpReLu(
  (rnn): LSTM(2112, 512, batch_first=True, dropout=0.4, bidirectional=True)
  (dp): Dropout(p=0.4, inplace=False)
)
LSTMDpReLu(
  (rnn): LSTM(3136, 512, batch_first=True, dropout=0.4, bidirectional=True)
  (dp): Dropout(p=0.4, inplace=False)
)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

  rank_zero_deprecation(


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------
MAE:   4.931593259614221


In [None]:
# Pull a single batch from the validation loader for interactive inspection.
# NOTE(review): the name `m` is reused further down for a DataFrame of submissions.
m = next(iter(val_dl))

1111111     1  414141414141

41 




41
111  4141
1
  1114141 
  41
4141
1

1  141 
114141  1

114141   

4141

1 4111
41  1
14141 1
 
41 41
41
1
11   41114141 
 
1
114141   

414141


111   41141411
 1

1 141 41  41
4141



11  4141

11111    1 41414141 41




4111
  41
41
11111     41414141
1
41
 
1
41 
41
111 1  41 411411141

  
 
414141

1
 411
11   1414141 11

1
41   
41411

 411
411 
 41141
 11141
  
41 411
41
1 
 4141
1
 1 411141
1 1 
41  141
4141 
1

41 
141 1
411 
 1411141 1 
 41
 41
41
41
1
 41
11 11 141 1 141 41
 41 
41141

41 


41
111 1 1 1411 411 41
   
4141
414141




111 11  41 1 41
4141 41


41
1
 411
 11 11411 141   
41 
41414141




1 411
 11141111    
  4141414141
41



1
 41
1 11141   1
41 41
41
14111
 
  414141


11 1 411411 
 
411 411
 
41 141 41

1
411 
 141141
 
41 1
41 
1141 
 1141141  
 
4141411

 
141 1
41 1
141  411411

 
 41411

1  414111
1
  1 4141 41
141
1
 
 41411
1
  4141111

   4114141 
1

141  
4141
11
  41411

 141 111
141    41
414141
1

1 
 41141


In [27]:
# Inspect the shape of the batched numerical-feature tensor.
m['num'].shape

torch.Size([256, 39, 52])

In [32]:
# Stack the per-fold prediction frames into one out-of-fold frame.
# NOTE(review): not re-runnable — it rebinds the list name to a DataFrame — and pd.concat
# raises on an empty list, which is what the fold loop leaves behind while its append is commented out.
oof_preds = pd.concat(oof_preds,axis=0)

In [28]:
# Out-of-fold MAE over rows with u_out == 0.
# NOTE(review): this cell appears several times with different recorded outputs (leftovers from
# earlier experiments); keep one copy and record which experiment each score belongs to.
mean_absolute_error(oof_preds[oof_preds.u_out==0]['pressure'],oof_preds[oof_preds.u_out==0]['preds'])

0.16210800133140657

In [24]:
# Out-of-fold MAE over rows with u_out == 0 (repeated cell; recorded output is from another run).
mean_absolute_error(oof_preds[oof_preds.u_out==0]['pressure'],oof_preds[oof_preds.u_out==0]['preds'])

0.15299057485442744

In [25]:
# Out-of-fold MAE over rows with u_out == 0 (repeated cell; recorded output is from another run).
mean_absolute_error(oof_preds[oof_preds.u_out==0]['pressure'],oof_preds[oof_preds.u_out==0]['preds'])

0.14785470642327017

In [33]:
# Out-of-fold MAE over rows with u_out == 0 (repeated cell; recorded output is from another run).
mean_absolute_error(oof_preds[oof_preds.u_out==0]['pressure'],oof_preds[oof_preds.u_out==0]['preds'])

0.147684052729295

In [26]:
# Ensemble the folds: row-wise median of every fold's test predictions, paired with the ids
# of the first fold's frame (assumes all fold frames share the same index — TODO confirm).
fold_columns = [fold_frame['preds'] for fold_frame in test_preds]
median_pressure = pd.concat(fold_columns, axis=1).median(axis=1)
oot_preds = pd.concat([test_preds[0]['id'], median_pressure], axis=1)
oot_preds.columns = ['id', 'pressure']

In [27]:
# Align predictions with the official id list. Ids without a prediction get pressure 0;
# predict() only keeps the first 40 rows of each breath, so the later rows end up here.
submission_ids = pd.read_csv(DATA_DIR+'sample_submission.csv')[['id']]
sub = submission_ids.merge(oot_preds, how='left', on='id')
sub['pressure'] = sub['pressure'].fillna(0)

In [35]:
# Preview the submission frame.
sub.head()

Unnamed: 0,id,pressure
0,1,6.259305
1,2,5.907794
2,3,7.102931
3,4,7.595046
4,5,9.176844


In [28]:
# Write the submission, named after the experiment.
sub.to_csv('../subs/{}.csv'.format(config.experiment_name),index=False)

In [29]:
# Replace the index inherited from the concatenated fold frames with a fresh 0..n-1 range.
oof_preds.reset_index(drop=True,inplace=True)

In [30]:
# Preview the out-of-fold frame.
oof_preds.head()

Unnamed: 0,pressure,id,breath_id,R,C,u_out,preds
0,6.259305,241,3,1,1,0,6.259305
1,5.76719,242,3,1,1,0,5.696887
2,6.540513,243,3,1,1,0,6.540513
3,8.649578,244,3,1,1,0,8.860484
4,10.758642,245,3,1,1,0,11.03985


In [31]:
# Keep only the rows with u_out == 0 (the rows the MAE cells score).
oof_preds = oof_preds[oof_preds.u_out==0]
# NOTE(review): inplace reset on a filtered frame can trigger SettingWithCopyWarning; left as is.
oof_preds.reset_index(drop=True,inplace=True)

In [32]:
# Persist the out-of-fold predictions, named after the experiment.
oof_preds.to_feather('../oofs/{}.feather'.format(config.experiment_name))

In [33]:
# NOTE(review): redundant — the same u_out == 0 filter was already applied before saving.
oof_preds = oof_preds[oof_preds.u_out==0]

In [34]:
# Preview the filtered out-of-fold frame.
oof_preds.head()

Unnamed: 0,pressure,id,breath_id,R,C,u_out,preds
0,6.259305,241,3,1,1,0,6.259305
1,5.76719,242,3,1,1,0,5.696887
2,6.540513,243,3,1,1,0,6.540513
3,8.649578,244,3,1,1,0,8.860484
4,10.758642,245,3,1,1,0,11.03985


In [38]:
# Equal-weight average of three earlier submissions; only displayed, not stored.
# Assumes the three files list ids in the same row order — TODO confirm.
(pd.read_csv('../subs/baseline_LSTM_Classification.csv')['pressure']+pd.read_csv('../subs/v2-RNN-classification-top3.csv')['pressure']+
pd.read_csv('../subs/v0-LSTM-classification-top3-smooth-loss.csv')['pressure'])/3

0          6.282739
1          5.907794
2          7.126365
3          7.618480
4          9.141692
             ...   
4023995    0.000000
4023996    0.000000
4023997    0.000000
4023998    0.000000
4023999    0.000000
Name: pressure, Length: 4024000, dtype: float64

In [50]:
# Pressure columns of three earlier submissions, side by side.
# NOTE(review): rebinds `m`, which other cells use for a dataloader batch.
m = pd.concat([pd.read_csv('../subs/baseline_LSTM_Classification.csv')['pressure'],pd.read_csv('../subs/v2-RNN-classification-top3.csv')['pressure'],
pd.read_csv('../subs/v0-LSTM-classification-top3-smooth-loss.csv')['pressure']],axis=1)

In [51]:
# m

In [52]:
# Overwrite the submission's pressure with the row-wise mean of the columns in `m`.
sub['pressure'] = m.mean(axis=1)

In [53]:
# Written to the notebook's working directory, unlike the per-experiment files under ../subs.
sub.to_csv("avg_lstm_0.17_0.167_0.162.csv",index=False)

In [35]:
# Equal-weight average of two earlier submissions; only displayed, not stored.
(pd.read_csv('../subs/baseline_LSTM_Classification.csv')['pressure']+pd.read_csv('../subs/v2-RNN-classification-top3.csv')['pressure'])/2

0          6.329607
1          5.907794
2          7.173233
3          7.665348
4          9.141692
             ...   
4023995    0.000000
4023996    0.000000
4023997    0.000000
4023998    0.000000
4023999    0.000000
Name: pressure, Length: 4024000, dtype: float64

In [57]:
# Scratch section: smoke-test a CNN-LSTM model.
# NOTE(review): rebinds `config`; any earlier cell re-run after this point will use this config.
config = OmegaConf.load('../configs/cnn-lstm-classification.yaml')

In [58]:
# Show the model section of the loaded config.
config.model

{'class': 'LSTMCNNClassfier', 'kwargs': {'embedding_layer': {'u_out': {'num_embeddings': 2, 'embedding_dim': 32}, 'R': {'num_embeddings': 3, 'embedding_dim': 32}, 'C': {'num_embeddings': 3, 'embedding_dim': 32}}, 'input_dim': 148, 'rnn_layer': {'class': 'LSTM', 'kwargs': {'input_size': 148, 'hidden_size': 512, 'num_layers': 2, 'batch_first': True, 'bidirectional': True, 'dropout': 0.2}}, 'rnn_init': {'class': 'InitRNNWeights', 'kwargs': {'init_type': 'yakama'}}, 'cnn_layer': {'input_layer': {'class': 'Conv1DBnRelu', 'kwargs': {'in_channels': 1024, 'out_channels': 512, 'kernel_size': 3, 'padding': 1}}, 'block1': {'class': 'Conv1DBasicBlock', 'kwargs': {'in_channels': 512, 'out_channels': 512, 'kernel_size': 3, 'padding': 1, 'is_bn': False}}, 'block2': {'class': 'Conv1DBasicBlock', 'kwargs': {'in_channels': 512, 'out_channels': 512, 'kernel_size': 3, 'padding': 1, 'is_bn': False}}, 'block3': {'class': 'Conv1DBasicBlock', 'kwargs': {'in_channels': 512, 'out_channels': 512, 'kernel_size': 

In [59]:
from modellib import LSTMCNNClassfier

In [60]:
# Number of output classes for the smoke test. preprocess() sets this from the data;
# 100 is presumably a placeholder — confirm before using this model for real predictions.
config.model.kwargs['output_dim'] = 100

In [61]:
# Build the model from the config's kwargs (untrained weights; no checkpoint is loaded here).
mod = LSTMCNNClassfier(config.model.kwargs)

{'init_type': 'yakama'}
tensor(-8.9367)
tensor(22.9660)


In [62]:
# mod(m)

In [53]:
# Fetch one validation batch to feed through the freshly built model.
# Requires `val_dl` from the fold loop to still be alive in the kernel.
m = next(iter(val_dl))

In [63]:
# Forward-pass smoke test on one batch.
# NOTE(review): rebinds `preds`, which the fold loop uses for its prediction frame.
preds = mod(m)