In [1]:
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt

df = pd.read_csv("./data/train.csv")

In [2]:
"""
pre-processing
"""

# time diff
df.rename({"time_step": "t"}, axis=1, inplace=True)
df["t_lag1"] = df.groupby("breath_id")["t"].shift(-1)
df["dt"] = df["t_lag1"] - df["t"]
df["dt"].fillna(method='ffill', inplace=True)
df["dt2"] = df["dt"] ** 2

# u_in - 1st diff and derivative
df["u_in_lag1"] = df.groupby("breath_id")["u_in"].shift(-1).fillna(method="ffill")
df["d1_u_in"] = df["u_in_lag1"] - df["u_in"]
df["prime1_u_in"] = df["d1_u_in"] / df["dt"]

# u_in - 2nd diff and derivative
df["prime1_u_in_lag1"] = df.groupby("breath_id")["prime1_u_in"].shift(-1).fillna(method="ffill")
df["d2_u_in"] = df["prime1_u_in_lag1"] - df["prime1_u_in"]
df["prime2_u_in"] = df["d2_u_in"] / df["dt"]

# integration
df["int1_u_in"] = (df["u_in"] * df["dt"]).cumsum()
df["int2_u_in"] = (df["int1_u_in"] * df["dt"]).cumsum()

"""
re-scaling
"""
from sklearn.preprocessing import StandardScaler

scaling_keys = ["pressure", "u_in", "d1_u_in", "d2_u_in", "prime1_u_in", "prime2_u_in", "int1_u_in", "int2_u_in"]
scalers = {}
for key in scaling_keys:
    scaler = StandardScaler()
    scaler.fit(df[[key]])
    df[[key]]= scaler.transform(df[[key]])
    scalers[key] = scaler

In [3]:
"""
separating R and C
"""

from itertools import product

unique_rs = df["R"].unique()
unique_cs = df["C"].unique()
unique_rcs = list(product(unique_rs, unique_cs))

df_rc_dict = {}
for r, c in unique_rcs:
    df_rc_dict[(r, c)] = df[(df["R"] == r) & (df["C"] == c)]
    
df_rc = df_rc_dict[unique_rcs[0]]
breath_ids = df_rc["breath_id"].unique()
n_breath_ids = len(breath_ids)
n_time_steps = len(df_rc) // n_breath_ids

In [10]:
"""
getting x and y
"""

n_in = 5
cols = ["u_in", "dt", "d1_u_in", "prime1_u_in"]
#cols = ["dt", "dt2", "u_in", "d1_u_in", "prime1_u_in", "d2_u_in", "prime2_u_in", "int1_u_in", "int2_u_in"]
n_cols = len(cols)
n_out = 1

x = np.zeros((n_breath_ids, n_in, n_cols))
y = np.zeros((n_breath_ids, n_out))
for ib, b in enumerate(breath_ids):
    x[ib, :, :] = df_rc[df_rc["breath_id"] == b][cols].iloc[:n_in]
    y[ib, :] = df_rc[df_rc["breath_id"] == b]["pressure"].iloc[:n_out]
x = x.reshape(n_breath_ids, n_in * n_cols)

In [11]:
import torch.nn as nn
from torch.optim import Adam, SGD
from torch.autograd import Variable
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score

class MyNet(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.dropout = nn.Dropout(p=0.1)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(n_in * n_cols, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 128)
        self.fc4 = nn.Linear(128, n_out)
        
    def forward(self, x_in):
        x = self.fc1(x_in)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        return x
    
    def reset_parameters(self):
        for layer in self.children():
            if hasattr(layer, 'reset_parameters'):
                layer.reset_parameters()
                
    def train(self, x, y):
        opt = Adam(self.parameters())
        loss_fon = nn.MSELoss()
        for epoch in range(1000):
            opt.zero_grad()
            yp = self(x)
            loss = loss_fon(yp, y)
            loss.backward()
            opt.step()
            print(epoch, loss.item(), end=' '*10+'\r')
        
def get_score(model, x, y):
    kfold = KFold(n_splits=10, shuffle=True)
    scores = list()
    for fold, (train_ids, test_ids) in enumerate(kfold.split(x)):
        model.reset_parameters()
        x_train, x_test = x[train_ids], x[test_ids]
        y_train, y_test = y[train_ids], y[test_ids]
        model.train(x_train, y_train)
        yp = model(x_test)
        score = r2_score(y_test.detach(), yp.detach())
        scores.append(score)
        print("model", type(model).__name__, "fold", fold, "score", score)
    return np.mean(scores), np.std(scores)
    
model = MyNet().double()
x_torch = torch.tensor(x[:, :])
y_torch = torch.tensor(y[:, :])
model.train(x_torch, y_torch)
yp = model(x_torch)
print("\n", r2_score(y_torch.detach(), yp.detach()))
print("nn", get_score(model, x_torch, y_torch))

999 0.001083416290113795           
 0.5133740777893359
model MyNet fold 0 score 0.49095344911546435
model MyNet fold 1 score 0.4681950096182709
model MyNet fold 2 score 0.42558414729667526
model MyNet fold 3 score 0.56391366258484270870451957432          
model MyNet fold 4 score 0.4684340069048465
model MyNet fold 5 score 0.51190912658313536056830735657          
model MyNet fold 6 score 0.49199752678784336604471958757          
model MyNet fold 7 score 0.51418817273513087331364793941          
model MyNet fold 8 score 0.4869130639209738
model MyNet fold 9 score 0.46874440765231695
nn (0.48908325731995, 0.03475065923879263)
