In [1]:
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt

df = pd.read_csv("./data/train.csv")

In [2]:
from sklearn.preprocessing import StandardScaler

scalers = {}
for key in ["pressure", "u_in"]:
    scaler = StandardScaler()
    scaler.fit(df[[key]])
    df[[key]]= scaler.transform(df[[key]])
    scalers[key] = scaler

In [2]:
from itertools import product

unique_rs = df["R"].unique()
unique_cs = df["C"].unique()
unique_rcs = list(product(unique_rs, unique_cs))

df_rc_dict = {}
for r, c in unique_rcs:
    df_rc_dict[(r, c)] = df[(df["R"] == r) & (df["C"] == c)]

In [3]:
df_rc = df_rc_dict[unique_rcs[0]]
breath_ids = df_rc["breath_id"].unique()
n_breath_ids = len(breath_ids)
n_time_steps = len(df_rc) // n_breath_ids

n_in = 10
n_out = 3

x_rc = np.zeros((n_breath_ids, 1, n_in))
y_rc = np.zeros((n_breath_ids, n_out))
for ib, b in enumerate(breath_ids):
    mask = df_rc["breath_id"] == b
    x_rc[ib, 0, :] = df_rc[mask]["u_in"][:n_in]
    y_rc[ib, :n_out] = df_rc[mask]["pressure"][:n_out]

In [4]:
import torch.nn as nn
from torch.optim import Adam
from torch.autograd import Variable
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error

class MyNet(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        n_channels = 16
        self.conv = nn.Conv1d(1, n_channels, kernel_size=5, stride=1)
        s = (n_in - self.conv.kernel_size[0])
        self.pool = nn.AvgPool1d(kernel_size=s)
        self.fc1 = nn.Linear(n_channels, n_out)
        s = self.fc1.out_features
        self.fc2 = nn.Linear(s, n_out)
        
    def forward(self, x):
        x = self.conv(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        #x = self.relu(x)
        #x = self.fc2(x)
        #x = self.relu(x)
        return x
    
    def reset_parameters(self):
        for layer in [self.conv, self.fc1, self.fc2]:
            layer.reset_parameters()
    
def train(model, x, y):
    opt = Adam(model.parameters(), lr=0.1)
    loss_fon = nn.MSELoss()
    for epoch in range(500):
        x = Variable(x)
        y = Variable(y)
        opt.zero_grad()
        yp = model(x)
        loss = loss_fon(yp, y)
        loss.backward()
        opt.step()
        print(epoch, loss.item(), end=' '*10+'\r')

def score(model, x, y):
    kfold = KFold(n_splits=3, shuffle=True)
    scores = list()
    model.reset_parameters()
    for fold, (train_ids, test_ids) in enumerate(kfold.split(x_rc)):
        x_train = x[train_ids]
        x_test = x[test_ids]
        y_train = y[train_ids]
        y_test = y[test_ids]
        train(model, x_train, y_train)
        y_test_p = model(x_test)
        scores.append(mean_absolute_error(y_test.detach(), y_test_p.detach()))
    return np.mean(scores), np.std(scores)
            
x = torch.tensor(x_rc)
y = torch.tensor(y_rc)
model = MyNet().double()
score(model, x, y)

499 0.4161070135160492           

(0.4732282658678199, 0.0019004806216727779)

In [15]:
yp = model(x)
print((y[:10,:]-yp[:10,:])/y[:10,:])

tensor([[ 0.0285,  0.0067, -0.0953],
        [ 0.1528,  0.1422,  0.2317],
        [-0.0265, -0.4257,  0.3177],
        [ 0.0307, -0.2420, -0.0056],
        [-0.0418, -0.0193, -0.1689],
        [-0.0400, -0.0684,  0.0602],
        [-0.1201, -0.1943, -1.8919],
        [-0.0539, -0.0736,  0.0059],
        [ 0.0642,  0.0225,  0.0471],
        [-0.0541, -0.0455, -0.0477]], dtype=torch.float64,
       grad_fn=<DivBackward0>)
