<a href="https://colab.research.google.com/github/linshaochieh2019/test/blob/main/ventilator_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive so the data and weight paths used in later cells resolve.
# The `google.colab` module is only available inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import time
from copy import deepcopy
from datetime import datetime

import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import random_split, DataLoader, Dataset
from tqdm import tqdm

In [3]:
# Locations of the preprocessed frame and of the per-breath R/C table on the mounted Drive.
# Each path gets its own name so `rc_map` only ever refers to the loaded DataFrame.
df_csv = '/content/drive/MyDrive/Data/ventilator1/df_prep.csv'
rc_map_csv = '/content/drive/MyDrive/Data/ventilator1/rcmap.csv'

# The first column of each file is dropped (presumably an index written by an
# earlier to_csv call -- TODO confirm against the preprocessing notebook).
df = pd.read_csv(df_csv).iloc[:, 1:]
rc_map = pd.read_csv(rc_map_csv).iloc[:, 1:]

In [6]:
class MyDataset(Dataset):
  """Sliding-window dataset over a 2-D tensor of per-timestep rows.

  Sample `i` is the window of `seq_len` consecutive rows starting at row `i`;
  its target is column 1 (pressure) of the row right after the window.

  Args:
    input: 2-D tensor whose column 0 is breath_id and whose remaining columns
      are the model features (column 1 being pressure).
    rc_map: DataFrame with the columns 'breath_id', 'R' and 'C'.
    seq_len: number of rows in each window.

  NOTE(review): windows are cut by row position only, so a window (and its
  target) can span two different breath_ids; `rc` is looked up for the first
  row of the window.
  """
  def __init__(self, input, rc_map, seq_len):
    self.input = input
    self.rc_map = rc_map
    self.seq_len = seq_len

    # Index R/C by breath_id once. Filtering the DataFrame inside __getitem__
    # scans the whole table for every single sample, which dominates loading time.
    # Assumes breath_id is unique in rc_map; if it is not, the last row wins -- TODO confirm.
    rc_values = rc_map[['R', 'C']].to_numpy()
    breath_ids = rc_map['breath_id'].to_numpy()
    self._rc_by_breath = {int(b): rc_values[i] for i, b in enumerate(breath_ids)}

  def __getitem__(self, item):
    """Return the window starting at row `item` as a dict.

    Keys: 'breath_indices' (column 0 of the window), 'inputs' (remaining
    columns of the window), 'targets' (pressure of the next row) and 'rc'
    (the [R, C] array of the window's first breath_id).

    Raises:
      KeyError: if the window's breath_id has no row in rc_map.
    """
    breath_id = int(self.input[item, 0].item())
    try:
      rc = self._rc_by_breath[breath_id]
    except KeyError:
      raise KeyError('breath_id {} not found in rc_map'.format(breath_id)) from None

    window_end = item + self.seq_len
    return {
        'breath_indices': self.input[item:window_end, 0],
        'inputs': self.input[item:window_end, 1:],
        'targets': self.input[window_end, 1],  # pressure
        'rc': rc,
    }

  def __len__(self):
    """Number of complete windows that still have a following target row."""
    # Clamp at zero: a negative value would make len() raise for short inputs.
    return max(len(self.input) - self.seq_len, 0)

In [24]:
# Standardise pressure and u_in, keeping one fitted scaler per column.
# NOTE: this overwrites the columns of `df` in place, so re-running the cell
# refits both scalers on values that are already scaled.
from sklearn.preprocessing import StandardScaler

ss_p = StandardScaler()
df['pressure'] = ss_p.fit_transform(df['pressure'].to_numpy().reshape(-1, 1))

ss_u_in = StandardScaler()
df['u_in'] = ss_u_in.fit_transform(df['u_in'].to_numpy().reshape(-1, 1))

In [25]:
# The full dataset is too large to train on, so only a random 10% of the
# windows is kept for the experiments below.

# Columns 0-3: breath_id, pressure, u_in, u_out.
# Named `model_input` so the builtin `input()` is not shadowed in the notebook namespace.
model_input = torch.tensor(df.iloc[:, :4].to_numpy(), dtype=torch.float)

dataset = MyDataset(input=model_input, rc_map=rc_map, seq_len=5)
print(dataset[10])
print(len(dataset))

split_ratio = 0.9
train_len = int(len(dataset) * split_ratio)
val_len = len(dataset) - train_len

# random_split returns the 90% part first; it is discarded and only the
# remaining 10% (val_len samples) is kept under the name train_set.
_, train_set = random_split(dataset,
                            lengths=[train_len, val_len],
                            generator=torch.Generator().manual_seed(1))

{'breath_indices': tensor([1., 1., 1., 1., 1.]), 'inputs': tensor([[0.4949, 1.5865, 0.0000],
        [0.5035, 1.5980, 0.0000],
        [0.6067, 1.5539, 0.0000],
        [0.6970, 1.4944, 0.0000],
        [0.6497, 1.5219, 0.0000]]), 'targets': tensor(0.6067), 'rc': array([1, 2])}
7469545


In [36]:
# Split the sampled subset into the train / validation parts the loaders consume.
# The result is bound to new names and train_set itself is left untouched, so
# re-running this cell does not shrink the training data a little more each time.
n_train = int(len(train_set) * split_ratio)
n_val = len(train_set) - n_train

train_subset, val_set = random_split(train_set,
                                     lengths=[n_train, n_val],
                                     generator=torch.Generator().manual_seed(1))

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)

In [32]:
class RNNModel(nn.Module):
  """Per-timestep MLP followed by an LSTM; emits the LSTM's final hidden state.

  Args:
    input_dim: features per timestep (3 by default: pressure, u_in, u_out).
    dense_dim: width of the MLP output that is fed to the LSTM.
    lstm_dim: LSTM hidden size, i.e. the size of the returned feature vector.
    linear_dim: kept so existing callers do not break; not used by this module.
  """
  def __init__(self, input_dim=3, dense_dim=128, lstm_dim=128, linear_dim=32): #input_dim: pressure, u_in, u_out
    super().__init__()

    self.mlp = nn.Sequential(
        nn.Linear(input_dim, dense_dim // 2),
        nn.ReLU(),
        nn.Linear(dense_dim // 2, dense_dim),
        nn.ReLU(),
        )

    self.lstm = nn.LSTM(dense_dim, lstm_dim, batch_first=True)

  def forward(self, x):
    """Encode a batch of windows.

    Args:
      x: tensor of shape (batch, seq_len, input_dim).

    Returns:
      Final hidden state of the LSTM, shape (batch, lstm_dim).
    """
    features = self.mlp(x)
    _, (h, _) = self.lstm(features)
    # Drop only the leading num_layers axis. A bare squeeze() would also remove
    # the batch axis when the batch holds a single sample.
    return h.squeeze(0)

class DNNModel(nn.Module):
  """Two-layer head mapping input_dim -> input_dim // 4 -> 1.

  The default input_dim of 130 is the 128-dim RNN feature vector with the two
  R and C values appended.
  """
  def __init__(self, input_dim=130):
    super().__init__()
    hidden_dim = input_dim // 4
    layers = [
        nn.Linear(input_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, 1),  # one output value per sample
    ]
    self.linear = nn.Sequential(*layers)

  def forward(self, x):
    """Apply the head to x and return its output with a trailing axis of size 1."""
    return self.linear(x)

In [33]:
# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device.type == 'cuda':
  print('GPU running: {}'.format(torch.cuda.get_device_name(device)))
else:
  # get_device_name() raises without a CUDA device, so report the fallback instead.
  print('No GPU available, running on CPU')

# Instantiate both networks and move them to the selected device
rnn = RNNModel()
rnn.to(device)

dnn = DNNModel()
dnn.to(device)

GPU running: Tesla P100-PCIE-16GB


DNNModel(
  (linear): Sequential(
    (0): Linear(in_features=130, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [34]:
import torch.optim as optim

# A single Adam instance updates the parameters of both networks;
# the training objective is the Huber loss.
trainable_params = [*rnn.parameters(), *dnn.parameters()]
criterion = nn.HuberLoss()
optimizer = optim.Adam(trainable_params, lr=1e-3)

In [None]:
# training
NUM_EPOCHS = 100
weights_dir = '/content/drive/MyDrive/Colab Notebooks/ventilator/weights/'
exp_num = 'v1'
# Create the checkpoint directory up front so the first torch.save cannot fail
# on a missing path after a full epoch of training.
os.makedirs(weights_dir, exist_ok=True)

train_stats = {
    'train_loss': [],
    'val_loss': []
}

# Start from +inf so the first epoch always produces a checkpoint, whatever
# the scale of the loss is.
best_val_loss = float('inf')


def _predict_batch(batch):
  """Run one collated batch through rnn -> concat(R, C) -> dnn.

  Returns:
    (predictions, targets): both on `device`, one value per sample.
  """
  inputs = batch['inputs'].to(device)
  targets = batch['targets'].to(device)
  # rc may be collated as an integer tensor; cast it to the feature dtype
  # explicitly before concatenating.
  rc = batch['rc'].to(device=device, dtype=inputs.dtype)

  # reshape keeps the batch axis even if the RNN squeezed it away for a batch of one
  features = rnn(inputs).reshape(inputs.shape[0], -1)

  # concat features and rc; squeeze(-1) drops only the trailing output axis
  predictions = dnn(torch.cat((features, rc), dim=1)).squeeze(-1)
  return predictions, targets


for epoch in range(NUM_EPOCHS):
  print('Epoch #{}'.format(epoch))
  rnn.train()
  dnn.train()
  train_loss = 0.

  for batch in tqdm(train_loader):
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    predictions, targets = _predict_batch(batch)
    loss = criterion(predictions, targets)
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  avg_train_loss = train_loss / len(train_loader)

  # Validation phase
  rnn.eval()
  dnn.eval()
  val_loss = 0.

  with torch.no_grad():
    for batch in val_loader:
      predictions, targets = _predict_batch(batch)
      val_loss += criterion(predictions, targets).item()

  avg_val_loss = val_loss / len(val_loader)

  # save model whenever the validation loss improves
  if avg_val_loss < best_val_loss:
    best_val_loss = avg_val_loss
    print('Saving model weights ...')
    best_rnn = deepcopy(rnn.state_dict())
    best_dnn = deepcopy(dnn.state_dict())
    torch.save(best_rnn, weights_dir + exp_num + '_rnn.pt')
    torch.save(best_dnn, weights_dir + exp_num + '_dnn.pt')

  print('\n')
  print('train_loss: {:.6f}/ val_loss: {:.6f}'.format(avg_train_loss, avg_val_loss))
  print('\n')

  # save training stats
  train_stats['train_loss'].append(avg_train_loss)
  train_stats['val_loss'].append(avg_val_loss)

Epoch #0


  3%|▎         | 691/21009 [00:29<14:16, 23.73it/s]

In [None]:
# plot the training outcome
import matplotlib.pyplot as plt

# Label the axes and both curves so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(train_stats['train_loss'], label='train_loss')
ax.plot(train_stats['val_loss'], color='red', linestyle='--', label='val_loss')
ax.set(xlabel='epoch', ylabel='loss', title='Training vs. validation loss')
ax.legend()
plt.show()

print('Best val_loss: {:.6f}'.format(best_val_loss))