In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive; every CSV path used below lives under
# /content/drive/MyDrive. The google.colab package is only available in Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the raw recording of every ground-truth session and stack them into
# `traindata`. Each file is read without a header and transposed, so every
# column of a CSV becomes one row of the combined frame.
GROUND_TRUTH_DIR = '/content/drive/MyDrive/hackathon/ground_truth'
GROUND_TRUTH_SESSIONS = [
    'Subject39_1526417507',
    'Subject39_1526591202',
    'Subject42_1527280030',
    'Subject43_1527806941',
    'Subject54_1539288817',
    'Subject55_1539459892',
]

signal_frames = []
for session in GROUND_TRUTH_SESSIONS:
    # The recording file is named after the numeric suffix of its session folder.
    recording_id = session.split('_')[-1]
    recording = pd.read_csv(f'{GROUND_TRUTH_DIR}/{session}/{recording_id}.csv', header = None)
    signal_frames.append(recording.T)

# ignore_index gives the combined frame a fresh 0..n-1 row index.
traindata = pd.concat(signal_frames, ignore_index = True)

In [None]:
# Load the breath-rate labels of every ground-truth session and stack them into
# `target_values`, in the same session order used for the recordings.
GROUND_TRUTH_DIR = '/content/drive/MyDrive/hackathon/ground_truth'
GROUND_TRUTH_SESSIONS = [
    'Subject39_1526417507',
    'Subject39_1526591202',
    'Subject42_1527280030',
    'Subject43_1527806941',
    'Subject54_1539288817',
    'Subject55_1539459892',
]

target_frames = []
for session in GROUND_TRUTH_SESSIONS:
    labels = pd.read_csv(f'{GROUND_TRUTH_DIR}/{session}/breathrates.csv', header = None)
    # After transposing, row 0 holds the first column of the file; it is
    # discarded (presumably the timestamp column - TODO confirm).
    target_frames.append(labels.T.drop(0))

# ignore_index gives the combined frame a fresh 0..n-1 row index.
target_values = pd.concat(target_frames, ignore_index = True)

In [None]:
class LSTM(nn.Module):
  """Single-layer LSTM followed by a two-layer regression head.

  Every time step of the input sequence is mapped to one scalar prediction.

  Args:
    input_size: Number of features per time step (default 6780).
    hidden_size: Width of the LSTM hidden state (default 512).
  """

  def __init__(self, input_size = 6780, hidden_size = 512):
    super(LSTM, self).__init__()
    self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size, batch_first = True)
    self.linear1 = nn.Linear(hidden_size, 64)
    self.dropout = nn.Dropout(0.5)
    self.linear2 = nn.Linear(64, 1)
    self.relu = nn.ReLU()

  def forward(self, x):
    """Predict one value per time step.

    Args:
      x: Tensor of shape (1, seq_len, input_size), i.e. a batch holding
        exactly one sequence (batch_first layout).

    Returns:
      Tensor of shape (1, seq_len) with one prediction per time step.
    """
    # nn.LSTM returns (output, (h_n, c_n)); only the per-step outputs are used.
    output, _ = self.lstm(x)
    # Drop only the batch axis. A dimensionless squeeze() would also collapse
    # the time axis whenever the sequence has a single step.
    if output.dim() == 3 and output.size(0) == 1:
      output = output.squeeze(0)
    res = self.linear1(output)
    res = self.relu(res)
    res = self.dropout(res)
    res = self.linear2(res)
    # (seq_len, 1) -> (1, seq_len)
    return res.T

In [None]:
class BreathRateDataset(torch.utils.data.Dataset):
  """Serves rows of a feature frame (and a matching target frame) as tensors.

  Args:
    train_data: DataFrame whose rows each hold seq_len * n_features values.
    targets: Optional DataFrame with one row of labels per row of train_data.
    seq_len: Number of time steps each feature row is split into (default 8).
    n_features: Number of values per time step (default 6780).
  """

  def __init__(self, train_data, targets = None, seq_len = 8, n_features = 6780):
    self.train_data = train_data
    self.targets = targets
    self.seq_len = seq_len
    self.n_features = n_features

  def __len__(self):
    return len(self.train_data)

  @staticmethod
  def _row_to_tensor(frame, idx):
    """Return the idx-th row (by position) of frame as a (1, n) float32 tensor."""
    # Samplers hand out positions 0..len-1, so the lookup must be positional.
    # A label lookup (.loc) silently returns the wrong row once the frame has
    # been shuffled, e.g. by train_test_split.
    row = frame.iloc[idx]
    values = row.values.reshape((1, len(row))).astype(np.float32)
    return torch.tensor(values)

  def __getitem__(self, idx):
    """Return (features, target), or only features when no targets were given.

    features has shape (1, seq_len, n_features); target has shape (1, n_labels).
    """
    features = self._row_to_tensor(self.train_data, idx)
    features = features.view((1, self.seq_len, self.n_features))
    if self.targets is None:
      return features
    return features, self._row_to_tensor(self.targets, idx)

In [None]:
# Hold out one sixth of the rows for validation. The split is shuffled, so a
# fixed random_state is passed to get the same partition on every run.
train_data, validation_data, train_target, validation_target = train_test_split(
    traindata, target_values, test_size=1/6, shuffle=True, random_state=42
)

In [None]:
# Wrap the training and validation splits in dataset objects.
train_dataset, validation_dataset = (
    BreathRateDataset(features, labels)
    for features, labels in (
        (train_data, train_target),
        (validation_data, validation_target),
    )
)

In [None]:
# One sample per batch, served in dataset order, for both splits.
train_dataloader, validation_dataloader = (
    torch.utils.data.DataLoader(split_dataset, batch_size = 1)
    for split_dataset in (train_dataset, validation_dataset)
)

In [None]:
def train(model, lossfn, optimizer, scheduler, device, dataloader = None):
    """Run one optimisation pass over the training batches.

    Args:
        model: Module mapping a (1, seq_len, n_features) tensor to one
            prediction per time step.
        lossfn: Callable taking (predictions, targets) and returning a scalar loss.
        optimizer: Optimizer bound to the model parameters.
        scheduler: Unused; kept so existing call sites stay valid.
        device: Device the batches are moved to.
        dataloader: Iterable of (features, targets) batches. Defaults to the
            notebook-level train_dataloader.

    Returns:
        The sum of the per-batch RMSE values as a detached tensor, or 0 when
        the loader yields no batches.
    """
    if dataloader is None:
        dataloader = train_dataloader
    model.train()
    training_error = 0
    n_batches = 0
    for xb, yb in dataloader:
        # Remove the axis the loader added in front of the (1, seq_len, n_features) sample.
        xb = xb.to(device, dtype = torch.float32).squeeze(0)
        # Flatten both sides to 1-D so the loss compares them element-wise.
        # An (n, 1) prediction against an (n,) target would broadcast to
        # (n, n) and average over every prediction/target pair instead.
        yb = yb.to(device, dtype = torch.float32).reshape(-1)
        predictions = model(xb).reshape(-1)
        # NOTE: an earlier experiment overwrote predictions wherever the target
        # was -1; that masking is not applied here.
        loss = lossfn(predictions, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Detach so the running metric does not keep every batch's graph alive.
        training_error += torch.sqrt(torch.mean(torch.square(predictions.detach() - yb)))
        n_batches += 1
    # Report the mean RMSE over the batches that were actually seen.
    print(f"Training RMSE: {training_error / max(n_batches, 1)}")
    return training_error
        
def validate(model, lossfn, optimizer, scheduler, device, dataloader = None):
    """Evaluate the model on the validation batches without updating it.

    Args:
        model: Module mapping a (1, seq_len, n_features) tensor to one
            prediction per time step.
        lossfn: Unused; kept so existing call sites stay valid.
        optimizer: Unused; kept so existing call sites stay valid.
        scheduler: Unused; kept so existing call sites stay valid.
        device: Device the batches are moved to.
        dataloader: Iterable of (features, targets) batches. Defaults to the
            notebook-level validation_dataloader.

    Returns:
        The sum of the per-batch RMSE values as a tensor, or 0 when the loader
        yields no batches.
    """
    if dataloader is None:
        dataloader = validation_dataloader
    validation_error = 0
    n_batches = 0
    model.eval()
    with torch.no_grad():
        for xv, yv in dataloader:
            # Remove the axis the loader added in front of the sample.
            xv = xv.to(device, dtype = torch.float32).squeeze(0)
            # Flatten both sides so they are compared element-wise.
            yv = yv.to(device, dtype = torch.float32).reshape(-1)
            predictions = model(xv).reshape(-1)
            validation_error += torch.sqrt(torch.mean(torch.square(predictions - yv)))
            n_batches += 1
    # Report the mean RMSE over the batches that were actually seen.
    print(f"Validation RMSE: {validation_error / max(n_batches, 1)}")
    return validation_error


In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Learning-rate scheduling is disabled; a ReduceLROnPlateau scheduler
# (factor 0.1, patience 7, threshold 1e-2) was tried here previously.
scheduler = None

# Network, Adam optimizer at lr 1e-3, and mean-squared-error objective.
model = LSTM()
optimizer = torch.optim.Adam(params = model.parameters(), lr = 1e-3)
lossfn = torch.nn.functional.mse_loss

In [None]:
# First training phase: one training pass and one validation pass per epoch,
# using the optimizer configured above.
epochs = 10
model.to(device)
i = 0
while i < epochs:
    print(f"Epoch: {i+1}")
    training_error = train(model, lossfn, optimizer, scheduler, device)
    validation_error = validate(model, lossfn, optimizer, scheduler, device)
    i += 1

In [None]:
# Second training phase: continue from the current weights with a fresh Adam
# optimizer at a ten times smaller learning rate, for `epochs` more epochs.
optimizer = torch.optim.Adam(params = model.parameters(), lr = 1e-4)
i = 0
while i < epochs:
    print(f"Epoch: {i+1}")
    training_error = train(model, lossfn, optimizer, scheduler, device)
    validation_error = validate(model, lossfn, optimizer, scheduler, device)
    i += 1

In [None]:
# Print the model output next to the targets for every training sample.
# Targets are cast to int64 for display, which drops any fractional part.
model.eval()
with torch.no_grad():
    for xv, yv in train_dataloader:
        xv = xv.to(device, dtype = torch.float32).squeeze(0)
        yv = yv.to(device, dtype = torch.int64).squeeze()
        predictions = model(xv).T.to(device, dtype = torch.float32)
        print(predictions)
        print(yv)

In [None]:
# Print the model output next to the targets for every validation sample.
# Targets are cast to int64 for display, which drops any fractional part.
model.eval()
with torch.no_grad():
    for xv, yv in validation_dataloader:
        xv = xv.to(device, dtype = torch.float32).squeeze(0)
        yv = yv.to(device, dtype = torch.int64).squeeze()
        predictions = model(xv).T.to(device, dtype = torch.float32)
        print(predictions)
        print(yv)

In [None]:
# Show the first entry of the most recently computed `predictions` tensor.
predictions[0]

In [None]:
# Display the concatenated ground-truth label frame.
target_values 

In [None]:
class TestDataset(torch.utils.data.Dataset):
  """Serves rows of an unlabelled feature frame as tensors.

  Args:
    train_data: DataFrame whose rows each hold seq_len * n_features values.
    targets: Unused; kept so the constructor matches the labelled dataset.
    seq_len: Number of time steps each row is split into (default 8).
    n_features: Number of values per time step (default 6780).
  """

  def __init__(self, train_data, targets = None, seq_len = 8, n_features = 6780):
    self.train_data = train_data
    self.seq_len = seq_len
    self.n_features = n_features

  def __len__(self):
    return len(self.train_data)

  def __getitem__(self, idx):
    """Return the idx-th row as a float32 tensor of shape (1, seq_len, n_features)."""
    # Samplers hand out positions 0..len-1, so the lookup must be positional;
    # a label lookup (.loc) can return a different row when the frame's index
    # is not 0..len-1 in order.
    row = self.train_data.iloc[idx]
    values = row.values.reshape((1, len(row))).astype(np.float32)
    return torch.tensor(values).view((1, self.seq_len, self.n_features))

In [None]:
import os

def load_test_sessions(root_dir, output_name = 'breathrates.csv'):
    """Read one recording per directory found under root_dir.

    Args:
        root_dir: Directory tree to walk.
        output_name: File name to ignore. The export step writes a file with
            this name into each session directory, so it must never be
            mistaken for the input recording.

    Returns:
        (frame, directories): frame stacks the transposed recordings with a
        fresh 0..n-1 index, and directories[i] is the directory that the i-th
        recording was read from. frame is empty when nothing was found.
    """
    frames = []
    directories = []
    for root, dirnames, files in os.walk(root_dir):
        # Sorting in place makes os.walk visit sub-directories in a stable order.
        dirnames.sort()
        candidates = sorted(name for name in files if name != output_name)
        if not candidates:
            continue
        recording = pd.read_csv(os.path.join(root, candidates[0]), header = None)
        directories.append(root)
        frames.append(recording.T)
    # Concatenate once at the end instead of growing a frame inside the loop.
    combined = pd.concat(frames, ignore_index = True) if frames else pd.DataFrame()
    return combined, directories

df, addresses = load_test_sessions('/content/drive/MyDrive/hackathon/dataset')

In [None]:
test_dataset = TestDataset(df)
# Do not shuffle: the export loop matches the v-th batch with addresses[v], so
# batches must arrive in the same order as the rows of df.
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size = 1, shuffle = False)

In [None]:
# Predict breath rates for every test recording and write them, one
# (timestamp, rate) row per time step, to breathrates.csv in its directory.
model.eval()
with torch.no_grad():
    for v, xv in enumerate(test_dataloader):
        xv = xv.to(device, dtype = torch.float32).squeeze(0)
        predictions = model(xv).reshape(-1).detach().cpu().numpy()
        # Widen to float64 before rounding; widening a rounded float32
        # re-introduces digits such as 12.34000015258789.
        predictions = np.round(predictions.astype(np.float64), 2)
        # addresses[v] is the directory of the v-th row of the test frame, so
        # this pairing is only valid when the loader yields rows in that order.
        path = addresses[v]
        # The last ten characters of the directory path are parsed as the
        # starting timestamp; predictions are stamped 30 apart from there.
        start_time = int(path[-10:])
        timestamps = start_time + 30 * np.arange(len(predictions))
        # Build the frame column-wise so timestamps stay integers instead of
        # being upcast to float together with the predictions.
        predictionsdf = pd.DataFrame({0: timestamps, 1: predictions})
        predictionsdf.to_csv(os.path.join(path, 'breathrates.csv'), index = False, header = False, float_format = '%.2f')


In [None]:
# Display the prediction table built for the last recording of the export loop.
predictionsdf