<a href="https://colab.research.google.com/github/t-willi/Simula/blob/main/AE_v5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch import nn
import matplotlib.pyplot as plt
import torchvision
from torchvision import datasets, models, transforms
from torchvision.transforms import ToTensor
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from timeit import default_timer as timer 
import glob
import torch.optim as optim
from sklearn.preprocessing import normalize
from sklearn.preprocessing import MinMaxScaler
from random import shuffle

import requests
import zipfile
from pathlib import Path

# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [None]:
# Mount Google Drive under /content/gdrive so the archive and the model output
# folder referenced later (paths under /content/gdrive/MyDrive) are reachable.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
# Setup path to data folder
data_path = Path("data/")
train_path = data_path / "train_data"
# Archive on the mounted Google Drive that holds the training data.
archive_path = "/content/gdrive/MyDrive/Simula/pulse2pulse_150k.tar.gz"

# If the data folder doesn't exist (or is empty, e.g. because a previous
# extraction attempt failed after the folder was created), extract the archive.
if train_path.is_dir() and any(train_path.iterdir()):
    print(f"{train_path} directory exists.")
else:
    print(f"Did not find {train_path} directory, creating one...")
    train_path.mkdir(parents=True, exist_ok=True)

    # importing the "tarfile" module
    import tarfile
    # The context manager closes the archive even when extraction raises.
    with tarfile.open(archive_path) as archive:
        archive.extractall(train_path)

Did not find data/train_data directory, creating one...


In [None]:
# from tqdm.auto import tqdm
# max_list=[]
# data_dir_test="/content/data/train_data/from_006_chkp_2500_150k"
# #data_dir_train = "/content/data/train_data/sub_data/train"
# files=glob.glob(data_dir_test + '/*.asc')
# #files_train=glob.glob(data_dir_train + '/*.asc')
# #files=[*files_test,*files_train]
# for file in tqdm(files):
#   temp_df=pd.read_csv(file,sep=" ",header=None)
#   max=temp_df.max().max()
#   max_list.append(max)

In [None]:
# Create custom dataset class to load ECG data into dataset, containing
# input tensor with lead 1 and output tensor with desired other leads
class Custom_dataset(Dataset):
    """Dataset yielding (input, label) tensor pairs read lazily from ``*.asc`` files.

    Every item is produced by reading one space-separated file with pandas.
    The input is column index 1 of that file and the label is column index
    ``column``; both are returned as float32 tensors with a leading dimension
    of size 1.

    Args:
        data_dir: Directory that is searched (non-recursively) for ``*.asc`` files.
        max_value: Stored on the instance but not applied; the scaling that
            used it is currently disabled.
        column: Positional index of the column used as the label.
        split: When ``True`` the file list is divided 80/20 and only one part
            is exposed; otherwise every file is exposed.
        train: With ``split=True``, selects the first 80% (``True``) or the
            remaining 20% (otherwise) of the files.
    """

    def __init__(self, data_dir,max_value=5000,column=3,split=False,train=True):
        self.column = column
        self.max_value = max_value
        # glob returns paths in arbitrary order. Sorting makes the 80/20 cut
        # deterministic, so separately constructed train and test instances
        # are guaranteed to see disjoint files.
        self.files = sorted(glob.glob(data_dir + '/*.asc'))
        self.len = len(self.files)
        self.cut = int(self.len * 0.8)
        self.train_files = self.files[0:self.cut]
        self.test_files = self.files[self.cut:self.len]
        self.train = train
        self.split = split

    def _active_files(self):
        """Return the list of files this instance exposes (train, test or all)."""
        if self.split is True:
            return self.train_files if self.train is True else self.test_files
        return self.files

    def __len__(self):
        return len(self._active_files())

    def __getitem__(self, idx):
        # NOTE(review): read_csv is called without header=None, so the first
        # line of each file is consumed as the column header and is not part
        # of the returned signal - confirm this is intended before changing it.
        temp_df = pd.read_csv(self._active_files()[idx], sep=" ")
        # load input tensor (column index 1), shape (1, n_rows)
        temp_tensor_in = torch.tensor(temp_df.iloc[:, 1].values, dtype=torch.float32)
        temp_tensor_in = temp_tensor_in.unsqueeze(0)
        # load label tensor (column index self.column), shape (1, n_rows)
        temp_tensor_out = torch.tensor(temp_df.iloc[:, self.column].values, dtype=torch.float32)
        temp_tensor_out = temp_tensor_out.unsqueeze(0)
        return temp_tensor_in, temp_tensor_out

In [None]:
# Build the dataset directory from train_path (where the archive was extracted)
# instead of repeating a hard-coded absolute path; resolve() keeps it absolute
# so the file paths stay valid if the working directory changes later.
data_dir = str((train_path / "from_006_chkp_2500_150k").resolve())
# Two views on the same files: first 80% for training, remaining 20% for testing.
train_dataset = Custom_dataset(data_dir=data_dir,split=True,train=True)
test_dataset = Custom_dataset(data_dir=data_dir,split=True,train=False)

In [None]:
from torch.utils.data.dataloader import DataLoader
# Number of samples per batch (32*32 = 1024).
BATCH_SIZE = 32*32
#turn datasets into iterables
train_dataloader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True
                              )
# Evaluation does not benefit from shuffling; a fixed order keeps the
# per-batch-averaged test loss reproducible when the last batch is smaller.
test_dataloader = DataLoader(test_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=False
                              )

In [None]:
"""
Here, we define the autoencoder model.This model is taken from "https://github.com/L1aoXingyu/pytorch-beginner/blob/master/08-AutoEncoder/simple_autoencoder.py"
"""
class ECG_AE_v1(nn.Module):
    """Fully connected autoencoder with a 10-unit bottleneck.

    Layer widths are input_size -> 128 -> 20 -> 10 -> 20 -> 128 -> input_size,
    with ReLU between the linear layers and no activation on the output.

    Args:
        input_size: Size of the last dimension of the input and of the
            reconstructed output. Defaults to 4999.
    """

    def __init__(self, input_size=4999):
        super().__init__()
        # The attribute name "AE" is part of the state_dict keys
        # ("AE.0.weight", ...), so it must stay unchanged for saved
        # checkpoints to keep loading.
        self.AE = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 20),
            nn.ReLU(),
            nn.Linear(20, 10),
            nn.ReLU(),
            nn.Linear(10, 20),
            nn.ReLU(),
            nn.Linear(20, 128),
            nn.ReLU(),
            nn.Linear(128, input_size),
        )

    def forward(self, x):
        """Apply the autoencoder to ``x``, whose last dimension must equal ``input_size``."""
        return self.AE(x)


# Build the autoencoder and place it on the selected device.
model = ECG_AE_v1()
model = model.to(device)
# Mean squared error between the model output and the label.
criterion = nn.MSELoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)



In [None]:
def train_model(Epochs=20,model=model,train_dataloader=train_dataloader,test_dataloader=test_dataloader):
  """Train ``model`` and evaluate it on the test loader after every epoch.

  Uses the module-level ``criterion``, ``optimizer`` and ``device``. The
  optimizer is not rebuilt here, so a ``model`` argument other than the
  default must be paired with a matching module-level optimizer.

  Every 5th epoch (starting with epoch 0) and after the final epoch, the
  losses are printed, the model state_dict is saved and the recorded
  progress is written to a CSV file on Google Drive.

  Args:
    Epochs: Number of passes over ``train_dataloader``.
    model: Module to train.
    train_dataloader: Iterable of (input, label) batches used for training.
    test_dataloader: Iterable of (input, label) batches used for evaluation.
  """
  from tqdm.auto import tqdm
  epoch_count = []
  test_count = []
  train_count = []
  #train and test loop
  for epoch in tqdm(range(Epochs)):
    print(f"Epoch:{epoch}")
    train_loss = 0.0
    model.train()
    for X, y in tqdm(train_dataloader):
      X, y = X.to(device), y.to(device)
      output = model(X)
      loss = criterion(output, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      # Accumulate a plain float; adding the tensor itself would keep every
      # batch's autograd graph alive until the end of the epoch.
      train_loss += loss.item()
    #average loss per batch
    train_loss /= len(train_dataloader)

    #start testing
    test_loss = 0.0
    model.eval()
    with torch.inference_mode():
      for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        test_pred = model(X)
        test_loss += criterion(test_pred, y).item()
    test_loss /= len(test_dataloader)

    # Report and checkpoint every 5th epoch, and also after the last epoch so
    # the final weights are never lost when Epochs - 1 is not a multiple of 5.
    if epoch % 5 == 0 or epoch == Epochs - 1:
      print(f"\nTrain loss: {train_loss:.5f} |test_loss:{test_loss}" )
      epoch_count.append(epoch)
      test_count.append(test_loss)
      train_count.append(train_loss)
      print("saving model")
      torch.save(model.state_dict(), "/content/gdrive/MyDrive/Simula/model_outcome/model1")
      training_progress = pd.DataFrame(
          {'Epoch': epoch_count,
           'Train_loss': train_count,
           'Test_loss': test_count
          })
      training_progress.to_csv("/content/gdrive/MyDrive/Simula/model_outcome/progress.csv")


In [None]:
# Train for 10 epochs using the default model and dataloaders.
train_model(10)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch:0


  0%|          | 0/24 [00:00<?, ?it/s]


Train loss: 65005.20703 |test_loss:64985.80078125
saving model
Epoch:1


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch:2


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch:3


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch:4


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch:5


  0%|          | 0/24 [00:00<?, ?it/s]


Train loss: 64987.90234 |test_loss:64982.57421875
saving model
Epoch:6


  0%|          | 0/24 [00:00<?, ?it/s]

ParserError: ignored

In [None]:
# Manually save the current weights to the same folder train_model() writes to
# ("model_outcome", with an underscore).
torch.save(model.state_dict(), "/content/gdrive/MyDrive/Simula/model_outcome/model1")

In [None]:
# Plot the input signal of the first test sample.
# Names chosen so the builtin input() is not shadowed.
sample_input, sample_target = test_dataset[0]
# The dataset adds a leading dimension of size 1; index 0 yields the flat signal.
data = sample_input.tolist()[0]
plt.plot(data)
# plt.title("Input ECG lead1")

In [None]:
# Run the model on the first test sample and plot its output.
X,y=test_dataset[0]
# Move the sample to wherever the model currently lives instead of moving the
# model to the CPU, so training can be re-run afterwards without a device mismatch.
model_device = next(model.parameters()).device
model.eval()
with torch.inference_mode():
  output=model(X.to(model_device))
# Bring the result back to the CPU before converting it to a Python list.
output=output.cpu().tolist()
data=output[0]
plt.plot(data)
plt.title("Output ECG trained for 20epochs on 50 datapoints")