In [17]:
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn.functional as F
import numpy as np
import math
import pickle

In [133]:

class rando(Dataset):
    """
    This dataloader loads random 5 minute intervals from a random patient.
    """
    def __init__(self, ls_len, seed = None):
        """
        Args:
            path (str): path to the input & target folder.
            series_dict (list): name of dict for data.
            size : (number of experiments, number of max. channels, longest series)
            device (class 'torch.device'): which pytorch device the data should
            be sent to.
        """


        self.length = ls_len
        self.seed = seed
        
        if not(seed):
            self.rng = np.random.default_rng(self.seed)
            self.gen = iter(self.create_data(self.rng))
        

    def create_data(self, rng):
        while True:
            ind = rng.choice(10, 1)
            yield ind

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        if self.seed:
            if idx == 0:
                self.rng = np.random.default_rng(self.seed)
                self.gen = iter(self.create_data(self.rng))
            
            
        tal = next(self.gen)
        return tal


In [154]:
train_file = rando(ls_len = 6)
train_load = torch.utils.data.DataLoader(train_file,
                                           batch_size=3,
                                           shuffle=True,
                                           num_workers=0)

val_file = rando(ls_len = 10, seed = 1)
val_loader = torch.utils.data.DataLoader(val_file,
                                           batch_size=5,
                                           shuffle=False,
                                           num_workers=0)

for i in train_load:
    print(i)

print()
    
for i in val_loader:
    print(i)
    
print()
    
for i in train_load:
    print(i)

print()

for i in val_loader:
    print(i)
    

tensor([[2],
        [9],
        [3]])
tensor([[5],
        [4],
        [4]])

tensor([[4],
        [5],
        [7],
        [9],
        [0]])
tensor([[1],
        [8],
        [9],
        [2],
        [3]])

tensor([[0],
        [1],
        [5]])
tensor([[3],
        [3],
        [4]])

tensor([[4],
        [5],
        [7],
        [9],
        [0]])
tensor([[1],
        [8],
        [9],
        [2],
        [3]])


In [130]:
9 % (10-1)

0

In [202]:
class shuffle_5min(Dataset):
    """
    This dataloader loads random 5 minute intervals from a random patient.
    """
    def __init__(self, path: str, series_dict: str, size: tuple, device, seed = None, length = None):
        """
        Args:
            path (str): path to the input & target folder.
            series_dict (list): name of dict for data.
            size : (number of experiments, number of max. channels, longest series)
            device (class 'torch.device'): which pytorch device the data should
            be sent to.
        """

        self.device = device
        self.size = size
        self.path = path
        self.seed = seed
        
        with open(path + "/" + series_dict, 'rb') as handle:
            self.s_dict = pickle.load(handle)

        self.input_data = np.memmap(self.path + "/model_input.dat", dtype='float32', mode='r', shape=self.size)
        self.target_data = np.memmap(self.path + "/model_target.dat", dtype='float32', mode='r', shape=self.size)

        prop = [] # list with probabilities

        ss = 0 # sum over all the batches
        for val in self.s_dict.values():
            prop.append(val[2])
            ss += val[2]

        self.prop = np.array(prop) / ss
        
        if length:
            self.length = length
        else:
            self.length = ss

        
        if not(seed):
            self.rng = np.random.default_rng(self.seed)
            self.gen = iter(self.create_data(self.s_dict, self.rng))
        



    def create_data(self, s_dict, rng):
        while True:
            ind = rng.choice(self.size[0], 1, p = self.prop)
            shp = s_dict[ind[0] + 1][3] # shape of experiment

            cut_point = rng.integers(low = 200*30, #remove the first 30 secs
                                high = shp[1] - 5*200*60, size = 1)
                                # choose the place to cut

            chan = rng.choice(shp[0], 1)

            inp = self.input_data[ind[0], chan[0], cut_point[0]:cut_point[0]+60*5*200]
            inp = torch.tensor(inp).view(1, 60*5*200)
            tar = self.target_data[ind[0], chan[0], cut_point[0]:cut_point[0]+60*5*200]
            tar = torch.tensor(tar).view(1, 60*5*200)
            # #inp = self.ls[0][0][chan][cut_point[i]:cut_point[i]+60*5*200]
            # #tar = self.ls[1][0][chan][cut_point[i]:cut_point[i]+60*5*200]

            #tar = torch.cat((tar[0], -1*(tar[0] - 1))).view(2, 60*5*200)
            yield inp, tar, (ind[0], chan[0], cut_point[0])


    def clear_ram(self, index):
        """
        This function is for clearing the ram.
        """
        if index % 1000 == 0:
            del self.input_data
            del self.target_data
            self.input_data = np.memmap(self.path + "/model_input.dat", dtype='float32', mode='r', shape=self.size)
            self.target_data = np.memmap(self.path + "/model_target.dat", dtype='float32', mode='r', shape=self.size)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        if self.seed:
            if idx == 0:
                self.rng = np.random.default_rng(self.seed)
                self.gen = iter(self.create_data(self.s_dict, self.rng))
        
        inp, tar, chan = next(self.gen)
        inp = inp.to(self.device)
        tar = tar.to(self.device)
        self.clear_ram(idx)
        return inp, tar, chan


In [157]:
batch_size = 20

train_path = "C:/Users/Marc/Desktop/model_data/train_model_data"
val_path = "C:/Users/Marc/Desktop/model_data/val_model_data"

train_load_file = shuffle_5min(path = train_path,
                                     series_dict = 'train_series_length.pickle',
                                     size = (195, 22, 2060000),
                                     device = device)


train_loader = torch.utils.data.DataLoader(train_load_file,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=0)

val_load_file = shuffle_5min(path = val_path,
                                     series_dict = 'val_series_length.pickle',
                                     size = (28, 22, 549200),
                                     device = device)


val_loader = torch.utils.data.DataLoader(val_load_file,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=0)

NameError: name 'device' is not defined

In [200]:
rng = np.random.default_rng(42)

print(rng.choice(3, 1, p = [0, 0.5, 0.5]))
print(rng.integers(low = 0, high = 3, size = 1))
print(rng.choice(10, 1))

print(rng.choice(3, 1, p = [0, 0.5, 0.5]))
print(rng.integers(low = 0, high = 3, size = 1))
print(rng.choice(10, 1))

print()

rng = np.random.default_rng(42)

print(rng.choice(3, 1, p = [0, 0.5, 0.5]))
print(rng.integers(low = 0, high = 3, size = 1))
print(rng.choice(10, 1))

print(rng.choice(3, 1, p = [0, 0.5, 0.5]))
print(rng.integers(low = 0, high = 3, size = 1))
print(rng.choice(10, 1))

[2]
[1]
[4]
[2]
[0]
[6]

[2]
[1]
[4]
[2]
[0]
[6]
