In [None]:
import os
import re

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset

In [None]:
class EpilepsyDataset(Dataset):
    """Index of per-patient sensor recordings stored as parquet segments.

    The path handling in this class expects the layout::

        <path_to_data>/<patient>/<sensor>/<segment file>

    where a segment file name, split on ``_``, carries the parameter name in
    field 3 and ``<segment number>.<extension>`` in field 5, e.g.
    ``MSEL_01842_Empatica-TEMP_TEMP_segment_100.parquet``.

    On construction the directory tree is scanned once and the segment paths
    are stored in ``_sorted_data`` (see ``_get_sorted_data``).
    """

    # Zero-based positions of the fields in a segment file name split on "_".
    _PARAMETER_FIELD = 3
    _SEGMENT_NUMBER_FIELD = 5

    def __init__(self, path_to_data: str, verbose: bool = True):
        """Scan ``path_to_data`` and build the index of segment paths.

        Args:
            path_to_data: Root directory that contains one folder per patient.
            verbose: When True, progress information is printed while scanning.

        Raises:
            ValueError: If ``path_to_data`` does not exist.
        """
        if not os.path.exists(path_to_data):
            raise ValueError('There is no such path')

        self.path = path_to_data
        self.verbose = verbose

        # Only real, non-hidden directories count as patients; a stray file in
        # the data root would otherwise make os.listdir() fail further down.
        self.folders_with_patients = self.get_patients_names()

        # Raw directory listing of every patient folder, in the same order as
        # folders_with_patients.
        self.patients_data = [
            self.get_patients_data(patient) for patient in self.folders_with_patients
        ]

        self._sorted_data = self._get_sorted_data()

    def get_patients_data(self, patient: str) -> list[str]:
        """Return the raw directory listing of the given patient's folder."""
        return os.listdir(os.path.join(self.path, patient))

    def get_patients_names(self) -> list[str]:
        """Return the sorted names of the patient folders under the data root.

        Hidden entries (leading ``.``) and plain files are ignored.
        """
        return sorted(
            name for name in os.listdir(self.path)
            if not name.startswith('.')
            and os.path.isdir(os.path.join(self.path, name))
        )

    def get_all_sensors_records_for_patient(self, patient: str) -> list[str]:
        """Return the sorted sensor folder names of ``patient``.

        Raises:
            ValueError: If ``patient`` is not one of ``get_patients_names()``.
        """
        if patient not in self.get_patients_names():
            raise ValueError('There is no such name')

        full_path_to_patient = os.path.join(self.path, patient)

        return sorted(
            name for name in os.listdir(full_path_to_patient)
            if not name.startswith('.')
            and os.path.isdir(os.path.join(full_path_to_patient, name))
        )

    def _get_sorted_segments(self, sensor_folder: str) -> list[list]:
        """Group the segment files of one sensor folder by parameter.

        Args:
            sensor_folder: Sensor folder path relative to the data root
                (``<patient>/<sensor>``).

        Returns:
            One list of file names per parameter, each ordered by ascending
            segment number. The groups themselves are ordered by parameter
            name so the result is deterministic across runs.
        """
        delimiter = '_'
        full_path = os.path.join(self.path, sensor_folder)

        segments_by_parameter = {}
        for file_name in os.listdir(full_path):
            fields = file_name.split(delimiter)
            # Skip anything that does not follow the segment naming scheme
            # (too few fields or a non-numeric segment number) instead of
            # failing on it.
            if len(fields) <= self._SEGMENT_NUMBER_FIELD:
                continue
            try:
                segment_number = int(fields[self._SEGMENT_NUMBER_FIELD].split('.')[0])
            except ValueError:
                continue

            parameter = fields[self._PARAMETER_FIELD]
            segments_by_parameter.setdefault(parameter, []).append(
                (segment_number, file_name)
            )

        return [
            [file_name for _, file_name in sorted(segments_by_parameter[parameter])]
            for parameter in sorted(segments_by_parameter)
        ]

    def _read_segment(self, path_to_segment: str):
        """Load a single parquet segment into a DataFrame."""
        return pd.read_parquet(path_to_segment)

    def _upsample(self, data: 'np.ndarray | torch.Tensor', sample_rate: float,
                  new_sample_rate: float, mode: str = 'bicubic'):
        """Resample ``data`` by the ratio ``new_sample_rate / sample_rate``.

        Args:
            data: Input signal; converted with ``torch.as_tensor`` so both
                numpy arrays and tensors are accepted.
            sample_rate: Current sampling rate of ``data``.
            new_sample_rate: Target sampling rate.
            mode: Interpolation mode forwarded to ``nn.Upsample``.
                NOTE(review): the default 'bicubic' needs a 4-D input in
                PyTorch; 1-D signals shaped (batch, channels, length) need
                mode='linear' -- confirm the intended input shape.

        Returns:
            The interpolated ``torch.Tensor``.
        """
        scale_factor = new_sample_rate / sample_rate
        # Keyword arguments are required here: positionally the first two
        # parameters of nn.Upsample are ``size`` and ``scale_factor``.
        upsampler = nn.Upsample(scale_factor=scale_factor, mode=mode)
        return upsampler(torch.as_tensor(data))

    def _get_sorted_data(self):
        """Build the nested index of full segment paths.

        Returns:
            A list with one entry per patient; each entry is a list with one
            entry per sensor; each sensor entry is a list with one entry per
            parameter, holding the full paths of that parameter's segments in
            ascending segment order. The result is also stored in
            ``self._dataset_data``.
        """
        self._dataset_data = []
        if self.verbose:
            print(self.folders_with_patients)

        for patient in self.folders_with_patients:
            if self.verbose:
                print('Patient: {}'.format(patient))

            patient_records = []
            for sensor in self.get_all_sensors_records_for_patient(patient):
                sensor_directory = os.path.join(self.path, patient, sensor)
                records = self._get_sorted_segments(os.path.join(patient, sensor))

                # Exactly one entry per sensor, appended once after all of its
                # parameter groups have been resolved.
                patient_records.append([
                    [os.path.join(sensor_directory, file_name) for file_name in record]
                    for record in records
                ])

            self._dataset_data.append(patient_records)

        return self._dataset_data

    def __len__(self):
        """Number of samples -- not defined yet.

        Raises:
            NotImplementedError: Always; the sample granularity has not been
                decided, and returning None would only fail later inside
                ``len()`` with a less helpful error.
        """
        raise NotImplementedError('EpilepsyDataset.__len__ is not implemented yet')

    def __getitem__(self, idx):
        """Sample access -- not defined yet.

        Raises:
            NotImplementedError: Always, instead of silently returning None.
        """
        raise NotImplementedError('EpilepsyDataset.__getitem__ is not implemented yet')

    def _get_fullconnected_data_signals(self):
        """Concatenate all segments of every signal into one DataFrame.

        Returns:
            A dict mapping the segment file name without its
            ``_segment_<N>.parquet`` suffix to the row-wise concatenation of
            that signal's segments, in ascending segment order.
        """
        combine_signals = {}
        for patient in self._sorted_data:
            for sensor in patient:
                for signal in sensor:
                    if not signal:
                        # Nothing to concatenate; pd.concat([]) would raise.
                        continue

                    # Key = file name of the first segment minus the segment suffix.
                    key_name = re.sub(
                        r"_segment_\d+\.parquet$", "", os.path.basename(signal[0])
                    )

                    combined_signal = pd.concat(
                        [self._read_segment(segment) for segment in signal], axis=0
                    )
                    if self.verbose:
                        print('{}: {} rows'.format(key_name, len(combined_signal)))
                    combine_signals[key_name] = combined_signal
        return combine_signals
        

In [None]:
# Index every patient folder under the data root; construction scans the
# directory tree and, with the default verbose=True, prints progress.
dataset = EpilepsyDataset(
    path_to_data='/workspace/data_seerpy/data_seerpy/data/',
)
# Debug aid: uncomment to inspect the indexed segment paths.
# print(dataset._sorted_data)

In [23]:
# Load a single TEMP segment of patient MSEL_01842 for manual inspection.
a = pd.read_parquet(
    path="/workspace/data_seerpy/data_seerpy/data/MSEL_01842/Empatica-TEMP/MSEL_01842_Empatica-TEMP_TEMP_segment_100.parquet"
)

In [24]:
# Print the loaded segment; pandas abbreviates long frames, so only the first
# and last rows of the `time` / `data` columns appear in the output below.
print(a)

             time          data
0    1.558209e+12  3.441002e+10
1    1.558209e+12  3.441130e+10
2    1.558209e+12  3.441258e+10
3    1.558209e+12  3.441385e+10
4    1.558209e+12  3.441514e+10
..            ...           ...
539  1.558209e+12  3.441594e+10
540  1.558209e+12  3.441482e+10
541  1.558209e+12  3.441354e+10
542  1.558209e+12  3.441226e+10
543  1.558209e+12  3.441098e+10

[544 rows x 2 columns]
