# In [1]:
# Load data
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from Inner_Speech_Dataset.Python_Processing.Data_extractions import  Extract_data_from_subject

# Load all data for subject
def load_subject(subject_nr):
    """Load the EEG recordings and their description for one subject.

    The data is read from the ``../dataset`` directory through
    ``Extract_data_from_subject``.

    Args:
        subject_nr: Subject identifier forwarded to the extraction helper.

    Returns:
        A ``(data, description)`` tuple as produced by the extraction helper.
    """
    eeg, trial_info = Extract_data_from_subject("../dataset", subject_nr, "EEG")
    return eeg, trial_info

    
# Load all data for subject (non-downsampled variant, read from ./Inner_Speech_Dataset)
def load_subject_non_downsampled(subject_nr):
    """Load the EEG recordings and their description for one subject.

    Same as the regular loader except that the data is read from the
    ``./Inner_Speech_Dataset`` directory.

    Args:
        subject_nr: Subject identifier forwarded to the extraction helper.

    Returns:
        A ``(data, description)`` tuple as produced by the extraction helper.
    """
    eeg, trial_info = Extract_data_from_subject(
        "./Inner_Speech_Dataset", subject_nr, "EEG"
    )
    return eeg, trial_info

# Extract labels from the description
def extract_labels(desc):
    """Return the label column (column index 1) of a description array.

    Args:
        desc: Array-like supporting two-dimensional indexing
            (for example a NumPy array of shape ``(trials, fields)``).

    Returns:
        The values found in column 1, one entry per row of ``desc``.
    """
    label_column = 1
    return desc[:, label_column]


# Test when extracting only the action interval
def extract_action_interval(data, hz):
    """Slice the action interval out of the last axis of ``data``.

    The sample window depends on the sampling rate:

    * ``hz == 254``  -> samples ``254:890``  (about 1.0 s to 3.5 s)
    * ``hz == 1024`` -> samples ``1024:3584`` (1.0 s to 3.5 s)

    Args:
        data: Array-like supporting three-dimensional indexing; the last
            axis is the sample axis.
        hz: Sampling rate of ``data``; must be 254 or 1024.

    Returns:
        ``data`` restricted to the action-interval samples on its last axis.

    Raises:
        ValueError: If ``hz`` is not one of the supported sampling rates.
            (Previously the function silently returned ``None`` here, which
            only surfaced later as an unrelated error.)
    """
    if hz == 254:
        start, stop = 254, 890
    elif hz == 1024:
        start, stop = 1024, 3584
    else:
        raise ValueError(
            f"Unsupported sampling rate {hz!r}; expected 254 or 1024"
        )
    return data[:, :, start:stop]


# Choose only inner speech paradigm
def get_innerspeech(data, description):
    """Keep only the trials whose description column 2 equals 1.

    Args:
        data: Array indexed by trial along its first axis.
        description: Two-dimensional array with one row per trial; the value
            in column 2 selects the paradigm (1 is kept).

    Returns:
        A ``(data, description)`` tuple containing only the selected trials.
    """
    is_inner_speech = description[:, 2] == 1
    selected_data = data[is_inner_speech]
    selected_description = description[is_inner_speech]
    return selected_data, selected_description
        
# Will split data stratifying with label distribution
def split_data(data, labels):
    """Split samples into stratified train, validation and test sets.

    Both splits are shuffled with a fixed ``random_state`` of 2 and stratified
    on the labels, so the result is reproducible.

    Args:
        data: Samples to split.
        labels: Label of each sample, used for stratification.

    Returns:
        ``(train_data, val_data, test_data, train_labels, val_labels,
        test_labels)``.
    """
    shared = {"random_state": 2, "shuffle": True}

    # First hold out 10% of all samples as the test set.
    rest_x, test_x, rest_y, test_y = train_test_split(
        data, labels, test_size=0.1, stratify=labels, **shared
    )
    # Then take 12% of the remaining 90% (about 10% overall) for validation.
    train_x, val_x, train_y, val_y = train_test_split(
        rest_x, rest_y, test_size=0.12, stratify=rest_y, **shared
    )
    return train_x, val_x, test_x, train_y, val_y, test_y

# Will unsqueeze to correct shape (adds a dimension at axis 1) and move to device
def to_device(train_dat, val_dat, test_dat, train_lab, val_lab, test_lab, device):
    """Convert the train/val/test splits to tensors on ``device``.

    Data becomes ``float32`` with a singleton dimension inserted at axis 1;
    labels become ``long`` tensors.

    Args:
        train_dat, val_dat, test_dat: Data of each split.
        train_lab, val_lab, test_lab: Labels of each split.
        device: Target device passed to ``torch.tensor``.

    Returns:
        ``(train_data, val_data, test_data, train_labels, val_labels,
        test_labels)`` as tensors.
    """

    def _data_tensor(values):
        # float32 tensor with an extra dimension at axis 1
        return torch.tensor(values, dtype=torch.float32, device=device).unsqueeze(1)

    def _label_tensor(values):
        return torch.tensor(values, dtype=torch.long, device=device)

    x_train = _data_tensor(train_dat)
    y_train = _label_tensor(train_lab)

    x_val = _data_tensor(val_dat)
    y_val = _label_tensor(val_lab)

    x_test = _data_tensor(test_dat)
    y_test = _label_tensor(test_lab)

    return x_train, x_val, x_test, y_train, y_val, y_test

# Will unsqueeze to correct shape (adds a dimension at axis 1) and move to device, without a validation split
def to_device_noval(train_dat, test_dat, train_lab, test_lab, device):
    """Convert the train/test splits (no validation set) to tensors on ``device``.

    Data becomes ``float32`` with a singleton dimension inserted at axis 1;
    labels become ``long`` tensors.

    Args:
        train_dat, test_dat: Data of each split.
        train_lab, test_lab: Labels of each split.
        device: Target device passed to ``torch.tensor``.

    Returns:
        ``(train_data, test_data, train_labels, test_labels)`` as tensors.
    """

    def _data_tensor(values):
        # float32 tensor with an extra dimension at axis 1
        return torch.tensor(values, dtype=torch.float32, device=device).unsqueeze(1)

    def _label_tensor(values):
        return torch.tensor(values, dtype=torch.long, device=device)

    x_train = _data_tensor(train_dat)
    y_train = _label_tensor(train_lab)

    x_test = _data_tensor(test_dat)
    y_test = _label_tensor(test_lab)

    return x_train, x_test, y_train, y_test

######### Check correct distr of split ################
def split_info(train_data, val_data, test_data, train_labels, val_labels, test_labels):
    """Print the label distribution of the train, validation and test splits.

    First prints the per-class counts of each split, then one line per split
    with each class's share in percent. Labels are used as indices into a
    four-slot counter, so they are expected to be integers in ``0..3``.

    Args:
        train_data, val_data, test_data: Samples of each split; only their
            lengths are used (as the denominators of the percentages).
        train_labels, val_labels, test_labels: Labels of each split.
    """

    def _tally(labels):
        # One counter slot per class; the label itself is the slot index.
        slots = [0, 0, 0, 0]
        for label in labels:
            slots[label] += 1
        return slots

    def _show_shares(slots, samples):
        pieces = []
        for count in slots:
            pieces.append(count / len(samples) * 100)
            pieces.append("% ")
        print(*pieces)

    print("Split info:")
    train_counts = _tally(train_labels)
    val_counts = _tally(val_labels)
    test_counts = _tally(test_labels)
    print(train_counts, "\n", val_counts, "\n", test_counts)

    _show_shares(train_counts, train_data)
    _show_shares(val_counts, val_data)
    _show_shares(test_counts, test_data)
