In [1]:
from collect_data import *
from torch.utils.data import Dataset, DataLoader
from sklearn.linear_model import Ridge

In [2]:
# Directories that hold the saved MEG-spectrogram and audio tensors.
megsp_path = os.path.join(meg_path, 'collect_data/megsp')
audio_path = os.path.join(meg_path, 'collect_data/audio')

# os.listdir returns entries in arbitrary, filesystem-dependent order.
# The tensors loaded from these lists are later paired purely by position,
# so sort the names to make the order deterministic across runs and machines.
# NOTE(review): sorting only guarantees reproducibility; that the sorted audio
# names line up with the sorted MEG names of each session still depends on the
# file-naming scheme — confirm.
megsp_list = sorted(os.listdir(megsp_path))
audio_list = sorted(os.listdir(audio_path))

# Partition the MEG files on the second '_'-separated token of the file name.
# Presumably that token is the session index — TODO confirm against collect_data.
megsp_list_session_0 = [f for f in megsp_list if f.split('_')[1] == '0']
megsp_list_session_1 = [f for f in megsp_list if f.split('_')[1] == '1']

In [3]:
def split_tensor(tensor, train_ratio=0.7, val_ratio=0.1):
    """Cut a tensor into consecutive train / validation / test chunks along dim 0.

    The split is positional (no shuffling): the first chunk is the training
    part, the next one the validation part, and whatever remains is the test
    part. Chunk sizes are the truncated products of the sample count and the
    respective ratio.

    Args:
        tensor: Tensor whose first dimension indexes the samples.
        train_ratio: Fraction of the samples assigned to the training chunk.
        val_ratio: Fraction of the samples assigned to the validation chunk.

    Returns:
        Tuple ``(train, validation, test)`` of slices of ``tensor``.
    """
    n_samples = tensor.shape[0]
    # Cut points along dim 0; int() truncates towards zero.
    train_end = int(n_samples * train_ratio)
    val_end = train_end + int(n_samples * val_ratio)
    return tensor[:train_end], tensor[train_end:val_end], tensor[val_end:]


def get_splitted_tensor(file_list, path):
    """Load every listed tensor file, split each one, and stack the splits.

    Each file is loaded with ``torch.load``, split with ``split_tensor`` using
    its default ratios, and the per-file train / validation / test parts are
    concatenated along dim 0 in the order given by ``file_list``.

    Args:
        file_list: File names (relative to ``path``) to load.
        path: Directory that contains the files.

    Returns:
        Tuple ``(train, validation, test)`` of concatenated tensors.
    """
    # NOTE(review): torch.load unpickles the file contents; only use on trusted files.
    per_file_splits = [
        split_tensor(torch.load(os.path.join(path, file_name)))
        for file_name in file_list
    ]
    train_parts = [parts[0] for parts in per_file_splits]
    valid_parts = [parts[1] for parts in per_file_splits]
    test_parts = [parts[2] for parts in per_file_splits]
    return (
        torch.cat(train_parts, dim=0),
        torch.cat(valid_parts, dim=0),
        torch.cat(test_parts, dim=0),
    )



In [4]:
# Audio: load the splits once, then stack each split on top of itself so the
# audio has as many samples as the two MEG sessions concatenated below.
# NOTE(review): this presumes both sessions follow the same order as the audio — confirm.
audio_tensor_train, audio_tensor_valid, audio_tensor_test = get_splitted_tensor(audio_list, audio_path)
audio_tensor_train = torch.cat([audio_tensor_train, audio_tensor_train], dim=0)
audio_tensor_valid = torch.cat([audio_tensor_valid, audio_tensor_valid], dim=0)
audio_tensor_test = torch.cat([audio_tensor_test, audio_tensor_test], dim=0)
print('DIMENSION_AUDIO_TENSOR_TRAIN: ', audio_tensor_train.shape)
print('DIMENSION_AUDIO_TENSOR_VALID: ', audio_tensor_valid.shape)
print('DIMENSION_AUDIO_TENSOR_TEST: ', audio_tensor_test.shape)

# MEG: load each session separately, then concatenate session 0 followed by
# session 1 along the sample dimension.
meg_0_tensor_train, meg_0_tensor_valid, meg_0_tensor_test = get_splitted_tensor(megsp_list_session_0, megsp_path)
meg_1_tensor_train, meg_1_tensor_valid, meg_1_tensor_test = get_splitted_tensor(megsp_list_session_1, megsp_path)
meg_tensor_train = torch.cat([meg_0_tensor_train, meg_1_tensor_train], dim=0)
meg_tensor_valid = torch.cat([meg_0_tensor_valid, meg_1_tensor_valid], dim=0)
meg_tensor_test = torch.cat([meg_0_tensor_test, meg_1_tensor_test], dim=0)
print('DIMENSION_MEG_TENSOR_TRAIN: ', meg_tensor_train.shape)
print('DIMENSION_MEG_TENSOR_VALID: ', meg_tensor_valid.shape)
print('DIMENSION_MEG_TENSOR_TEST: ', meg_tensor_test.shape)


DIMENSION_AUDIO_TENSOR_TRAIN:  torch.Size([11958, 257, 376])
DIMENSION_AUDIO_TENSOR_VALID:  torch.Size([1684, 257, 376])
DIMENSION_AUDIO_TENSOR_TEST:  torch.Size([3480, 257, 376])
DIMENSION_MEG_TENSOR_TRAIN:  torch.Size([11958, 208, 16, 26])
DIMENSION_MEG_TENSOR_VALID:  torch.Size([1684, 208, 16, 26])
DIMENSION_MEG_TENSOR_TEST:  torch.Size([3480, 208, 16, 26])


## Naive Check N0: Can we invert the spectrogram?

## Naive Check N1: Ridge Regression

In [19]:
# Shape check: keep the first 62 entries along dim 1, then flatten each sample to one row.
audio_tensor_train[:, :62].reshape(audio_tensor_train.shape[0], -1).shape

torch.Size([11958, 23312])

In [20]:
channel = 0
regr = Ridge(alpha=1.0)

# Train a ridge regression on per-sample flattened inputs and targets.
# The sub-selections must be applied to the data itself. Previously the slices
# appeared only inside `.size(0)`, so the complete audio and MEG tensors were
# flattened and fitted, and `channel` had no effect.
# NOTE(review): the meaning of the sliced axes (first 62 entries of audio dim 1,
# first 10 entries of the last MEG dim) is not visible here — confirm.
ridge_inputs = audio_tensor_train[:, :62].reshape(audio_tensor_train.size(0), -1)
# `.reshape` rather than `.view`: the MEG slice is non-contiguous in memory,
# so it cannot be exposed as a 2-D view without a copy.
ridge_targets = meg_tensor_train[:, channel, :, :10].reshape(meg_tensor_train.size(0), -1)

regr.fit(ridge_inputs.numpy(), ridge_targets.numpy())

In [None]:
class CustomDataset(Dataset):
    """Dataset that pairs two indexable collections sample-by-sample.

    Item ``i`` is the tuple ``(X[i], Y[i])``; the length is ``len(X)``.
    """

    def __init__(self, X, Y):
        """Store inputs ``X`` and targets ``Y``.

        Both must expose ``.size(0)`` and agree on it, otherwise an
        ``AssertionError`` is raised.
        """
        n_inputs = X.size(0)
        n_targets = Y.size(0)
        assert n_inputs == n_targets, "The number of input and output samples must be the same"
        self.X, self.Y = X, Y

    def __len__(self):
        """Return the number of samples (length of the inputs)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        sample_input = self.X[idx]
        sample_target = self.Y[idx]
        return sample_input, sample_target
    

# Pair the audio with the MEG tensors that concatenate both sessions.
# The audio splits were stacked twice to match those combined tensors; pairing
# them with the session-0-only tensors (`meg_0_tensor_*`) mismatches the sample
# counts and trips the size assertion in CustomDataset.
train_dataset = CustomDataset(audio_tensor_train, meg_tensor_train)
valid_dataset = CustomDataset(audio_tensor_valid, meg_tensor_valid)
test_dataset = CustomDataset(audio_tensor_test, meg_tensor_test)

batch_size = 32  # --> to be chosen (hyperparameter still to tune)
# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)