In [None]:
import numpy as np
import requests
import time
import os
import io

# DESCRIPTION: This notebook contains code for extracting and saving a random subset of the test data from the EEG Epileptic
# dataset at: https://data.mendeley.com/datasets/5pc2j46cbc/1. In the notebook, the number of samples in the extracted 
# random subset is denoted by "number_of_test_samples". By default, the notebook sets "number_of_test_samples" to 10, but the 
# user can change this to any other value; however, users are STRONGLY ADVISED to AVOID values greater than 200, due to AWS API
# Gateway's hard 30-second timeout limit.

# Subsequent to extraction, the extracted random subset is saved to the user's local disk at the location indicated by 
# variable "data_file_path". By default, "data_file_path" is set to 'data_slice.csv', but the user can change it if they wish. 

# The program also saves the ground truth labels associated with the extracted data. It uses the variable "ground_truth_file_path" 
# to hold the path to which the ground truth labels are saved. By default the program sets "ground_truth_file_path" to
# 'ground_truth.txt', but the user can change this to some other value if they wish. The purpose of saving the ground truth 
# labels is to enable the user to compare them with the values output by the EEG classification API when the 
# API is invoked with the data saved in 'data_slice.csv'.

# Finally, the notebook utilizes the "user_seed" variable to achieve reproducibility of results and control randomness. 
# For instance, running the notebook several times with user_seed = 40 and number_of_test_samples = 100 will extract EXACTLY
# the same set of samples from the dataset for all runs, because the user_seed was not changed from one run to the next. 
# To extract different sets of samples from one run to the next, the user needs to change the user_seed.




# User-tunable settings for the extraction run.
number_of_test_samples = 10  # size of the random subset to extract
data_file_path = 'data_slice.csv'  # where the flattened samples are written
# The ground-truth file holds one "<row number> <label>" line per sample, i.e.
# plain text rather than CSV, so the default name is 'ground_truth.txt' as
# stated in the notebook description.
ground_truth_file_path = 'ground_truth.txt'
user_seed = 40  # seed of the random generator; the same seed gives the same subset


def load_data_from_url(url, timeout=60):
    """Download a NumPy file from ``url`` and load it into memory.

    Args:
        url: address of the file to download.
        timeout: seconds ``requests`` waits for the server to connect or to
            send data before giving up. Without it a stalled server would
            block the notebook indefinitely.

    Returns:
        Whatever ``np.load`` yields for the downloaded bytes.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # np.load needs a file-like object, so wrap the raw bytes in a buffer.
    return np.load(io.BytesIO(response.content))

def save_data_slice(x, inds, file_path):
    """Write the selected samples of ``x`` to ``file_path`` as CSV, one per row.

    Args:
        x: 3-D array whose first axis is indexed by ``inds``.
        inds: indices along the first axis of ``x`` selecting the samples.
        file_path: destination handed to ``np.savetxt``.

    Returns:
        The 2-D array that was written, of shape
        ``(len(inds), x.shape[1] * x.shape[2])``.
    """
    selected = x[inds, :, :]
    n_samples, dim_1, dim_2 = selected.shape
    # Flatten the two trailing axes so each sample fits on a single CSV row.
    # The explicit product (rather than -1) also works for an empty selection.
    flattened = selected.reshape(n_samples, dim_1 * dim_2)
    np.savetxt(file_path, flattened, delimiter=',')
    return flattened

def get_random_indices(required_size, max_index, seed=None):
    """Draw ``required_size`` distinct indices from ``0 .. max_index - 1``.

    Args:
        required_size: how many indices to draw.
        max_index: exclusive upper bound of the index range.
        seed: seed for the random generator. When ``None`` the module-level
            ``user_seed`` is used, so existing two-argument calls behave as
            before.

    Returns:
        1-D NumPy array of ``required_size`` unique integers.
    """
    if seed is None:
        seed = user_seed
    rng = np.random.default_rng(seed=seed)
    # replace=False guarantees that no index is selected twice.
    return rng.choice(np.arange(max_index), size=required_size, replace=False)

def save_ground_truth(gt_dict, file_path=None):
    """Print the ground-truth labels and write them to a text file.

    Each label is emitted as ``"<row number> <label>"``, where the row number
    counts from 1 in the iteration order of ``gt_dict``.

    Args:
        gt_dict: mapping whose values are the label strings to save.
        file_path: destination file. When ``None`` the module-level
            ``ground_truth_file_path`` setting is used.
    """
    if file_path is None:
        file_path = ground_truth_file_path
    # The context manager closes the file even if a write fails.
    with open(file_path, 'w') as f:
        for row_number, label in enumerate(gt_dict.values(), start=1):
            print(row_number, label)
            f.write(f'{row_number} {label}\n')


# Download the test samples and pick a random subset of them.
url_x = 'https://data.mendeley.com/public-files/datasets/5pc2j46cbc/files/93b81166-0e48-4dc0-ac20-b7167f7606c5/file_downloaded'
test_data = load_data_from_url(url_x)
# The selected indices are applied to axis 0 (by save_data_slice and by
# labels[rand_inds] below), so the index range must come from shape[0].
rand_inds = get_random_indices(number_of_test_samples, int(test_data.shape[0]))

random_test_data = save_data_slice(test_data, rand_inds, data_file_path)

# Download the labels and keep those belonging to the selected samples.
url_y = 'https://data.mendeley.com/public-files/datasets/5pc2j46cbc/files/adf1c2fd-81ef-4f87-86cc-56d75bba8c31/file_downloaded'
labels = load_data_from_url(url_y)
ground_truth = labels[rand_inds]

# Translate the numeric class ids into readable names, keyed by 1-based row number.
label_dict = {0: 'Normal', 1: 'Complex Partial Seizure', 2: 'Electrographic Seizure', 3: 'Video-detected Seizure'}
dict_inds = list(range(number_of_test_samples))
ground_truth_dict = {dict_inds[i] + 1: label_dict[ground_truth[i]] for i in range(number_of_test_samples)}
print('GROUND TRUTH: \n')
save_ground_truth(ground_truth_dict)

