In [None]:
import numpy as np
from numpy import loadtxt
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import copy
import random
import scipy.io

In [None]:
# Mount Google Drive in the Colab runtime and make "My Drive" the working
# directory, so the relative "PsiloClassifier/..." paths used in later cells
# resolve against it.
drive.mount('/content/gdrive')
%cd /content/gdrive/My\ Drive

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive
/content/gdrive/My Drive


In [None]:
### Read data from txt format produced by EEGLAB into numpy arrays and optionally save the arrays as .mat files in Google Drive ###
### NOTE: This version allows creating random sample groups within a given (subject + dosage)
### This is useful, for example, to allow the model to choose one of N samples to base a classification off of
### NOTE: load() and its helpers are defined in later cells of this notebook; run those definition cells before this one.
pathInMyDrive = "PsiloClassifier/txtDownsampledData/"
pathToSaveData = "PsiloClassifier/dataSplit/"

# Subject-level split: no subject contributes to more than one of train / val / test.
allSubjects = ["01", "02", "04", "05", "08", "10", "11", "12", "13", "14", "15", "16", "17", "20", "21", "22", "23", "24"]
testSubjects = ["02", "08", "15", "23"]
valSubjects = ["05", "16"]
trainSubjects = [x for x in allSubjects if x not in (testSubjects + valSubjects)]

selectEveryNthChannelN = 2
sampleLength = 250
saveToDisk = True
dataVersion = "3"
groupSize = 3
randomSeed = 0

# The pipeline is stochastic (random segment offsets via np.random, random
# sample grouping via random, and a final shuffle), so seed both global
# generators to make the generated split reproducible across runs.
np.random.seed(randomSeed)
random.seed(randomSeed)

trainX, trainY, testX, testY, valX, valY = load(selectEveryNthChannelN, sampleLength, groupSize)

if saveToDisk:
  # One .mat file per array, named "<arrayName><dataVersion>.mat", with the
  # array stored under its own name as the MATLAB variable.
  splitArrays = {
    "trainX": trainX,
    "trainY": trainY,
    "valX": valX,
    "valY": valY,
    "testX": testX,
    "testY": testY,
  }
  for splitName, splitArray in splitArrays.items():
    scipy.io.savemat(pathToSaveData + splitName + dataVersion + ".mat", mdict={splitName: splitArray}, oned_as='row')

In [None]:
# Sanity check: reload the saved test inputs from disk and confirm they
# round-trip unchanged. assert_array_equal (unlike `assert np.all(a == b)`)
# also rejects mismatched shapes instead of broadcasting, treats NaNs in
# matching positions as equal, and reports the mismatch on failure.
matdata = scipy.io.loadmat(pathToSaveData + "testX" + dataVersion + ".mat")
np.testing.assert_array_equal(matdata['testX'], testX)

In [None]:
#subject, timestamp, sample_index

In [None]:
def load(selectEveryNthChannelN, sampleLength, groupSize):
  """Build the shuffled train, test and validation sets.

  The subjects of each split come from the module-level lists
  trainSubjects, testSubjects and valSubjects. For every split the "PL"
  (placebo) recordings are processed first, then the "HD" recordings, and the
  two are concatenated and shuffled together.

  Args:
    selectEveryNthChannelN: channel stride forwarded to loadProcess.
    sampleLength: segment length forwarded to loadProcess.
    groupSize: number of samples per group forwarded to loadProcess.

  Returns:
    The tuple (trainX, trainY, testX, testY, valX, valY).
  """
  def buildSplit(subjects):
    # Placebo first, then HD: keeps the order in which random numbers are consumed.
    placeboX, placeboY = loadProcess(subjects, "PL", selectEveryNthChannelN, sampleLength, groupSize)
    doseX, doseY = loadProcess(subjects, "HD", selectEveryNthChannelN, sampleLength, groupSize)
    return catAndShuffleData(placeboX, placeboY, doseX, doseY)

  trainX, trainY = buildSplit(trainSubjects)
  testX, testY = buildSplit(testSubjects)
  valX, valY = buildSplit(valSubjects)

  return trainX, trainY, testX, testY, valX, valY

In [None]:
def loadProcess(subjectList, dose, selectEveryNthChannelN, sampleLength, groupSize):
  """Load, preprocess and label all samples of the given subjects for one dose.

  Pipeline: read the text files, keep every Nth channel, cut each channel into
  fixed-length vectors, standardize each vector, group each vector with
  randomly chosen other vectors of the same subject, and finally merge all
  subjects into one array.

  Args:
    subjectList: subject identifier strings used to build the file names.
    dose: dose code used in the file names; "PL" is labelled 0, anything else 1.
    selectEveryNthChannelN: keep every Nth data column (channel).
    sampleLength: number of time points per segment.
    groupSize: number of vectors combined into one sample.

  Returns:
    (x, y): x is the combined sample array, y is a float array of shape
    (x.shape[0], 1) holding the class label of every sample.
  """
  x = loadDownsampledDataFromSubjectListAndDose(subjectList, dose)
  x = keepEveryNthDataChannel(x, selectEveryNthChannelN)
  x = segmentChannelsIntoVectors(x, sampleLength)
  x = standardizeDataVectors(x)
  # Deliberately keep x as a list of per-subject lists here. Subjects generally
  # yield different numbers of vectors, and np.array() on such ragged input
  # raises ValueError on NumPy >= 1.24; on rectangular input it would create a
  # dense array whose rows the grouping step cannot replace with 2-D groups.
  x = groupSelectedSampleCombinations(x, groupSize)
  x = combineSubjectsTogether(x)
  label = 0.0 if dose == "PL" else 1.0
  y = np.full((x.shape[0], 1), label)
  return x, y

In [None]:
def loadDownsampledDataFromSubjectListAndDose(subjectList, dose):
  """Read one tab-delimited text file per subject for the given dose.

  Files are looked up under the module-level pathInMyDrive and are named
  "S<subject>_P300_<dose>_125.txt".

  Args:
    subjectList: subject identifier strings.
    dose: dose code inserted into the file name.

  Returns:
    A list with one loadtxt result per subject, in subjectList order.
  """
  return [
    loadtxt(pathInMyDrive + "S" + subjectId + "_P300_" + dose + "_125.txt", delimiter='\t')
    for subjectId in subjectList
  ]

In [None]:
def keepEveryNthDataChannel(subjectDataList, n):
  """Keep every n-th column of each subject's 2-D data array, starting at column 0.

  Args:
    subjectDataList: iterable of 2-D arrays (columns are indexed as channels).
    n: column stride.

  Returns:
    A new list with one column-subsampled array per subject.
  """
  return [subjectDataset[:, 0::n] for subjectDataset in subjectDataList]

In [None]:
def segmentChannelsIntoVectors(subjectDataList, vectorLength):
  """Cut every channel of every subject into consecutive fixed-length vectors.

  For each channel (column) a random start offset in [0, vectorLength) is
  drawn from NumPy's global generator, then back-to-back windows of
  vectorLength rows are taken until no complete window fits any more.

  Args:
    subjectDataList: iterable of 2-D arrays indexed as [row, channel].
    vectorLength: number of rows per vector; must be at least 1.

  Returns:
    A list with one entry per subject; each entry is a list of 1-D slices of
    length vectorLength, ordered channel by channel.

  Raises:
    ValueError: if vectorLength is smaller than 1 (the window would never
      advance and the loop would not terminate).
  """
  if vectorLength < 1:
    raise ValueError("vectorLength must be a positive integer, got %r" % (vectorLength,))

  dataList = []
  for subjectDataset in subjectDataList:
    numRows = np.size(subjectDataset, 0)
    dataVectors = []
    for channel in range(np.size(subjectDataset, 1)):
      # Independent random offset per channel.
      time = int(np.random.uniform(0, vectorLength))
      # "<=" (not "<"): a window that ends exactly on the last row is complete
      # and must be kept.
      while (time + vectorLength) <= numRows:
        dataVectors.append(subjectDataset[time : time + vectorLength, channel])
        time = time + vectorLength
    dataList.append(dataVectors)  # each element of dataList holds the vectors of one subject
  return dataList

In [None]:
def groupSelectedSampleCombinations(x, groupSize):
  """Group every sample with randomly chosen other samples of the same subject.

  For each sample, groupSize - 1 partner samples are drawn (with replacement
  between draws, never the sample itself) from the same subject's original
  samples, and the sample and its partners are stacked as columns.

  The input is not modified; a new nested list is returned. Both a list of
  per-subject lists and a rectangular array indexed [subject, sample, ...] are
  accepted.

  Args:
    x: per-subject collections of equally shaped sample vectors.
    groupSize: total number of columns per group. Values <= 1 leave every
      sample unchanged.

  Returns:
    A list with one list per subject. For groupSize > 1 each element is a 2-D
    array whose first column is the original sample and whose remaining
    groupSize - 1 columns are the partners; otherwise it is the original sample.

  Raises:
    ValueError: if groupSize > 1 and a subject has exactly one sample, so no
      partner can be chosen.
  """
  grouped = []
  for subject in x:
    num_samples = len(subject)
    if groupSize > 1 and num_samples == 1:
      raise ValueError("cannot build groups of size %r from a subject with a single sample" % (groupSize,))
    subject_groups = []
    for samp_index in range(num_samples):
      if groupSize <= 1:
        subject_groups.append(subject[samp_index])
        continue
      columns = [subject[samp_index]]
      for _ in range(groupSize - 1):
        # Uniform draw over the other num_samples - 1 indices without building
        # the candidate list: draw from the compacted range, then skip over
        # the sample's own index.
        rand_index = random.randrange(num_samples - 1)
        if rand_index >= samp_index:
          rand_index += 1
        columns.append(subject[rand_index])
      subject_groups.append(np.column_stack(columns))
    grouped.append(subject_groups)
  return grouped

In [None]:
def combineSubjectsTogether(x):
  """Merge the samples of all subjects into a single array.

  Each entry of x is first converted to an array in place (x itself is
  modified), then all samples are collected subject by subject and stacked
  along a new leading axis.

  Args:
    x: mutable sequence with one collection of samples per subject.

  Returns:
    An array containing every sample of every subject, in subject order.
  """
  for subjectIndex in range(len(x)):
    x[subjectIndex] = np.array(x[subjectIndex])
  return np.array([sample for subjectSamples in x for sample in subjectSamples])

In [None]:
def standardizeDataVectors(dataSubjectVectorList):
  """Standardize every vector to zero mean and unit (population) standard deviation.

  A vector whose standard deviation is zero (all values identical) is only
  mean-centred, which yields all zeros instead of the NaN/inf values a
  division by zero would produce.

  Args:
    dataSubjectVectorList: iterable with one collection of vectors per subject.

  Returns:
    A list with one list of standardized vectors per subject, in input order.
  """
  dataList = []
  for subject in dataSubjectVectorList:
    dataVectors = []
    for vec in subject:
      centered = np.asarray(vec) - np.mean(vec)
      stdev = np.std(vec)
      # Guard against division by zero for constant vectors.
      dataVectors.append(centered / stdev if stdev > 0 else centered)
    dataList.append(dataVectors)
  return dataList

In [None]:
def catAndShuffleData(x0, y0, x1, y1):
  """Concatenate two labelled data sets along the first axis and shuffle them jointly.

  Args:
    x0, y0: samples and labels of the first set.
    x1, y1: samples and labels of the second set.

  Returns:
    (x, y): the combined samples and labels after being passed together
    through shuffle.
  """
  combinedX = np.concatenate([x0, x1])
  combinedY = np.concatenate([y0, y1])
  shuffledX, shuffledY = shuffle(combinedX, combinedY)
  return shuffledX, shuffledY