In [1]:
import os, sys
import numpy as np
import datetime
import warnings
import re
import pandas as pd
from EEG_feature_extraction import generate_feature_vectors_from_samples


In [2]:
# Suppress the RuntimeWarning whose message starts with "logm result may be inaccurate".
# TODO: these warnings may point at a real numerical issue; revisit them later.
warnings.filterwarnings(action="ignore", category=RuntimeWarning, message="logm result may be inaccurate*")

In [5]:
#This is the generating matrix code for the original github data
def gen_training_matrix(directory_path, output_file, cols_to_ignore):
    """
    Build one feature matrix from every recording CSV in a directory and save it.

    File names must look like ``<name>-<state>-<suffix>.csv`` where ``<state>``
    is ``concentrating``, ``neutral`` or ``relaxed`` (case-insensitive); these
    are encoded as 2.0, 1.0 and 0.0 respectively. Non-CSV files and files whose
    name contains "test" are skipped.

    Parameters:
        directory_path: directory containing the CSV recordings.
        output_file: path of the CSV file to write (header row + feature rows).
        cols_to_ignore: forwarded unchanged to
            generate_feature_vectors_from_samples.

    Returns:
        None. The stacked matrix is written to ``output_file``.

    Raises:
        SystemExit: (code -1) if a CSV file name does not match the pattern.
        ValueError: if the directory contains no usable CSV file.
    """
    state_codes = {'concentrating': 2., 'neutral': 1., 'relaxed': 0.}
    per_file_vectors = []
    header = None

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the output reproducible from run to run.
    for x in sorted(os.listdir(directory_path)):
        lowered = x.lower()

        # Ignore non-CSV files
        if not lowered.endswith('.csv'):
            continue

        # For safety we'll ignore files containing the substring "test".
        # [Test files should not be in the dataset directory in the first place]
        if 'test' in lowered:
            continue

        try:
            _, state, _ = x[:-4].split('-')
        except ValueError:
            # Wrong number of dash-separated fields in the file name.
            print ('Wrong file name', x)
            sys.exit(-1)

        state = state_codes.get(state.lower())
        if state is None:
            print ('Wrong file name', x)
            sys.exit(-1)

        print ('Using file', x)
        full_file_path = os.path.join(directory_path, x)
        vectors, header = generate_feature_vectors_from_samples(file_path = full_file_path,
                                                                nsamples = 150,
                                                                period = 1.,
                                                                state = state,
                                                                remove_redundant = True,
                                                                cols_to_ignore = cols_to_ignore)

        print('resulting vector shape for the file', vectors.shape)
        per_file_vectors.append(vectors)

    if not per_file_vectors:
        # Previously this fell through to an AttributeError on None.shape.
        raise ValueError('No usable CSV files found in %r' % (directory_path,))

    # Stack once at the end instead of re-copying the growing matrix per file.
    if len(per_file_vectors) == 1:
        FINAL_MATRIX = per_file_vectors[0]
    else:
        FINAL_MATRIX = np.vstack(per_file_vectors)

    print('FINAL_MATRIX', FINAL_MATRIX.shape)

    # Save to file; the header is the one returned for the last processed file.
    np.savetxt(output_file, FINAL_MATRIX, delimiter = ',',
            header = ','.join(header),
            comments = '')

    return None


if __name__ == '__main__':
    # Input and output locations are hard-coded here rather than read from sys.argv.
    target_dir = "original_data"  # Change this one to change which data we're processing
    directory_path = f"../cleaned datasets/{target_dir}"

    # Minute-resolution timestamp that becomes part of the output file name.
    date = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')
    output_file = "../featuresets/" + target_dir + "_" + date + ".csv"

    gen_training_matrix(directory_path, output_file, cols_to_ignore=-1)


Using file name-concentrating-1.csv
Slice too short break
resulting vector shape for the file (116, 990)
Using file subjecta-concentrating-1.csv
Slice too short break
resulting vector shape for the file (116, 990)
Using file subjecta-concentrating-2.csv
Slice too short break
resulting vector shape for the file (102, 990)
Using file subjecta-neutral-1.csv
Slice too short break
resulting vector shape for the file (116, 990)
Using file subjecta-neutral-2.csv
Slice too short break
resulting vector shape for the file (117, 990)
Using file subjecta-relaxed-1.csv
Slice too short break
resulting vector shape for the file (116, 990)
Using file subjecta-relaxed-2.csv
Slice too short break
resulting vector shape for the file (116, 990)
Using file subjectb-concentrating-1.csv
Slice too short break
resulting vector shape for the file (86, 990)
Using file subjectb-concentrating-2.csv
Slice too short break
resulting vector shape for the file (87, 990)
Using file subjectb-neutral-1.csv
Slice too short

In [8]:
# This is the generating matrix code for Mendeley Data

def gen_training_matrix(directory_path, output_file, cols_to_ignore):
    """
    Build one feature matrix from every recording CSV in a directory and save it.

    File names must have four dash-separated fields,
    ``<name>-<ignored>-<state>-<ignored>.csv``, where ``<state>`` is ``1``,
    ``2`` or ``3``; it is encoded as the float 1.0, 2.0 or 3.0. Non-CSV files
    and files whose name contains "test" are skipped.

    Parameters:
        directory_path: directory containing the CSV recordings.
        output_file: path of the CSV file to write (header row + feature rows).
        cols_to_ignore: forwarded unchanged to
            generate_feature_vectors_from_samples.

    Returns:
        None. The stacked matrix is written to ``output_file``.

    Raises:
        SystemExit: (code -1) if a CSV file name does not match the pattern.
        ValueError: if the directory contains no usable CSV file.
    """
    state_codes = {'1': 1., '2': 2., '3': 3.}
    per_file_vectors = []
    header = None

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the output reproducible from run to run.
    for x in sorted(os.listdir(directory_path)):
        lowered = x.lower()

        # Ignore non-CSV files
        if not lowered.endswith('.csv'):
            continue

        # For safety we'll ignore files containing the substring "test".
        # [Test files should not be in the dataset directory in the first place]
        if 'test' in lowered:
            continue

        try:
            _, _, state, _ = x[:-4].split('-')
        except ValueError:
            # Wrong number of dash-separated fields in the file name.
            print ('Wrong file name', x)
            sys.exit(-1)

        state = state_codes.get(state.lower())
        if state is None:
            print ('Wrong file name', x)
            sys.exit(-1)

        print ('Using file', x)
        full_file_path = os.path.join(directory_path, x)
        vectors, header = generate_feature_vectors_from_samples(file_path = full_file_path,
                                                                nsamples = 150,
                                                                period = 1.,
                                                                state = state,
                                                                remove_redundant = True,
                                                                cols_to_ignore = cols_to_ignore)

        print ('resulting vector shape for the file', vectors.shape)
        per_file_vectors.append(vectors)

    if not per_file_vectors:
        # Previously this fell through to an AttributeError on None.shape.
        raise ValueError('No usable CSV files found in %r' % (directory_path,))

    # Stack once at the end instead of re-copying the growing matrix per file.
    if len(per_file_vectors) == 1:
        FINAL_MATRIX = per_file_vectors[0]
    else:
        FINAL_MATRIX = np.vstack(per_file_vectors)

    print ('FINAL_MATRIX', FINAL_MATRIX.shape)

    # Save to file; the header is the one returned for the last processed file.
    np.savetxt(output_file, FINAL_MATRIX, delimiter = ',',
            header = ','.join(header),
            comments = '')

    return None


if __name__ == '__main__':
    # Input and output locations are hard-coded here rather than read from sys.argv.
    target_dir = "Mendeley cleaned"  # Change this one to change which data we're processing
    directory_path = f"../cleaned datasets/{target_dir}"

    # Minute-resolution timestamp that becomes part of the output file name.
    date = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')
    output_file = "../featuresets/" + target_dir + "_" + date + ".csv"

    gen_training_matrix(directory_path, output_file, cols_to_ignore=[1, 2, 3, 4])


Using file subject1-label-1-cleaned.csv
Slice too short break
Slice too short break
Slice too short break
End of recording break
resulting vector shape for the file (357, 1247)
Using file subject1-label-2-cleaned.csv
End of recording break
resulting vector shape for the file (363, 1247)
Using file subject1-label-3-cleaned.csv
Slice too short break
Slice too short break
Slice too short break
Slice too short break
Slice too short break
End of recording break
resulting vector shape for the file (363, 1247)
Using file subject2-label-1-cleaned.csv
Slice too short break
Slice too short break
Index break
Index break
Slice too short break
Slice too short break
Slice too short break
Index break
Index break
Index break
Index break
Index break
Slice too short break
Slice too short break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Slice too short break
Index break
Slice too short break
Slice too short break
Index break
Index break
Slice too short

In [7]:
#This is the generating matrix code for our own data
def gen_training_matrix(directory_path, output_file, cols_to_ignore):
    """
    Build one feature matrix from every recording CSV in a directory and save it.

    File names must have four dash-separated fields,
    ``<name>-<state>-<ignored>-<ignored>.csv``. The ``<state>`` field is
    matched case-insensitively: ``marshall`` -> 2.0, ``label`` -> 1.0,
    ``kenzo`` -> 0.0. Non-CSV files and files whose name contains "test" are
    skipped.

    NOTE(review): the original compared ``state.lower()`` against the
    capitalised strings 'Marshall' and 'Kenzo', which can never match; the
    keys are lower-case here so those branches are reachable. Confirm that the
    second file-name field is really the intended source of the class label.

    Parameters:
        directory_path: directory containing the CSV recordings.
        output_file: path of the CSV file to write (header row + feature rows).
        cols_to_ignore: forwarded unchanged to
            generate_feature_vectors_from_samples.

    Returns:
        None. The stacked matrix is written to ``output_file``.

    Raises:
        SystemExit: (code -1) if a CSV file name does not match the pattern.
        ValueError: if the directory contains no usable CSV file.
    """
    # Keys must be lower-case because the file-name field is lower-cased first.
    state_codes = {'marshall': 2., 'label': 1., 'kenzo': 0.}
    per_file_vectors = []
    header = None

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the output reproducible from run to run.
    for x in sorted(os.listdir(directory_path)):
        lowered = x.lower()

        # Ignore non-CSV files
        if not lowered.endswith('.csv'):
            continue

        # For safety we'll ignore files containing the substring "test".
        # [Test files should not be in the dataset directory in the first place]
        if 'test' in lowered:
            continue

        try:
            _, state, _, _ = x[:-4].split('-')
        except ValueError:
            # Wrong number of dash-separated fields in the file name.
            print ('Wrong file name', x)
            sys.exit(-1)

        state = state_codes.get(state.lower())
        if state is None:
            print ('Wrong file name', x)
            sys.exit(-1)

        print ('Using file', x)
        full_file_path = os.path.join(directory_path, x)
        vectors, header = generate_feature_vectors_from_samples(file_path = full_file_path,
                                                                nsamples = 150,
                                                                period = 1.,
                                                                state = state,
                                                                remove_redundant = True,
                                                                cols_to_ignore = cols_to_ignore)

        print ('resulting vector shape for the file', vectors.shape)
        per_file_vectors.append(vectors)

    if not per_file_vectors:
        # Previously this fell through to an AttributeError on None.shape.
        raise ValueError('No usable CSV files found in %r' % (directory_path,))

    # Stack once at the end instead of re-copying the growing matrix per file.
    if len(per_file_vectors) == 1:
        FINAL_MATRIX = per_file_vectors[0]
    else:
        FINAL_MATRIX = np.vstack(per_file_vectors)

    print ('FINAL_MATRIX', FINAL_MATRIX.shape)

    # Save to file; the header is the one returned for the last processed file.
    np.savetxt(output_file, FINAL_MATRIX, delimiter = ',',
            header = ','.join(header),
            comments = '')

    return None


if __name__ == '__main__':
    # Input and output locations are hard-coded here rather than read from sys.argv.
    target_dir = "local datasets"  # Change this one to change which data we're processing
    directory_path = f"../cleaned datasets/{target_dir}"

    # Minute-resolution timestamp that becomes part of the output file name.
    date = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')
    output_file = "../featuresets/" + target_dir + "_" + date + ".csv"

    gen_training_matrix(directory_path, output_file, cols_to_ignore=[5, 6, 7, 8, 9])

Using file subjectNatalie-label-1-cleaned.csv
Slice too short break
Slice too short break
Index break
Index break
Slice too short break
End of recording break
resulting vector shape for the file (67, 990)
FINAL_MATRIX (67, 990)


In [6]:
# This is the generating matrix code for the Emotion data
def gen_training_matrix(directory_path, output_file, cols_to_ignore):
    """
    Build one feature matrix from every recording CSV in a directory and save it.

    File names must have four dash-separated fields,
    ``<name>-<ignored>-<state>-<ignored>.csv``, where ``<state>`` is one of
    ``1`` (Anger), ``2`` (Fear), ``3`` (Happiness) or ``4`` (Sadness); it is
    encoded as the float 1.0 .. 4.0. Non-CSV files and files whose name
    contains "test" are skipped.

    Parameters:
        directory_path: directory containing the CSV recordings.
        output_file: path of the CSV file to write (header row + feature rows).
        cols_to_ignore: forwarded unchanged to
            generate_feature_vectors_from_samples.

    Returns:
        None. The stacked matrix is written to ``output_file``.

    Raises:
        SystemExit: (code -1) if a CSV file name does not match the pattern.
        ValueError: if the directory contains no usable CSV file.
    """
    state_codes = {
        '1': 1.,  # Anger
        '2': 2.,  # Fear
        '3': 3.,  # Happiness
        '4': 4.,  # Sadness
    }
    per_file_vectors = []
    header = None

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the output reproducible from run to run.
    for x in sorted(os.listdir(directory_path)):
        lowered = x.lower()

        # Ignore non-CSV files
        if not lowered.endswith('.csv'):
            continue

        # For safety we'll ignore files containing the substring "test".
        # [Test files should not be in the dataset directory in the first place]
        if 'test' in lowered:
            continue

        try:
            _, _, state, _ = x[:-4].split('-')
        except ValueError:
            # Wrong number of dash-separated fields in the file name.
            print ('Wrong file name', x)
            sys.exit(-1)

        state = state_codes.get(state.lower())
        if state is None:
            print ('Wrong file name', x)
            sys.exit(-1)

        print ('Using file', x)
        full_file_path = os.path.join(directory_path, x)
        vectors, header = generate_feature_vectors_from_samples(file_path = full_file_path,
                                                                nsamples = 150,
                                                                period = 1.,
                                                                state = state,
                                                                remove_redundant = True,
                                                                cols_to_ignore = cols_to_ignore)

        print ('resulting vector shape for the file', vectors.shape)
        per_file_vectors.append(vectors)

    if not per_file_vectors:
        # Previously this fell through to an AttributeError on None.shape.
        raise ValueError('No usable CSV files found in %r' % (directory_path,))

    # Stack once at the end instead of re-copying the growing matrix per file.
    if len(per_file_vectors) == 1:
        FINAL_MATRIX = per_file_vectors[0]
    else:
        FINAL_MATRIX = np.vstack(per_file_vectors)

    print ('FINAL_MATRIX', FINAL_MATRIX.shape)

    # Save to file; the header is the one returned for the last processed file.
    np.savetxt(output_file, FINAL_MATRIX, delimiter = ',',
            header = ','.join(header),
            comments = '')

    return None


if __name__ == '__main__':
    # Input and output locations are hard-coded here rather than read from sys.argv.
    target_dir = "Emotion cleaned"  # Change this one to change which data we're processing
    directory_path = f"../cleaned datasets/{target_dir}"

    # Minute-resolution timestamp that becomes part of the output file name.
    date = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')
    output_file = "../featuresets/" + target_dir + "_" + date + ".csv"

    gen_training_matrix(directory_path, output_file, cols_to_ignore=[1, 2, 3, 4])


Using file subject1-label-1-cleaned.csv
Slice too short break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Slice too short break
Slice too short break
Index break
Slice too short break
Slice too short break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index break
Index brea

KeyboardInterrupt: 