# Create the data CSV file
## The code

In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
def load_file(filename, label, file_label, delimiter):
    """Load one delimited text file into a pandas DataFrame.

    Everything before the header is ignored; the header is the first line
    that starts with 'Time'. Each following non-empty line becomes one row,
    split on ``delimiter``. Two extra columns, 'Label' and 'File_Label',
    are appended to every row. All values are kept as strings.

    Args:
        filename: path of the file to read.
        label: value stored (as ``str``) in the 'Label' column of every row.
        file_label: value stored (as ``str``) in the 'File_Label' column.
        delimiter: field separator used in the header and data lines.

    Returns:
        A DataFrame with the file's columns plus 'Label' and 'File_Label'.

    Raises:
        ValueError: if the file contains no line starting with 'Time'.
    """
    with open(filename) as f:
        # Skip any preamble; stop on the first line starting with 'Time'.
        header_line = next(
            (line for line in f if line.startswith('Time')), None
        )
        if header_line is None:
            # Without this check the last line of the file would silently
            # be used as the header (or an empty file would crash).
            raise ValueError(
                "no header line starting with 'Time' in {}".format(filename)
            )

        # convert the header into a list of headings plus the two labels
        header = header_line.strip('\n').split(delimiter)
        header += ['Label', 'File_Label']

        # the same two trailing values are added to every data row
        tail = [str(label), str(file_label)]

        # the file iterator continues right after the header line
        data = []
        for line in f:
            text = line.strip('\n')
            if not text:
                # an empty line would otherwise become a short row with
                # the labels shifted into the first data columns
                continue
            data.append(text.split(delimiter) + tail)

    # convert data list to pandas data frame and return
    return pd.DataFrame(data, columns=header)

In [3]:
def load_directory(filelist, data_directory, data_filename, delimiter):
    """Combine the data files named in a file list into one CSV file.

    The file list is a CSV with 'Filename' and 'Label' columns. Each listed
    file is loaded with ``load_file`` and tagged with its label and its
    position in the list (used as the file label). The resulting frames are
    concatenated and written to ``data_filename`` as comma-separated text
    without the index.

    Args:
        filelist: path of the CSV listing the files and their labels.
        data_directory: directory containing the listed data files.
        data_filename: path of the combined CSV file to write.
        delimiter: field separator used inside the data files.
    """
    # load the file list (kept in its own name so the parameter is not rebound)
    listing = pd.read_csv(filelist)

    # one frame per listed file; the row position doubles as the file label
    frames = [
        load_file(os.path.join(data_directory, name), label, index, delimiter)
        for index, (name, label) in enumerate(
            zip(listing['Filename'], listing['Label'])
        )
    ]

    # concatenate frames into one frame, keeping column order unsorted
    combined = pd.concat(frames, sort=False)

    # newline='' stops the text layer from translating the line terminators
    # that to_csv already writes (otherwise rows are separated by blank
    # lines on Windows).
    with open(data_filename, 'w', newline='') as f:
        combined.to_csv(f, sep=',', index=False)

In [9]:
# Build one combined CSV per data sub-directory.
for sub_directory in ['2014']:
    # the file list and the output file are both named after the sub-directory
    filelist = "filelist_{}.csv".format(sub_directory)
    data_filename = "data_{}.csv".format(sub_directory)
    data_directory = os.path.join(os.curdir, sub_directory)

    # 'PSL' uses commas; every other sub-directory uses tabs
    if sub_directory == 'PSL':
        delimiter = ','
    else:
        delimiter = '\t'

    load_directory(filelist, data_directory, data_filename, delimiter)

## The output

In [7]:
# read the 'Label' column as text rather than numbers (to allow for 'F')
types = {'Label': 'str'}
pd.read_csv("data_PSL.csv", dtype=types)

Unnamed: 0,Time,FL2 SctInt,Scat_EL1,Scat_EL2,Scat_EL3,Scat_EL4,FL2 SctPk,FL1_280,FL2_280,Pwr_280,...,Size,AF,TotalT2,MeasT2,FT,TPCT1,TotalT1,Label,File_Label,Pw
0,149,216,45,38,54,55,49,308,2112,2047,...,2.00700,2.0180,24.0,20.8,2,0,22.33,1,0,
1,157,196,39,36,49,47,35,336,2112,2047,...,1.53400,2.0500,24.0,20.8,2,0,22.33,1,0,
2,197,186,45,38,55,56,40,354,2112,2047,...,1.72200,1.9630,24.0,20.8,2,0,22.33,1,0,
3,240,186,44,39,53,57,53,317,2112,2047,...,2.11700,2.5080,24.0,20.8,2,0,22.33,1,0,
4,269,191,39,36,50,52,44,328,2112,2047,...,1.85700,1.5130,24.0,20.8,2,0,22.33,1,0,
5,287,149,40,35,53,49,39,340,2112,2047,...,1.68600,1.8770,24.0,20.8,2,0,22.33,1,0,
6,335,22,4,6,5,9,9,196,0,2047,...,0.34540,4.7090,24.0,20.8,2,0,22.33,1,0,
7,351,211,42,40,48,51,46,333,2112,2047,...,1.91900,4.3320,24.0,20.8,2,0,22.33,1,0,
8,360,246,41,38,51,49,48,368,2112,2047,...,1.97900,2.3280,24.0,20.8,2,0,22.33,1,0,
9,392,153,42,35,51,51,36,318,2112,2047,...,1.57300,2.0720,24.0,20.8,2,0,22.33,1,0,
