# Concatenate CSV files

## Reads all the CSV files in one folder, parses any parameters encoded in their filenames, and saves the combined data as one large .csv file, adding a column for each desired filename parameter

In [1]:
import pandas as pd
import glob
import re

### Define the parameters you want to read from the filename, the delimiter between parameters, and the separator between each parameter and its value

In [3]:
# Filename parameter keys to copy into the data, and the column name each one
# is stored under. The two lists are paired up by position.
OUTER_PARAM_FILENAME_STRINGS = list()
OUTER_PARAM_NEW_COL_NAMES = list()

# A filename is cut into parameters on this character...
OUTER_PARAM_END_CHAR = '_'
# ...and each parameter is cut into its key and value on this one.
OUTER_PARAM_VAL_SEP_CHAR = '='

### Set the path to the folder containing the files as well as the final output filename

In [4]:
# Base name of the concatenated output file; ".csv" is appended when it is written.
outputName = "n-D sweeps_V6-12=100uVac_T=100mK_gainI=1e8_ground others"

# Folder holding the input CSV files. Keep the trailing separator: filenames
# are appended to this string directly.
path = "Z:\\google-drive\\WTe2\\Raw Data\\2022-01-11 BWX\\n-D 2term\\"

### Find all the files

In [5]:
# Read all the filenames in the folder, reduced to bare names (folder prefix removed).
csv_names = [full_name.replace(path, '') for full_name in glob.glob(path + "*.csv")]

# Skip any previously written concatenated output so a re-run does not fold the
# output file back into itself. The output name is matched as a literal
# substring of the bare filename: it is free text that may contain regex
# metacharacters, so it must not be used as a regex pattern.
allFiles = [name for name in csv_names if outputName not in name]
print(f"Found {len(allFiles)} files...")

Found 101 files...


### Create a dictionary containing the parameters stored in each filename

In [6]:
# Grab the params that live in the filenames.
# file_dict maps each filename to a {parameter key: value string} dict;
# p_keys lists every distinct key in the order it was first seen.
file_dict = {}
p_keys = []
for fn in allFiles:
    file_dict[fn] = {}

    # Strip the ".csv" extension before splitting; otherwise a parameter at the
    # end of the name would get a value such as "1e8.csv".
    stem = re.sub(r'\.csv$', '', fn, flags=re.IGNORECASE)

    for _p in stem.split(OUTER_PARAM_END_CHAR):
        if OUTER_PARAM_VAL_SEP_CHAR not in _p:
            continue

        # Split on the first separator only, so a value that itself contains
        # the separator does not break the two-way unpacking.
        p, v = _p.split(OUTER_PARAM_VAL_SEP_CHAR, 1)
        file_dict[fn][p] = v

        if p not in p_keys:
            p_keys.append(p)
            print(f"Found a new parameter key: {p}")

Found a new parameter key: dV
Found a new parameter key: V6-10
Found a new parameter key: T
Found a new parameter key: gainI


### Read each file, add in a column for each desired filename parameter, then concatenate all the files together

In [7]:
# Create the DataFrames: read every file, add one constant column per requested
# filename parameter, then stack everything into a single frame and write it out.

# zip() below would silently ignore the surplus entries of the longer list,
# so insist that every parameter has exactly one column name.
if len(OUTER_PARAM_FILENAME_STRINGS) != len(OUTER_PARAM_NEW_COL_NAMES):
    raise ValueError(
        "OUTER_PARAM_FILENAME_STRINGS and OUTER_PARAM_NEW_COL_NAMES must have the same length "
        f"(got {len(OUTER_PARAM_FILENAME_STRINGS)} and {len(OUTER_PARAM_NEW_COL_NAMES)})."
    )

dataFrames = []

for fn in allFiles:
    df = pd.read_csv(path+fn)

    for p, name in zip(OUTER_PARAM_FILENAME_STRINGS, OUTER_PARAM_NEW_COL_NAMES):
        # Actually abort on a missing parameter instead of printing a message
        # and then failing on the lookup below.
        if p not in file_dict[fn]:
            raise KeyError(f"Could not find start parameter {p} in filename {fn}. Aborting.")

        try:
            outer_param = float(file_dict[fn][p])
        except ValueError as err:
            raise ValueError(
                f"Parameter {p} in filename {fn} has non-numeric value {file_dict[fn][p]!r}."
            ) from err

        # Assigning a scalar fills the whole column with that value.
        df[name] = outer_param

    dataFrames.append(df)

if not dataFrames:
    raise ValueError(f"No CSV files to concatenate were found in {path}")

output_file = path + outputName + '.csv'
singleFrame = pd.concat(dataFrames, axis=0, ignore_index=True)
singleFrame.to_csv(output_file, index=False)
print(f"Wrote a concatenated data file at {output_file}")

Wrote a concatenated data file at Z:\google-drive\WTe2\Raw Data\2022-01-11 BWX\n-D 2term\n-D sweeps_V6-12=100uVac_T=100mK_gainI=1e8_ground others.csv
