In [37]:
import numpy as np
import pandas as pd
import os

In [None]:
path_to_data = '../../data/processed_data/'
path_to_raw = '../../data/raw_data/'
metadata_file = 'metadata.csv'

In [39]:
# load depths
depths = pd.read_excel(os.path.join(path_to_raw, 'depths.xlsx'))
# make the section column a string
depths['section'] = depths['section'].astype(str)
depths.head()

Unnamed: 0,core,section,top_depth,index_mark
0,dic1,140,0,0.1
1,dic1,141,0,0.1
2,dic1,142,0,0.1
3,dic1,143,0,0.1
4,dic1,144,0,0.1


In [40]:
# set flags in file and corresponding header in master csv
flag_dict = {'AC Collect Speed: ':'AC_col_sp',
'DC Collect Speed: ':'DC_col_sp',
'DC Voltage: ':'DC_volt',
'Note: ':'note',
'mm per encode step: ':'mm_per_encode_step',
'Number of Expected tracks: ':'num_tracks',
'ACDC offset: ':'ACDC_offset',
'Laser offset: ':'laser_offset',
'Y Left: ':'Y_left',
'Y Right: ':'Y_right',
'AC edgespace ':'AC_edgespace',
'DC edgespace ':'DC_edgespace',
'Index Mark (raw - not laser corrected): ':'idx1_raw',
'Index Mark Relative Depth: ':'idx1_rel',
'Index Mark 2 Relative Depth: ':'idx2_rel',
'Index Mark 3 Relative Depth: ':'idx3_rel',
'(first) Index Mark Absolute Depth: ' : 'idx_abs',
'X min Position (raw - not laser corrected): ':'xmin',
'X max Position (raw - not laser corrected): ': 'xmax'
    }

#header line
hline = 'Y_dimension(mm),X_dimension(mm),Button,AC,DC,True_depth(m)'

# extra headers for dataframe
extra_headers = ['time','core','section','face','ACorDC','header','filename']


In [41]:
# sort headers / keys
headers = list(flag_dict.values())
flags = flag_dict.keys()
for s in extra_headers:
    headers.append(s)

#  create dataframe

df = pd.DataFrame(columns=headers)

In [42]:
txt_files = []

folder_path = path_to_raw 

# Check if the folder exists
if os.path.exists(folder_path) and os.path.isdir(folder_path):

    # Get the list of all .txt files in the folder and add their paths to the txt_files list
    txt_files.extend([os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.txt')])


In [43]:
for f in txt_files:
    
    # open file
    
    
    vals=[]
    # extract values from file
    vals = []
    with open(f, 'r') as file:
        
        cnt = 0
        flags = list(flag_dict.keys())
        lcnt = 0
        for index,line in enumerate(file,1):
            for flag in flags:
                if flag in line:
                    flags.remove(flag)
                    vals.append(line[len(flag):-6])
            if hline in line:
                header = index
            
                    
        # on last line in file, check for AC or dc
        lp = line.split(',')
        if lp[4]=='--':
            ACorDC = 'AC'
        elif lp[3]=='--':
            ACorDC = 'DC'
        else:
            ACorDC = 'ERROR'
               
    # now add on extra headers not in the flags dict
    path = f.split('/')
    parts = path[-1].split('-')
    vals.append(parts[0]+'-'+parts[1]+'-'+parts[2]+'-'+parts[3]+'-'+parts[4])# time
    vals.append(parts[5]) # core
    section = parts[6]
    vals.append(parts[6]) # section
    vals.append(parts[7][:-4]) # face
    vals.append(ACorDC)
    vals.append(header)
    vals.append(f)
    
    # add to df
    data_dict = dict(zip(headers,vals))
    df = pd.concat([df,pd.DataFrame([data_dict])], ignore_index=True)

# make the section column a string
df['section'] = df['section'].astype(str)

In [44]:
# update the metadata file to include depth data
for index, row in df.iterrows():

    matching_depth = depths[(depths['core'] == row['core']) & (depths['section'] == row['section'])]
    if not matching_depth.empty:
        df.at[index, 'idx_abs'] = float(matching_depth.iloc[0]['top_depth'])
        df.at[index, 'idx1_rel'] = float(matching_depth.iloc[0]['index_mark'])
    else:
        print(f"No matching depth found for core {row['core']} and section {row['section']}.")

In [45]:
#%% Save metadata CSV

# rearange so the columns I want are at the front
h = list(df.columns)
front = ['core','time','section','face','ACorDC']
front.reverse()
for f in front:
    h.remove(f)
    h.insert(0,f)
df = df[h]

# drop duplicates (includes time, so only drops duplicates of the SAME RUN)
df = df.drop_duplicates(subset='time')

# sort values
df = df.sort_values(['core','section','face','ACorDC'], ascending=[True,True,True,True])

# save
df.to_csv(path_to_data+metadata_file)

OSError: Cannot save file into a non-existent directory: '../../data/proessed_data'

In [None]:
# loop through all rows in dataframe 
for index,row in df.iterrows():
    
    
    # read in raw data
    raw = pd.read_csv(row['filename'],header = row['header']-1)
    
    # rename AC/DC to meas
    raw['meas'] = raw[row['ACorDC']]
    raw = raw.drop(['AC', 'DC'], axis=1)

    # convert x to depth
    raw['True_depth(m)'] = float(row['idx_abs']) + float(row['idx1_rel'])/100 + (float(row['xmax']) - float(row['idx1_raw']) - raw['X_dimension(mm)']) / 1000
    
    # drop X dimension
    raw = raw.drop(['X_dimension(mm)'],axis=1)
    
    fname = row['core']+'-'+row['section']+'-'+row['face']+'-'+row['ACorDC']+'.csv'
    
    raw.to_csv(path_to_data+'/'+fname,index=False)
    