In [35]:
import pandas as pd
from datetime import datetime

In [32]:
# Read in and standardize Underway data

processed_underway_ifcb_data = pd.read_excel('IFCB_Logs/Pioneer-20_AR82_IFCB_Log_2025-03-13_SEN.xlsx', sheet_name=1, header=3)
processed_underway_ifcb_data = processed_underway_ifcb_data.fillna('')
processed_underway_ifcb_data.columns = processed_underway_ifcb_data.columns.str.strip()


# Read in and standardize Discrete data
processed_discrete_ifcb_data = pd.read_excel('IFCB_Logs/Pioneer-20_AR82_IFCB_Log_2025-03-13_SEN.xlsx', sheet_name=2, header=3)
processed_discrete_ifcb_data = processed_discrete_ifcb_data.fillna('')
processed_discrete_ifcb_data.columns = processed_discrete_ifcb_data.columns.str.strip()

cruise = 'AR82' # format example: AR82
ifcb_instrument_num = 'IFCB206' # format: IFCBXXX

#this is the only block that contains variables that need to be updated
processed_discrete_ifcb_data.columns.tolist()

['Filename',
 'HDR Comment',
 'Sample Type',
 'Cruise Leg',
 'Site',
 'Cast',
 'Niskin',
 'IFCB Bottle',
 'Target Cast Depth',
 'Trip Depth',
 '# Triggers',
 '# ROIs',
 'Run time',
 'Inhibit time',
 'Sample time',
 'Volume Analyzed',
 'ROIs/ml',
 'Cast Start Latitude',
 'Cast Start Longitude',
 'Notes']

In [33]:
columns_in_metadata_csv = ['filename', 'Latitude', 'Longitude', 'Depth', 'sample_type', 'Cruise', 'Instrument', 'tag1', 'tag2']

processed_underway_ifcb_data_mapped = {
    'filename': processed_underway_ifcb_data['Filename'], 
    'Latitude': processed_underway_ifcb_data['Ship Latitude'],
    'Longitude': processed_underway_ifcb_data['Ship Longitude'],
    'Depth': 3,
    'sample_type': 'underway',
    'Cruise': cruise,
    'Instrument': ifcb_instrument_num,
    'tag1': processed_underway_ifcb_data['Site'].apply(lambda x: 'site_' + str(x) if pd.notna(x) and x != '' else x), 
    'tag2': 'targetdepth_surface',
    'tag3': ''
}

processed_discrete_ifcb_data_mapped = {
    'filename': processed_discrete_ifcb_data['Filename'], 
    'Latitude': processed_discrete_ifcb_data['Cast Start Latitude'],
    'Longitude': processed_discrete_ifcb_data['Cast Start Longitude'],
    'Depth': processed_discrete_ifcb_data['Trip Depth'],
    'sample_type': processed_discrete_ifcb_data['Sample Type'].apply(
        lambda x: 'discrete' if 'discrete' in str(x).lower() and 'test' not in str(x).lower() and 'beads' not in str(x).lower() else x
    ),
    'Cruise': cruise,
    'Instrument':ifcb_instrument_num,
    'tag1': processed_discrete_ifcb_data['Site'].apply(lambda x: 'site_' + str(x) if pd.notna(x) and x != '' else x), 
    'tag2': processed_discrete_ifcb_data['Target Cast Depth'].apply(lambda x: 'site_' + str(x) if pd.notna(x) and x != '' else x), 
    'tag3': processed_discrete_ifcb_data.apply(
        lambda row: 'qc_' + str(row['Sample Type']) if (
            (pd.notna(row['Sample Type']) and 'test' in str(row['Sample Type']).lower()) or 
            (pd.notna(row['HDR Comment']) and 'test' in str(row['HDR Comment']).lower()) or
            (pd.notna(row['Sample Type']) and 'beads' in str(row['Sample Type']).lower()) or 
            (pd.notna(row['HDR Comment']) and 'beads' in str(row['HDR Comment']).lower())
        ) else '', axis=1
    )
}


underway_new = pd.DataFrame(processed_underway_ifcb_data_mapped)
discrete_new = pd.DataFrame(processed_discrete_ifcb_data_mapped)


metadata_df = pd.concat([underway_new, discrete_new], ignore_index=True)


metadata_df

Unnamed: 0,filename,Latitude,Longitude,Depth,sample_type,Cruise,Instrument,tag1,tag2,tag3
0,D20240401T151712_IFCB206,41.524,-70.672,3,underway,AR82,IFCB206,,targetdepth_surface,
1,D20240401T173608_IFCB206,41.524,-70.672,3,underway,AR82,IFCB206,,targetdepth_surface,
2,D20240401T180004_IFCB206,41.524,-70.672,3,underway,AR82,IFCB206,,targetdepth_surface,
3,D20240401T182359_IFCB206,41.524,-70.672,3,underway,AR82,IFCB206,,targetdepth_surface,
4,D20240401T184754_IFCB206,40.291,-70.882,3,underway,AR82,IFCB206,,targetdepth_surface,
...,...,...,...,...,...,...,...,...,...,...
946,D20240416T225730_IFCB206,35.725033,-74.85305,23.0,discrete,AR82,IFCB206,site_CP11SOSM,site_chl max,
947,D20240416T232124_IFCB206,35.725033,-74.85305,23.0,discrete,AR82,IFCB206,site_CP11SOSM,site_chl max,
948,D20240417T150235_IFCB206,36.175117,-74.826233,3.0,discrete,AR82,IFCB206,site_CP11NOSM,site_surface,
949,D20240417T152629_IFCB206,36.175117,-74.826233,3.0,discrete,AR82,IFCB206,site_CP11NOSM,site_surface,


In [36]:
timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
output_filename = f"Metadata_CSVs/ifcb_dashboard_metadata_{timestamp}.csv"
metadata_df.to_csv(output_filename, index=False)