In [2]:
#!pip install geopy #run this line if geopy module is not found
#!pip install chardet #run this line if chardet module is not found
 
import os
import pandas as pd
import re
from datetime import datetime
from geopy.distance import geodesic
import chardet #character encoding detector
import numpy as np

In [3]:
# specify the directory where the underway IFCB Data is stored
dir = "sample_hdrs"
cruiseNum = "fileTEST" # this should follow the format AR82, etc. unless for test

# indicate the file parameters to target (these are the columns needed in the IFCB log)

file_parameters_from_hdr_files = ["FileComment", "triggerCount", "roiCount", "runTime", "inhibitTime", "SyringeSampleVolume", "syringeSamplingSpeed", "temperature", "RunFastFactor",
                   "sampleNumber", "runSampleFast"]

# dictionary used to organize the filenames/sample ids with the associated parameters
all_fileparam_dicts = []



In [4]:
#### DECLARE the FUNCTION USED for PATTERN MATCHING within the HEADER FILE

def gather_values(text_content, param_list):
    file_dict = {"Filename": filename}
    for p in param_list:
        escaped_dynamic_string = re.escape(p)
        pattern = re.compile(r'{}:\s*(.+)'.format(escaped_dynamic_string))
        dynamic_match = pattern.search(hdr_content)

        if dynamic_match:
            value = dynamic_match.group(1)
            #return dynamic_match.group(1)
            #print(p, " value: ", value)
            file_dict[p] = value
        else: #return None
            print(filename, "No match found.")

    all_fileparam_dicts.append(file_dict)

In [5]:
#### APPLY gather_values() to ALL HDR FILES in the ESTABLISHED dir

for file in os.listdir(dir):
    if file.endswith(".hdr") and not file.startswith("._"):
        filename = os.path.splitext(file)[0]
        filepath = os.path.join(dir, file)
        with open(filepath, "r", encoding="iso-8859-1") as f:
            hdr_content = f.read()
            #print(hdr_content) # read out of all hdr content
            gather_values(hdr_content, file_parameters_from_hdr_files)
        

In [6]:
underway_IFCB_hdr_output = pd.DataFrame(all_fileparam_dicts)
underway_IFCB_hdr_output

Unnamed: 0,Filename,FileComment,triggerCount,roiCount,runTime,inhibitTime,SyringeSampleVolume,syringeSamplingSpeed,temperature,RunFastFactor,sampleNumber,runSampleFast
0,D20240405T212510_IFCB199,CP10CNSM-00001 deployment,5399,5384,1200.89375,449.89739583333335,5,20,9.403791103990244,1,1,False
1,D20240403T141609_IFCB199,CP10CNSM-00001 deployment,6494,3666,1201.7823611111112,541.51,5,20,10.468260471503784,2,"manually edited test file SN jan 9, 2025",True
2,D20240404T122511_IFCB199,CP10CNSM-00001 deployment,6886,6868,1200.800277777778,574.8570138888889,5,20,10.042472724498367,1,1,False
3,D20240405T032510_IFCB199,CP10CNSM-00001 deployment,6644,6684,1201.181111111111,553.088125,5,20,9.510238040741598,1,1,False
4,D20240405T182512_IFCB199,CP10CNSM-00001 deployment,5135,5067,1201.7815277777777,427.6896527777778,5,20,9.403791103990244,1,1,False
5,D20240404T212511_IFCB199,CP10CNSM-00001 deployment,6367,6353,1201.4340277777778,530.3027430555555,5,20,9.616684977492952,1,1,False
6,D20240406T062512_IFCB199,CP10CNSM-00001 deployment,5538,5547,1202.0494444444444,461.5920833333333,5,20,9.615021744106215,1,1,False
7,D20240405T062512_IFCB199,CP10CNSM-00001 deployment,6452,6450,1202.055833333333,537.2093055555556,5,20,9.29734416723889,1,1,False
8,D20240406T032510_IFCB199,CP10CNSM-00001 deployment,5335,5319,1201.5975,443.9709027777778,5,20,9.403791103990244,1,1,False
9,D20240403T182514_IFCB199,CP10CNSM-00001 deployment,7476,4073,1201.294583333333,622.9629861111111,5,20,9.723131914244306,3,"manually edited file SN jan 9, 2026",True


In [7]:
timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
output_filename = f"HDR_Summaries/underway_ifcb_hdr_summaries/{cruiseNum}_ifcb_underway_hdr_summary_{timestamp}.csv"
# underway_IFCB_hdr_output.to_csv(output_filename, index=False) # can skip this to avoid creating intermediate file

#### Adding Volume Analyzed (& lookTime, flowRate, and runSampleFast_Int) to the output dataframe

These can also be added via Excel functions rather than with this script.

In [8]:
# Pull date and time from the filename
underway_IFCB_hdr_output['Datetime'] = pd.to_datetime(underway_IFCB_hdr_output['Filename'].str[1:15], format='%Y%m%dT%H%M%S')

# Ensure needed numeric values are numeric and not strings
underway_IFCB_hdr_output['runTime'] = pd.to_numeric(underway_IFCB_hdr_output['runTime'], errors='coerce')
underway_IFCB_hdr_output['inhibitTime'] = pd.to_numeric(underway_IFCB_hdr_output['inhibitTime'], errors='coerce')
underway_IFCB_hdr_output['syringeSamplingSpeed'] = pd.to_numeric(underway_IFCB_hdr_output['syringeSamplingSpeed'], errors='coerce')
underway_IFCB_hdr_output['SyringeSampleVolume'] = pd.to_numeric(underway_IFCB_hdr_output['SyringeSampleVolume'], errors='coerce')
underway_IFCB_hdr_output['RunFastFactor'] = pd.to_numeric(underway_IFCB_hdr_output['RunFastFactor'], errors='coerce')


# Create a column for lookTime
underway_IFCB_hdr_output['lookTime'] = underway_IFCB_hdr_output['runTime'] - underway_IFCB_hdr_output['inhibitTime']


# Create a column for runSampleFast_Int
## if runsamplefast = false then runsamplefast_Int==1
#underway_IFCB_hdr_output['runSampleFast_Int'] = (underway_IFCB_hdr_output['runSampleFast'].str.lower() != 'true').astype(int) ### delete
#underway_IFCB_hdr_output['runSampleFast_Int'] ### delete

    
# Create a column for flowRate_mins

## syringeSamplingSpeed (McLane sets this to 20 minutes, i.e., this is a fixed 20 in the header file, which is not always accurate)
#### 20 minutes is accurate in the cases where the sampling volume is 5 mL, but becomes inaccurate when the sampling times are altered.
#### The more operationally precise way to view this is to focus on the default flow rate being 0.25 mL/min (which is true, and is 
#### usually accurately represented by the defaults (5 mL SyringeSampleVolume / syringeSamplingSpeed).
#### 0.25 mL/min is only NOT accurate in cases where runSampleFast_Int is changed from 1 (the default) AND runSampleFast == TRUE.  

## SyringeSampleVolume (this is usually 5 ml unless the sample time is cut short by the operator (usually for testing reasons)
base_flow_rate = 0.25

underway_IFCB_hdr_output['flowRate_mins'] = np.where(
    underway_IFCB_hdr_output['runSampleFast'],
    base_flow_rate * underway_IFCB_hdr_output["RunFastFactor"],
    base_flow_rate
)


# underway_IFCB_hdr_output['flowRate_mins'] = underway_IFCB_hdr_output['SyringeSampleVolume'] / underway_IFCB_hdr_output['syringeSamplingSpeed']  ## original - can probably delete after monday huddle

# Create a column for volumeAnalyzed
#underway_IFCB_hdr_output['volumeAnalyzed'] = (underway_IFCB_hdr_output['RunFastFactor'] * underway_IFCB_hdr_output['runSampleFast_Int']) * underway_IFCB_hdr_output['flowRate_mins'] * (underway_IFCB_hdr_output['lookTime']/60)/5  #delete
underway_IFCB_hdr_output['volumeAnalyzed'] = underway_IFCB_hdr_output['flowRate_mins'] * (underway_IFCB_hdr_output['lookTime']/60)
underway_IFCB_hdr_output

Unnamed: 0,Filename,FileComment,triggerCount,roiCount,runTime,inhibitTime,SyringeSampleVolume,syringeSamplingSpeed,temperature,RunFastFactor,sampleNumber,runSampleFast,Datetime,lookTime,flowRate_mins,volumeAnalyzed
0,D20240405T212510_IFCB199,CP10CNSM-00001 deployment,5399,5384,1200.89375,449.897396,5,20,9.403791103990244,1,1,False,2024-04-05 21:25:01,750.996354,0.25,3.129151
1,D20240403T141609_IFCB199,CP10CNSM-00001 deployment,6494,3666,1201.782361,541.51,5,20,10.468260471503784,2,"manually edited test file SN jan 9, 2025",True,2024-04-03 14:16:00,660.272361,0.5,5.50227
2,D20240404T122511_IFCB199,CP10CNSM-00001 deployment,6886,6868,1200.800278,574.857014,5,20,10.042472724498367,1,1,False,2024-04-04 12:25:01,625.943264,0.25,2.608097
3,D20240405T032510_IFCB199,CP10CNSM-00001 deployment,6644,6684,1201.181111,553.088125,5,20,9.510238040741598,1,1,False,2024-04-05 03:25:01,648.092986,0.25,2.700387
4,D20240405T182512_IFCB199,CP10CNSM-00001 deployment,5135,5067,1201.781528,427.689653,5,20,9.403791103990244,1,1,False,2024-04-05 18:25:01,774.091875,0.25,3.225383
5,D20240404T212511_IFCB199,CP10CNSM-00001 deployment,6367,6353,1201.434028,530.302743,5,20,9.616684977492952,1,1,False,2024-04-04 21:25:01,671.131285,0.25,2.79638
6,D20240406T062512_IFCB199,CP10CNSM-00001 deployment,5538,5547,1202.049444,461.592083,5,20,9.615021744106215,1,1,False,2024-04-06 06:25:01,740.457361,0.25,3.085239
7,D20240405T062512_IFCB199,CP10CNSM-00001 deployment,6452,6450,1202.055833,537.209306,5,20,9.29734416723889,1,1,False,2024-04-05 06:25:01,664.846528,0.25,2.770194
8,D20240406T032510_IFCB199,CP10CNSM-00001 deployment,5335,5319,1201.5975,443.970903,5,20,9.403791103990244,1,1,False,2024-04-06 03:25:01,757.626597,0.25,3.156777
9,D20240403T182514_IFCB199,CP10CNSM-00001 deployment,7476,4073,1201.294583,622.962986,5,20,9.723131914244306,3,"manually edited file SN jan 9, 2026",True,2024-04-03 18:25:01,578.331597,0.75,7.229145


In [9]:
# SAVE THE HDR SUMMARY FILE CONTAINING THESE CALCULATED VALUES
timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
# output_filename = f"HDR_Summaries/underway_ifcb_hdr_summaries/AR87_ifcb_underway_hdr_summary_with_calcd_values_{timestamp}.csv"
# underway_IFCB_hdr_output.to_csv(output_filename, index=False) # can skip this to avoid creating an intermediate file

#### Adding Ship Latitude and Longitude

In [10]:
# This file is created via the Ship underway Lat & Lon Processing.ipynb notebook
underway_file = "Merged_Ship_Underway_Files/mergedunderway_AR82.csv"
#underway_ship_data = pd.read_csv(underway_file, dtype={'Datetime_GMT_underway': 'datetime64[ns]'})
underway_ship_data = pd.read_csv(underway_file, parse_dates=['Datetime_GMT_underway'], low_memory=False)
underway_ship_data['Datetime_GMT_underway'] = pd.to_datetime(underway_ship_data['Datetime_GMT_underway'], errors='coerce')
underway_ship_data['Datetime_GMT_underway'].dtype
underway_ship_data


# Uncomment these two following lines if you want to look at faulty dates that might be present in the ship data
#invalid_dates = underway_ship_data[underway_ship_data['Datetime_GMT_underway'].isna()]
#print(invalid_dates)

Unnamed: 0,DATE_GMT,TIME_GMT,Dec_LAT,Dec_LON,SPD,HDT,DPS_COG,SOG,WXTP_Ta,WXTS_Ta,...,FLR,FLOW,SSVdslog,Depth12,Depth35,EM122,EM710,COG,Datetime_GMT_underway,Datetime_UTC_underway
0,2024/04/03,00:00:47.688,37.362,-74.208,10.84,207.11,207.24,11.08,11.1,11.1,...,365.4,45.4,1481.711,NAN,NAN,NAN,NAN,,2024-04-03 00:00:47.688,2024-04-03 00:00:47.688000+00:00
1,2024/04/03,00:01:47.688,37.359,-74.21,10.89,207.3,205.66,11.05,11.0,11.0,...,355.3,45.4,1481.726,NAN,NAN,NAN,NAN,,2024-04-03 00:01:47.688,2024-04-03 00:01:47.688000+00:00
2,2024/04/03,00:02:47.688,37.356,-74.212,10.89,208.64,207.46,11.27,11.0,11.0,...,352.3,45.2,1481.856,NAN,NAN,NAN,NAN,,2024-04-03 00:02:47.688,2024-04-03 00:02:47.688000+00:00
3,2024/04/03,00:03:47.688,37.353,-74.214,10.94,208.7,207.94,11.25,11.0,11.1,...,347.6,45.3,1481.988,NAN,NAN,NAN,NAN,,2024-04-03 00:03:47.688,2024-04-03 00:03:47.688000+00:00
4,2024/04/03,00:04:47.688,37.351,-74.215,10.9,208.27,205.49,11.32,11.0,11.2,...,340.8,45.3,1481.977,NAN,NAN,NAN,NAN,,2024-04-03 00:04:47.688,2024-04-03 00:04:47.688000+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31518,2024/04/16,23:55:48.965,35.943,-74.831,NAN,265.85,269.53,6.92,13.6,13.6,...,480.0,60.2,1492.866,NAN,NAN,NAN,NAN,,2024-04-16 23:55:48.965,2024-04-16 23:55:48.965000+00:00
31519,2024/04/16,23:56:48.965,35.943,-74.834,NAN,265.7,268.66,6.91,13.6,13.7,...,479.3,60.0,1492.798,NAN,NAN,NAN,NAN,,2024-04-16 23:56:48.965,2024-04-16 23:56:48.965000+00:00
31520,2024/04/16,23:57:48.965,35.943,-74.836,NAN,265.65,268.40,7.07,13.6,13.7,...,480.0,60.1,1492.739,NAN,NAN,NAN,NAN,,2024-04-16 23:57:48.965,2024-04-16 23:57:48.965000+00:00
31521,2024/04/16,23:58:48.965,35.943,-74.838,NAN,265.76,279.04,6.98,13.6,13.7,...,481.2,60.2,1492.673,NAN,NAN,NAN,NAN,,2024-04-16 23:58:48.965,2024-04-16 23:58:48.965000+00:00


In [11]:
# Sort both dfs by date
underway_IFCB_hdr_output = underway_IFCB_hdr_output.sort_values('Datetime')
underway_ship_data = underway_ship_data.sort_values('Datetime_GMT_underway')

# Drop any rows in the ship data that do not have datetimes
underway_ship_data = underway_ship_data.dropna(subset=['Datetime_GMT_underway'])

# Merge the underway ship data to the IFCB HDR summary df
hdr_summary_with_lat_and_lon = pd.merge_asof(underway_IFCB_hdr_output, underway_ship_data, left_on='Datetime', right_on='Datetime_GMT_underway', direction='nearest')

In [12]:
# Display the df containing all hdr summary information and ship lat and lon
hdr_summary_with_lat_and_lon

Unnamed: 0,Filename,FileComment,triggerCount,roiCount,runTime,inhibitTime,SyringeSampleVolume,syringeSamplingSpeed,temperature,RunFastFactor,...,FLR,FLOW,SSVdslog,Depth12,Depth35,EM122,EM710,COG,Datetime_GMT_underway,Datetime_UTC_underway
0,D20240403T141609_IFCB199,CP10CNSM-00001 deployment,6494,3666,1201.782361,541.51,5,20,10.468260471503784,2,...,277.6,50.1,1486.304,NAN,NAN,NAN,NAN,,2024-04-03 14:15:47.743,2024-04-03 14:15:47.743000+00:00
1,D20240403T152620_IFCB199,CP10CNSM-00001 deployment,6521,4443,1201.840417,542.937014,5,20,10.574707408255136,1,...,288.1,49.4,1486.228,NAN,NAN,NAN,NAN,,2024-04-03 15:25:47.748,2024-04-03 15:25:47.748000+00:00
2,D20240403T182514_IFCB199,CP10CNSM-00001 deployment,7476,4073,1201.294583,622.962986,5,20,9.723131914244306,3,...,304.4,49.7,1487.304,NAN,NAN,NAN,NAN,,2024-04-03 18:24:47.759,2024-04-03 18:24:47.759000+00:00
3,D20240403T212511_IFCB199,CP10CNSM-00001 deployment,7244,6864,1201.204306,603.92625,5,20,10.25370336461434,1,...,331.2,49.6,1486.625,NAN,NAN,NAN,NAN,,2024-04-03 21:24:47.771,2024-04-03 21:24:47.771000+00:00
4,D20240404T002511_IFCB199,CP10CNSM-00001 deployment,7410,7272,1201.419583,616.581597,5,20,10.468260471503784,1,...,341.2,49.7,1487.017,NAN,NAN,NAN,NAN,,2024-04-04 00:24:47.782,2024-04-04 00:24:47.782000+00:00
5,D20240404T032512_IFCB199,CP10CNSM-00001 deployment,7089,6897,1201.364028,589.993611,5,20,10.574707408255136,1,...,349.9,51.0,1486.677,NAN,NAN,NAN,NAN,,2024-04-04 03:24:47.793,2024-04-04 03:24:47.793000+00:00
6,D20240404T092532_IFCB199,CP10CNSM-00001 deployment,7906,6004,1201.589306,657.794028,5,20,10.311916533150224,1,...,373.6,28.0,1487.613,NAN,NAN,NAN,NAN,,2024-04-04 09:24:47.816,2024-04-04 09:24:47.816000+00:00
7,D20240404T122511_IFCB199,CP10CNSM-00001 deployment,6886,6868,1200.800278,574.857014,5,20,10.042472724498367,1,...,348.3,25.5,1487.96,NAN,NAN,NAN,NAN,,2024-04-04 12:24:47.827,2024-04-04 12:24:47.827000+00:00
8,D20240404T152510_IFCB199,CP10CNSM-00001 deployment,6244,6223,1201.740833,519.929583,5,20,9.936025787747012,1,...,406.2,31.8,1485.186,NAN,NAN,NAN,NAN,,2024-04-04 15:24:47.838,2024-04-04 15:24:47.838000+00:00
9,D20240404T182510_IFCB199,CP10CNSM-00001 deployment,6122,6118,1201.582778,509.815069,5,20,9.82957885099566,1,...,362.7,34.9,1485.764,NAN,NAN,NAN,NAN,,2024-04-04 18:24:47.850,2024-04-04 18:24:47.850000+00:00


In [13]:
# Save the df containing all hdr summary information and ship lat and lon
timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
#output_filename = f"HDR_Summaries/underway_ifcb_hdr_summaries/AR87_ifcb_underway_hdr_summary_with_lat_lon_{timestamp}.csv"
# hdr_summary_with_lat_and_lon.to_csv(output_filename, index=False) # optional. can skip if you don't want an intermediate file.

#### Adding Site (from Ship Latitude and Longitude)

In [14]:
mab_site_centers = {
    'site': ['CP10CNSM;CP12CNSW', 'CP11NOSM;CP13NOPM', 'CP11SOSM;CP13SOPM', 'CP12WESW', 'CP13EAPM', 
             'CP14NEPM', 'CP14SEPM'],
    'lat': [35.95, 36.175, 35.725, 35.95, 35.95, 36.0536, 35.8514],
    'lon': [-75.125, -74.8267, -74.853, -75.3333, -74.8457, -74.7776,
            -74.8482]
}

centers_df = pd.DataFrame(mab_site_centers)

In [17]:
hdr_summary_with_lat_and_lon.columns.tolist()
hdr_summary_with_lat_and_lon[" Dec_LON"] # Dec_LON was used for AR82, but this col is not in the AR87 files
hdr_summary_with_lat_and_lon[" Dec_LAT"] # Dec_LON was used for AR82, but this col is not in the AR87 files
#hdr_summary_with_lat_and_lon[' CNAV_LON'] # CNAV_LON used for AR87
#hdr_summary_with_lat_and_lon[' CNAV_LAT'] #CNAV_LAT used for AR87

0      35.95
1      35.95
2     35.939
3     35.954
4     35.954
5     35.954
6     35.954
7     35.954
8     36.171
9     36.181
10    36.167
11     36.17
12    36.177
13    36.177
14    35.716
15    35.727
16    35.881
17      35.9
18     35.73
19     35.73
20     35.73
Name:  Dec_LAT, dtype: object

In [18]:
ifcb_lat = hdr_summary_with_lat_and_lon[" Dec_LAT"]
ifcb_lon = hdr_summary_with_lat_and_lon[" Dec_LON"]

hdr_summary_with_lat_and_lon_and_sites = hdr_summary_with_lat_and_lon

def check_within_radius(ifcb_lat, ifcb_lon, centers_df, radius):
    for _, center in centers_df.iterrows():
        center_lat = center['lat']
        center_lon = center['lon']
        center_name = center.get('site', 'Unnamed Site')

        # Calculate distance
        distance = geodesic((ifcb_lat, ifcb_lon), (center_lat, center_lon)).kilometers
        if distance <= radius:
            return True, center_name  # Return True and the site name if within radius
    return False, None  # Return False if not within any radius

# Define a wrapper function to apply row-wise
def apply_check_within_radius(row):
    return check_within_radius(row[' Dec_LAT'], row[' Dec_LON'], centers_df, radius=2)  # radius in kilometers



# Apply function to every row in the DataFrame
hdr_summary_with_lat_and_lon_and_sites[['within_radius', 'site_name']] = hdr_summary_with_lat_and_lon_and_sites.apply(apply_check_within_radius, axis=1, result_type="expand")

In [19]:
hdr_summary_with_lat_and_lon_and_sites

Unnamed: 0,Filename,FileComment,triggerCount,roiCount,runTime,inhibitTime,SyringeSampleVolume,syringeSamplingSpeed,temperature,RunFastFactor,...,SSVdslog,Depth12,Depth35,EM122,EM710,COG,Datetime_GMT_underway,Datetime_UTC_underway,within_radius,site_name
0,D20240403T141609_IFCB199,CP10CNSM-00001 deployment,6494,3666,1201.782361,541.51,5,20,10.468260471503784,2,...,1486.304,NAN,NAN,NAN,NAN,,2024-04-03 14:15:47.743,2024-04-03 14:15:47.743000+00:00,True,CP10CNSM;CP12CNSW
1,D20240403T152620_IFCB199,CP10CNSM-00001 deployment,6521,4443,1201.840417,542.937014,5,20,10.574707408255136,1,...,1486.228,NAN,NAN,NAN,NAN,,2024-04-03 15:25:47.748,2024-04-03 15:25:47.748000+00:00,True,CP10CNSM;CP12CNSW
2,D20240403T182514_IFCB199,CP10CNSM-00001 deployment,7476,4073,1201.294583,622.962986,5,20,9.723131914244306,3,...,1487.304,NAN,NAN,NAN,NAN,,2024-04-03 18:24:47.759,2024-04-03 18:24:47.759000+00:00,False,
3,D20240403T212511_IFCB199,CP10CNSM-00001 deployment,7244,6864,1201.204306,603.92625,5,20,10.25370336461434,1,...,1486.625,NAN,NAN,NAN,NAN,,2024-04-03 21:24:47.771,2024-04-03 21:24:47.771000+00:00,True,CP10CNSM;CP12CNSW
4,D20240404T002511_IFCB199,CP10CNSM-00001 deployment,7410,7272,1201.419583,616.581597,5,20,10.468260471503784,1,...,1487.017,NAN,NAN,NAN,NAN,,2024-04-04 00:24:47.782,2024-04-04 00:24:47.782000+00:00,True,CP10CNSM;CP12CNSW
5,D20240404T032512_IFCB199,CP10CNSM-00001 deployment,7089,6897,1201.364028,589.993611,5,20,10.574707408255136,1,...,1486.677,NAN,NAN,NAN,NAN,,2024-04-04 03:24:47.793,2024-04-04 03:24:47.793000+00:00,True,CP10CNSM;CP12CNSW
6,D20240404T092532_IFCB199,CP10CNSM-00001 deployment,7906,6004,1201.589306,657.794028,5,20,10.311916533150224,1,...,1487.613,NAN,NAN,NAN,NAN,,2024-04-04 09:24:47.816,2024-04-04 09:24:47.816000+00:00,True,CP10CNSM;CP12CNSW
7,D20240404T122511_IFCB199,CP10CNSM-00001 deployment,6886,6868,1200.800278,574.857014,5,20,10.042472724498367,1,...,1487.96,NAN,NAN,NAN,NAN,,2024-04-04 12:24:47.827,2024-04-04 12:24:47.827000+00:00,True,CP10CNSM;CP12CNSW
8,D20240404T152510_IFCB199,CP10CNSM-00001 deployment,6244,6223,1201.740833,519.929583,5,20,9.936025787747012,1,...,1485.186,NAN,NAN,NAN,NAN,,2024-04-04 15:24:47.838,2024-04-04 15:24:47.838000+00:00,True,CP11NOSM;CP13NOPM
9,D20240404T182510_IFCB199,CP10CNSM-00001 deployment,6122,6118,1201.582778,509.815069,5,20,9.82957885099566,1,...,1485.764,NAN,NAN,NAN,NAN,,2024-04-04 18:24:47.850,2024-04-04 18:24:47.850000+00:00,True,CP11NOSM;CP13NOPM


In [20]:
# SAVE this VERSION of the DF CONTAINING ALL SUMMARY DATA, LAT, LON, and SITE NAMES
timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
output_filename = f"HDR_Summaries/underway_ifcb_hdr_summaries/{cruiseNum}_ifcb_underway_hdr_summary_with_lat_lon_and_sites_{timestamp}.csv"
hdr_summary_with_lat_and_lon_and_sites.to_csv(output_filename, index=False)