In [None]:
# Combine CTD data with ROSINA data.
# This notebook attaches CTD data to the "filtered" ROSINA data.
# To produce the filtered ROSINA data, run the notebook "rosi_cleanup.ipynb" first.

In [56]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import os
import glob
import re

In [79]:
# Set the paths to the data.
# The data root can be overridden with the MSM107_DATA_DIR environment variable so the
# notebook is not tied to one machine; the default is the original location.
DATA_DIR = os.environ.get("MSM107_DATA_DIR", "/Users/dkim/Desktop/work/MSM107_analysis/data")
rosipath = os.path.join(DATA_DIR, "06_ROSINA_new")  # searched for filtered ROSINA files
ctdpath = os.path.join(DATA_DIR, "04_CTD-Idronaut")  # searched for CTD files

In [81]:
# Collect the ROSINA and CTD input files (recursively, sorted by path) and keep only the
# first file found per parent directory.
def _is_combined_output(path):
    """Return True for "filtered_CTD..." files, i.e. results this notebook saved earlier.

    The combined files are written next to the inputs and also match "filtered_*.txt",
    so without this check a re-run could pick up its own output as input.
    """
    return os.path.basename(path).startswith("filtered_CTD")


def _first_per_directory(paths):
    """Return `paths` in order, keeping only the first one for each parent directory name.

    NOTE(review): the key is the name of the directory containing the file, not a number
    parsed from the file name -- presumably there is one directory per profile; confirm.
    """
    seen = set()
    kept = []
    for path in paths:
        parent = os.path.basename(os.path.dirname(path))
        if parent not in seen:
            seen.add(parent)
            kept.append(path)
    return kept


rosifiles = _first_per_directory(
    path
    for path in sorted(glob.glob(rosipath + "/**/filtered_*.txt", recursive=True))
    if not _is_combined_output(path)
)
ctdfiles = _first_per_directory(
    sorted(glob.glob(ctdpath + "/**/MSM107*-C*.txt", recursive=True))
)

In [88]:
# Match each ROSINA file to the CTD file with the same profile number (parsed from the
# file names), attach the nearest-depth CTD row to every ROSINA row, and save the result.

# Largest accepted |CTD depth - ROSINA depth| in metres; ROSINA rows without a CTD sample
# this close get NaN in the CTD columns.
MAX_DEPTH_GAP = 5


def _first_number(text):
    """Return the first run of digits in `text` as an int, or None if there is none."""
    found = re.search(r"\d+", text)
    return int(found.group()) if found else None


def _ctd_profile_number(path):
    """Return the profile number of a CTD file, or None if the name cannot be parsed.

    The number is the first run of digits in the part of the file name that follows
    the first "-".
    """
    pieces = os.path.basename(path).split("-")
    return _first_number(pieces[1]) if len(pieces) > 1 else None


def _nearest_ctd_positions(rosi_depths, ctd_depths, max_gap):
    """Return, per ROSINA depth, the position of the closest CTD depth (or -1).

    NaN CTD depths are ignored and ties go to the earliest CTD row. A position of -1
    means no CTD depth lies within `max_gap` (inclusive) of that ROSINA depth.
    """
    positions = np.full(len(rosi_depths), -1, dtype=int)
    if len(ctd_depths) == 0 or np.isnan(ctd_depths).all():
        return positions  # nothing to match against
    for k, depth in enumerate(rosi_depths):
        if np.isnan(depth):
            continue
        gaps = np.abs(ctd_depths - depth)
        nearest = int(np.nanargmin(gaps))  # first minimum, NaN gaps skipped
        if gaps[nearest] <= max_gap:
            positions[k] = nearest
    return positions


def attach_nearest_ctd(rosi, ctd, max_gap=MAX_DEPTH_GAP):
    """Return a copy of `rosi` with the columns of the nearest-depth CTD row attached.

    Rows are matched by comparing rosi["depth"] with ctd["Depth"]. Every column of `ctd`
    is added to the result (a ROSINA column with the same name is overwritten); rows
    without a CTD sample within `max_gap` get NaN in those columns.
    """
    ctd = ctd.reset_index(drop=True)
    positions = _nearest_ctd_positions(
        rosi["depth"].to_numpy(dtype=float),
        ctd["Depth"].to_numpy(dtype=float),
        max_gap,
    )
    # Label -1 does not exist in the fresh RangeIndex, so reindex() yields an all-NaN
    # row for every unmatched ROSINA row.
    nearest = ctd.reindex(positions)
    combined = rosi.copy()
    for column in ctd.columns:
        combined[column] = nearest[column].to_numpy()
    return combined


# Index the CTD files by profile number once; setdefault keeps the first file per number.
ctd_by_profile = {}
for ctd_file in ctdfiles:
    ctd_number = _ctd_profile_number(ctd_file)
    if ctd_number is not None:
        ctd_by_profile.setdefault(ctd_number, ctd_file)

for rosifile in rosifiles:
    # extract the profile number from the ROSINA file name
    profnum = _first_number(os.path.basename(rosifile))
    print(profnum)
    ctdfilename = ctd_by_profile.get(profnum)
    if ctdfilename is None:
        print("No CTD file for the profile number: ", profnum)
        continue

    # 1. read the ROSINA data and CTD data
    rosi = pd.read_csv(rosifile, sep=",", header=0)
    # header=16: the CTD column names are taken from row 16 (0-based) of the file
    ctd = pd.read_csv(ctdfilename, sep="\t", header=16)

    # 2. remove rows having NaN in the "depth" column in the ROSINA data
    rosi = rosi.dropna(subset=["depth"])

    # 3. append the CTD data to the ROSINA data based on the closest depth
    rosi = attach_nearest_ctd(rosi, ctd)

    # 4. save the combined data next to the input; only the file name is rewritten so a
    #    directory that happens to contain "filtered" is left alone
    folder, name = os.path.split(rosifile)
    outfile = os.path.join(folder, name.replace("filtered", "filtered_CTD", 1))
    rosi.to_csv(outfile, sep=",", index=False)


1
2
No CTD file for the profile number:  2
3
4
5
6
7
8
9
No CTD file for the profile number:  9
10
11
12
13
14
15
16
17
18
19
20
