In [None]:
# Detrend the temperature time series of selected missions (days) in the combined cruise dataset

# Pia Goecke, pia.goecke@uni-oldenburg.de
# 03.02.2025

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata
import warnings
warnings.filterwarnings("ignore")
import os
import seaborn as sns
import plotly.graph_objs as go

%matplotlib qt


In [2]:
# Load the "All Grids" workbook of every cruise into its own DataFrame.
os.chdir('C:/Users/piago/Documents/Uni/5_Semester/Hiwi')

# NOTE(review): machine-specific absolute path; change it here (single place)
# when running the notebook on another computer.
DATA_DIR = 'C:/Users/piago/Documents/Uni/5_Semester/Hiwi/Data'


def load_cruise(cruise, filename, skiprows=None, rename=None):
    """Read one cruise workbook and return it indexed by its timestamps.

    Parameters
    ----------
    cruise : str
        Cruise label stored in the new ``cruise`` column (e.g. ``'HE598'``).
    filename : str
        Name of the Excel workbook inside ``DATA_DIR``.
    skiprows : int or None, optional
        Forwarded to ``pd.read_excel``; number of leading rows to skip.
    rename : dict or None, optional
        Column renames applied after loading, as ``{old_name: new_name}``.

    Returns
    -------
    pandas.DataFrame
        The sheet with ``Date_Time`` as a datetime index, plus a ``cruise``
        column and a ``Date_Time`` column that mirrors the index.
    """
    frame = pd.read_excel(f"{DATA_DIR}/{filename}", skiprows=skiprows)
    frame = frame.set_index('Date_Time')
    frame.index = pd.to_datetime(frame.index)
    if rename:
        frame = frame.rename(columns=rename)
    # Columns added for the combined dataset: cruise label and the timestamp
    # as a regular column (the index is discarded when the cruises are concatenated).
    frame['cruise'] = cruise
    frame['Date_Time'] = frame.index
    return frame


df_598 = load_cruise('HE598', 'HE598_All_Grids_Single_Sheet_Revised_v1.xlsx')
df_609 = load_cruise('HE609', 'HE609_All_Grids_Single_Sheet_Revised_v1.xlsx')
# The HE614 sheet is read with the first 10 rows skipped, and two of its
# temperature columns carry a unit suffix that the other cruises do not have.
df_614 = load_cruise(
    'HE614',
    'HE614_All_Grids_Single_Sheet.xlsx',
    skiprows=10,
    rename={
        'CTD_Temperature_50cm_[°C]': 'CTD_Temperature_50cm',
        'CTD_Temperature_60cm_[°C]': 'CTD_Temperature_60cm',
    },
)
df_626 = load_cruise('HE626', 'HE626_All_Grids_Single_Sheet.xlsx')



In [4]:
# Stack all cruises into one frame (fresh 0..n-1 index) and derive the
# calendar day (as midnight timestamps) and the month of every sample.
cruise_frames = [df_598, df_609, df_614, df_626]
allcruises = pd.concat(cruise_frames, axis=0, ignore_index=True).assign(
    Date=lambda frame: pd.to_datetime(frame['Date_Time'].dt.date),
    Month=lambda frame: frame['Date_Time'].dt.month,
)

In [5]:

def detrend_quadratic(time, values, degree=2):
    """Remove a polynomial trend from ``values`` and re-centre the result.

    A polynomial of the given degree is fitted to the rows where both
    ``time`` and ``values`` are present, evaluated at every time stamp and
    subtracted from ``values``. The median of the fitted curve is then added
    back so the detrended series stays at the original temperature level.

    Parameters
    ----------
    time : pandas.Series
        Numeric time axis (here: seconds since the start of the mission).
    values : pandas.Series
        Measurements to detrend; may contain NaNs.
    degree : int, optional
        Degree of the fitted polynomial (default 2, as in the original analysis).

    Returns
    -------
    (fitted, detrended) : tuple of pandas.Series
        Both share the index of ``values``. If there are not more than
        ``degree`` valid samples, both are all-NaN.
    """
    valid = time.notna() & values.notna()
    if valid.sum() <= degree:
        # Too few points to determine the polynomial.
        return (pd.Series(np.nan, index=values.index),
                pd.Series(np.nan, index=values.index))

    # Fit on valid samples only: np.polyfit cannot handle NaNs in its input.
    coefficients = np.polyfit(time[valid], values[valid], degree)
    fitted = pd.Series(np.polyval(coefficients, time.to_numpy()), index=values.index)
    detrended = values - fitted + fitted.median()
    return fitted, detrended


MISSION_DATES = ['2023-03-02', '2023-03-08', '2023-03-09', '2023-03-17']
CTD_DEPTHS = ['SML', '30cm', '40cm', '50cm', '60cm', '85cm', '100cm']
RBR_DEPTHS = ['30cm', '40cm', '50cm', '60cm', '85cm', '100cm']

for date in MISSION_DATES:
    on_date = allcruises['Date'] == date
    # Work on an explicit copy so that adding columns never touches a view
    # of `allcruises`.
    df = allcruises.loc[on_date].copy()

    # time in total seconds since the first sample of the mission
    df['time_numeric'] = (df['Date_Time'] - df['Date_Time'].min()).dt.total_seconds()

    # CTD temperatures; the 100cm depth is left out on 2023-03-08
    # (kept as in the original analysis).
    if date == '2023-03-08':
        depths = [d for d in CTD_DEPTHS if d != '100cm']
    else:
        depths = CTD_DEPTHS

    for d in depths:
        if df[f'CTD_Temperature_{d}'].dropna().empty:
            # Nothing to fit for this depth on this day.
            print(f"{date}: Column CTD_Temperature_{d} contains only NaN values. Creating nan-column for depth {d}.")
            df[f'detrend_Temp_{d}'] = np.nan
        else:
            fitted, detrended = detrend_quadratic(df['time_numeric'], df[f'CTD_Temperature_{d}'])
            df[f'fitted_Temp_{d}'] = fitted
            df[f'detrend_Temp_{d}'] = detrended
            # Write the detrended CTD series back into the combined dataset
            # (aligned on the shared row index).
            allcruises.loc[on_date, f'CTD_detrend_{d}'] = df[f'detrend_Temp_{d}']

    # ... and same for airtemp
    # NOTE(review): the air and RBR results below stay in the per-day `df`
    # only; unlike the CTD series they are not written back to `allcruises`.
    # Confirm whether that is intended.
    fitted, detrended = detrend_quadratic(df['time_numeric'], df['Campbell_Air_Temperature'])
    df['fitted_Temp_Air'] = fitted
    df['Air_detrend_Temp'] = detrended

    # ... and for RBRs
    for d in RBR_DEPTHS:
        if df[f'RBR_Temperature_{d}'].dropna().empty:
            print(f"{date}: Column RBR_Temperature_{d} is empty or contains only NaN values. Creating nan-column for depth {d}.")
            df[f'rbr_detrend_Temp_{d}'] = np.nan
        else:
            fitted, detrended = detrend_quadratic(df['time_numeric'], df[f'RBR_Temperature_{d}'])
            df[f'rbr_fitted_Temp_{d}'] = fitted
            df[f'rbr_detrend_Temp_{d}'] = detrended

    print(f'Mission {date} detrended.\n')

2023-03-02: Column CTD_Temperature_40cm contains only NaN values. Creating nan-column for depth 40cm.
Mission 2023-03-02 detrended.

2023-03-08: Column CTD_Temperature_50cm contains only NaN values. Creating nan-column for depth 50cm.
2023-03-08: Column RBR_Temperature_50cmcm is empty or contains only NaN values. creating nan-column for depth 50cm.
Mission 2023-03-08 detrended.

2023-03-09: Column CTD_Temperature_50cm contains only NaN values. Creating nan-column for depth 50cm.
Mission 2023-03-09 detrended.

2023-03-17: Column CTD_Temperature_50cm contains only NaN values. Creating nan-column for depth 50cm.
Mission 2023-03-17 detrended.



In [7]:
# Sanity check: daily mean of the detrended 30 cm CTD temperature.
# Days on which no detrended value was written show up as NaN.
allcruises['CTD_detrend_30cm'].groupby(allcruises['Date']).mean()

Date
2022-05-02         NaN
2022-05-05         NaN
2022-05-06         NaN
2022-05-07         NaN
2022-05-09         NaN
2022-05-10         NaN
2022-05-14         NaN
2022-05-17         NaN
2022-05-18         NaN
2022-05-19         NaN
2022-05-20         NaN
2022-10-05         NaN
2022-10-10         NaN
2022-10-12         NaN
2022-10-14         NaN
2022-10-17         NaN
2022-10-21         NaN
2022-10-22         NaN
2022-10-23         NaN
2023-03-02    6.804141
2023-03-03         NaN
2023-03-08    6.406879
2023-03-09    5.984589
2023-03-10         NaN
2023-03-11         NaN
2023-03-17    6.065578
2023-03-18         NaN
2023-03-19         NaN
2023-03-20         NaN
2023-07-21         NaN
2023-07-22         NaN
2023-07-23         NaN
2023-07-24         NaN
2023-07-27         NaN
2023-07-28         NaN
2023-07-29         NaN
2023-08-02         NaN
2023-08-03         NaN
2023-08-05         NaN
Name: CTD_detrend_30cm, dtype: float64