In [None]:
# Plot as in Gladyshev, M. (2002). Biophysics of the surface microlayer of aquatic ecosystems. IWA Publishing.
# Compare the difference between SML temperature / air temperature and SML temperature / bulk temperature.

# Pia Goecke, pia.goecke@uni-oldenburg.de
# 06.12.2024

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata
import warnings
warnings.filterwarnings("ignore")
import os
import seaborn as sns
import plotly.graph_objs as go

%matplotlib qt


In [2]:
# Cruise HE598: change the working directory, then load the workbook and
# use its 'Date_Time' column as a datetime index.
os.chdir('C:/Users/piago/Documents/Uni/5_Semester/Hiwi')

# NOTE: the variable is called csv_file but points to an Excel workbook
csv_file = "C:/Users/piago/Documents/Uni/5_Semester/Hiwi/Data/HE598_All_Grids_Single_Sheet_Revised_v1.xlsx"

df = pd.read_excel(csv_file).set_index('Date_Time')
df.index = pd.to_datetime(df.index)
df_598 = df  # alias of df (same object, no copy)

In [3]:
# Cruise HE609: load the workbook and use its 'Date_Time' column as a
# datetime index.
# NOTE: the variable is called csv_file but points to an Excel workbook
csv_file = "C:/Users/piago/Documents/Uni/5_Semester/Hiwi/Data/HE609_All_Grids_Single_Sheet_Revised_v1.xlsx"

df = pd.read_excel(csv_file).set_index('Date_Time')
df.index = pd.to_datetime(df.index)
df_609 = df  # alias of df (same object, no copy)

In [4]:
# Cruise HE614: load the workbook (the first 10 rows of the sheet are skipped),
# use 'Date_Time' as a datetime index and rename two CTD temperature columns
# so that they no longer carry the '_[°C]' suffix.
# NOTE: the variable is called csv_file but points to an Excel workbook
csv_file = "C:/Users/piago/Documents/Uni/5_Semester/Hiwi/Data/HE614_All_Grids_Single_Sheet.xlsx"

df = pd.read_excel(csv_file, skiprows=10).set_index('Date_Time')
df.index = pd.to_datetime(df.index)

unit_free_names = {
    'CTD_Temperature_50cm_[°C]': 'CTD_Temperature_50cm',
    'CTD_Temperature_60cm_[°C]': 'CTD_Temperature_60cm',
}
df = df.rename(columns=unit_free_names)

# df['Date'] = df.index.date
df_614 = df  # alias of df (same object, no copy)



In [5]:
# Cruise HE626: load the workbook and use its 'Date_Time' column as a
# datetime index.
# NOTE: the variable is called csv_file but points to an Excel workbook
csv_file = "C:/Users/piago/Documents/Uni/5_Semester/Hiwi/Data/HE626_All_Grids_Single_Sheet.xlsx"

df = pd.read_excel(csv_file).set_index('Date_Time')
df.index = pd.to_datetime(df.index)
df_626 = df  # alias of df (same object, no copy)

In [6]:
# Tag every cruise frame with its cruise name and copy the datetime index
# into a regular 'Date_Time' column (the index itself is discarded when the
# frames are concatenated with ignore_index=True).
for cruise_name, cruise_frame in (('HE598', df_598),
                                  ('HE609', df_609),
                                  ('HE614', df_614),
                                  ('HE626', df_626)):
    cruise_frame['cruise'] = cruise_name
    cruise_frame['Date_Time'] = cruise_frame.index

In [41]:
# Stack the four cruises into one frame with a fresh 0..n-1 index, then derive
# the calendar day (as a datetime at midnight) and the month of every sample.
cruise_frames = [df_598, df_609, df_614, df_626]
allcruises = pd.concat(cruise_frames, axis=0, ignore_index=True)

sample_times = allcruises['Date_Time']
allcruises['Date'] = pd.to_datetime(sample_times.dt.date)
allcruises['Month'] = sample_times.dt.month

In [8]:
# Interpolate the Thies solar radiation, temperature (sensor 3) and wind speed
# series. The interpolation runs separately for every day ('Date' group), so
# gaps are never filled with values taken from a different day.
# Series.interpolate() is called with its default settings.
for column in ('Thies_Solar_Radiation_SMP6_AVG',
               'Thies_Temperature_3_AVG',
               'Thies_Wind_Speed_CUS_AVG'):
    # transform() returns a Series aligned with allcruises' own index, so the
    # result can be assigned directly without dropping a group-key level.
    allcruises[f'{column}_interp'] = (
        allcruises.groupby('Date')[column]
        .transform(lambda group: group.interpolate())
    )


In [None]:
# One figure per day: raw Thies CUS wind speed against time.
grouped = allcruises.groupby('Date')
for date, data in grouped:
    fig, ax = plt.subplots()
    ax.scatter(data['Date_Time'], data['Thies_Wind_Speed_CUS_AVG'])  # Thies_Temperature_3_AVG
    ax.set_title(data['Date'].iloc[0])
    plt.show()

In [None]:
# Count the days on which a sensor delivered no data at all, i.e. the number
# of non-NaN samples of that day is zero.
grouped = allcruises.groupby('Date')

# NOTE(review): the variable is named "campbell", but the column that is
# checked is the Thies CUS wind speed. If the Campbell sensor was meant,
# switch the column to 'Campbell_Wind_Speed' - confirm with the author.
missing_campbell = grouped['Thies_Wind_Speed_CUS_AVG'].count().eq(0).sum()
missing_Thies_3 = grouped['Thies_Temperature_3_AVG'].count().eq(0).sum()

# The message names the column that is actually evaluated above.
print(f'number of days with missing Thies CUS wind: {missing_campbell} \nnumber of days with missing Thies_3: {missing_Thies_3}')

# 5.5.: no campbell
# 5.5.: no thies


number of days with misisng Campbell wind: 1 
number of days with missing Thies_3: 1


In [15]:
# Temperature differences relative to the bulk water temperature, which is
# taken from the 'CTD_Temperature_100cm' column.
bulk_temperature = allcruises['CTD_Temperature_100cm']
allcruises['sml-bulk'] = allcruises['CTD_Temperature_SML'] - bulk_temperature
allcruises['air-bulk'] = allcruises['Campbell_Air_Temperature'] - bulk_temperature  # choose airtemp

# Air-temperature columns that can be used instead:
#Campbell_Air_Temperature
#Thies_Temperature_3_AVG
#Thies_Temperature_3_AVG_interp

In [16]:
# plot SML-bulk / air-bulk
# Scatter of the SML-bulk temperature difference against the air-bulk
# temperature difference; the black lines mark zero on both axes.

df = allcruises#[allcruises['cruise'] == 'HE626']  # plot everything or only 1 cruise

fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(
    data=df, x='air-bulk', y='sml-bulk',
    hue='Month',  # hue as 'Thies_Solar_Radiation_SMP6_AVG_interp' or 'Wind_Speed_U10', or 'Month', 'Date', ...
    # style = 'Month',
    palette='viridis', s=30, zorder=2, ax=ax,
)

# zero lines are drawn below the points (lower zorder)
ax.axvline(0, color='black', zorder=1)
ax.axhline(0, color='black', zorder=1)

ax.set_ylabel('Temperature difference SML-bulk [°C]', fontsize=20)
ax.set_xlabel('Temperature difference Air-bulk [°C]', fontsize=20)
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.legend(fontsize=20, title='Month', title_fontsize=20, markerscale=2)
plt.show()
plt.show()

In [30]:
# Compare the two air-temperature sensors on 2023-03-17.
df = allcruises[allcruises['Date'] == '2023-03-17']
plt.figure()
# allcruises was concatenated with ignore_index=True, so df.index is only a
# row number; the timestamp lives in the 'Date_Time' column.
plt.scatter(df['Date_Time'], df['Campbell_Air_Temperature'], label='Campbell_Air_Temperature')
plt.scatter(df['Date_Time'], df['Thies_Temperature_3_AVG'], label='Thies_Temperature_3_AVG')
plt.xlabel('time')
plt.ylabel('Temperature [°C]')
plt.legend()
plt.show()

In [None]:
# Split the data into the days listed below ("dates of interest") and all
# remaining days. The trailing string holds the per-day notes and is displayed
# as the cell output.
dates_of_interest = [
    '2023-03-08',
    '2022-05-02',
    '2022-05-10',
    '2022-05-17',
    # '2023-08-05',
    '2022-10-10',
]
is_date_of_interest = allcruises['Date'].isin(pd.to_datetime(dates_of_interest))
df_dates_of_interest = allcruises[is_date_of_interest]
df_normal_dates = allcruises[~is_date_of_interest]


'''
2023-03-08: thies - long tail with high SML-Air, no CTD:100cm data after 12:30. Thies weird in wind, temp, rad. 
    --> use campbell
2022-05-02: radiation - 1 outliers in beginning, Campbell-airtemp below watertemp, airtemp at 50cm above, check mission plan (west)
    --> cut start, check all thies temps
2022-05-10: high ctd_SML in beginning, high thies2 temp in beginning (might be right)
    --> 85cm for bulk?
2022-05-17: +- same grid twice. sml temp negative corr air temp. air 50 cm warmer than 3m, Sml warmer than 100
    --> check all thies temps, check precip
2022-08-05: funny locations, 
2022-10-10: ctd100 befor 13 -1C off, no change in loc, precip event -> decrease all temps
    --> cut start

2023-03.17: high difference between water / air temperature, air much warmer
'''

'\n2023-03-08: thies - long tail with high SML-Air, no CTD:100cm data after 12:30. Thies weird in wind, temp, rad. \n    --> use campbell\n2022-05-02: radiation - 1 outliers in beginning, Campbell-airtemp below watertemp, airtemp at 50cm above, check mission plan (west)\n    --> cut start, check all thies temps\n2022-05-10: high ctd_SML in beginning, high thies2 temp in beginning (might be right)\n    --> 85cm for bulk?\n2022-05-17: +- same grid twice. sml temp negative corr air temp. air 50 cm warmer than 3m, Sml warmer than 100\n    --> check all thies temps, check precip\n2022-08-05: funny locations, \n2022-10-10: ctd100 befor 13 -1C off, no change in loc, precip event -> decrease all temps\n    --> cut start\n\n2023-03.17: high dif water / air, air much warmer\n'

In [19]:
# normal dates
# Same scatter as for all cruises, restricted to the days that are not in
# dates_of_interest and coloured by the interpolated solar radiation.
# df = allcruises[allcruises['Date'] == '2022-10-10']  # plot everything or only 1 cruise
df = df_normal_dates

fig, ax = plt.subplots(figsize=(12, 10))
sns.scatterplot(
    data=df, x='air-bulk', y='sml-bulk',
    hue='Thies_Solar_Radiation_SMP6_AVG_interp',  # hue as 'Thies_Wind_Speed_CUS_AVG_interp', 'Thies_Solar_Radiation_SMP6_AVG_interp' or 'Wind_Speed_U10', or 'Month', 'Date', ...
    # style = 'Month',
    palette='inferno', s=30, zorder=2, ax=ax,
)

# zero lines are drawn below the points (lower zorder)
ax.axvline(0, color='black', zorder=1)
ax.axhline(0, color='black', zorder=1)

ax.set_ylabel('SML-bulk', fontsize=20)
ax.set_xlabel('Air-bulk', fontsize=20)
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.legend(fontsize=20,
          title='Solar Radiation [W/m\u00b2]',
          title_fontsize=20,
          markerscale=2)
plt.show()

# days listed as conspicuous (dd.mm.):
# 08.03.
# 02.05.
# 10.05.
# 17.05.
# 05.08.
# 10.10.

# check 17.03. air

In [14]:
# temperatures
# One figure per day of cruise HE626 with all selected temperature series.
# temperatures = df_dates_of_interest.filter(regex = '^CTD_Temp') # or: 'CTD_Temperature_SML', 'CTD_Temperature_100cm', 'Campbell_Air_Temperature', 'Thies_Temperature_3_AVG'
temperatures = ['CTD_Temperature_SML', 'CTD_Temperature_100cm', 'Campbell_Air_Temperature', 'Thies_Temperature_3_AVG']
grouped = allcruises[allcruises['cruise'] == 'HE626'].groupby('Date')

for date, data in grouped:
    plt.figure(figsize=(8, 6))
    for temp in temperatures:
        plt.scatter(data['Date_Time'], data[temp], label=f'{temp}', alpha=0.6)
    # Decorate and show once per figure, after every series has been drawn;
    # showing inside the inner loop would display the figure after the first
    # series only.
    plt.title(f"Temperatures for {date}")
    plt.xlabel('time')
    plt.ylabel('Temperature [°C]')
    plt.grid(True)
    plt.legend()
    plt.show()

In [20]:
# solar radiation
# One figure per date of interest with all selected radiation series.
# radiation= df_dates_of_interest.filter(regex = 'Solar').columns.tolist() # or only net radiation: 
radiation = [ 'Solar_Radiation_Net', 'Thies_Solar_Radiation_SMP6_AVG_interp', 'Thies_Solar_Radiation_SMP6_AVG']
grouped = df_dates_of_interest.groupby('Date')

for date, data in grouped:
    plt.figure(figsize=(8, 6))
    for rad in radiation:
        plt.scatter(data['Date_Time'], data[rad], label=f'{rad}', alpha=0.6)
    # Decorate and show once per figure, after every series has been drawn.
    plt.title(f"Solar radiation for {date}")
    plt.xlabel('time')
    plt.ylabel('radiation')
    plt.grid(True)
    plt.legend()
    plt.show()


In [21]:
# show all wind things
# One figure per date of interest with every column whose name matches the
# regex below.
wind = df_dates_of_interest.filter(regex = 'Wind_Direction').columns.tolist() # regex = 'Wind_Speed', or 'Wind_Direction', or only:
# wind = ['Wind_Speed_U10', 'Thies_Wind_Speed_CUS_AVG']
grouped = df_dates_of_interest.groupby('Date')

for date, data in grouped:
    plt.figure(figsize=(8, 6))
    for p in wind:
        # x axis is the timestamp column; the frame's index is only a row
        # number because allcruises was concatenated with ignore_index=True
        plt.scatter(data['Date_Time'], data[p], label=f'{p}', alpha=0.6)
    # Decorate and show once per figure, after every series has been drawn.
    plt.title(f"wind for {date}")
    plt.xlabel('time')
    plt.ylabel('windspeed or direction')
    plt.grid(True)
    plt.legend()
    plt.show()

In [31]:
# plot temperatures
# Time series of the two temperature differences and the underlying
# temperatures for a single day.
date = '2022-05-17'
df = allcruises[allcruises['Date'] == date]

# (column, legend label) in drawing order
series_to_plot = [
    ('sml-bulk', 'SML-Bulk'),
    ('air-bulk', 'Air-Bulk'),
    ('CTD_Temperature_100cm', 'CTD_100'),
    ('CTD_Temperature_SML', 'CTD_SML'),
    ('Thies_Temperature_3_AVG', 'Air'),
]

fig, ax = plt.subplots(figsize=(8, 6))
for column, legend_label in series_to_plot:
    ax.scatter(df['Date_Time'], df[column], label=legend_label)
# plt.scatter(df['Date_Time'], df['Precipitation_Intensity'])

ax.set_ylabel('Temperature', fontsize=15)
ax.set_xlabel('Time', fontsize=15)
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=15)
ax.legend()
plt.show()


In [None]:
# thies-temperatures
# One figure per date of interest: Campbell air temperature together with the
# five Thies temperature sensors.
thies_sensors = ['CUS', '5', '2', '4', '3']  # CUS and 5 at 200cm, 2 at 150, 4 at 100, 3 at 50
grouped = df_dates_of_interest.groupby('Date')

for date, data in grouped:
    fig, ax = plt.subplots(figsize=(8, 6))
    sample_times = data['Date_Time']
    ax.scatter(sample_times, data['Campbell_Air_Temperature'], label = 'Campbell')
    for sensor in thies_sensors:
        thies_column = f'Thies_Temperature_{sensor}_AVG'
        ax.scatter(sample_times, data[thies_column], label=f'{sensor}', alpha=0.6)
    ax.set_title(f"Thies_temperatures {date}")
    ax.set_xlabel('time')
    ax.set_ylabel('Temperature [°C]')
    ax.grid(True)
    ax.legend()
    plt.show()

In [None]:
# Precipitation
# One figure per date of interest with precipitation intensity and amount.
precips = ['Precipitation_Intensity', 'Precipitation_Amount']
grouped = df_dates_of_interest.groupby('Date')

for date, data in grouped:
    plt.figure(figsize=(8, 6))
    for precip in precips:
        plt.scatter(data['Date_Time'], data[precip], label=f'{precip}', alpha=0.6)
    # Decorate and show once per figure, after every series has been drawn.
    plt.title(f"Precipitation {date}")
    plt.xlabel('time')
    plt.ylabel('Precipitation')
    plt.grid(True)
    plt.legend()
    plt.show()

In [24]:
# plot everything for one day
# Select the day and the columns that the plots further down in this cell use.
date = '2023-03-09'
df = allcruises.loc[allcruises['Date'] == date]

# choose what to show:

# temperatures = df.filter(regex = '^CTD_Temp') # or:
temperatures = [
    'CTD_Temperature_SML',
    'CTD_Temperature_100cm',
    'Campbell_Air_Temperature',
    'Thies_Temperature_3_AVG',
]

# radiation= df.filter(regex = 'Solar').columns.tolist() # or only net radiation: 
radiation = [
    'Solar_Radiation_Net',
    'Thies_Solar_Radiation_SMP6_AVG_interp',
    'Thies_Solar_Radiation_SMP6_AVG',
]

# wind = df.filter(regex = 'Wind_Speed').columns.tolist() # regex = 'Wind_Speed', or 'Wind_Direction', or:
wind = [
    'Wind_Speed_U10',
    'Campbell_Wind_Speed',
    'Thies_Wind_Speed_CUS_AVG',
]

# sub-frame holding every column whose name contains 'Precipitation'
precipitation = df.filter(regex='Precipitation')




# where was it?
# Position of the selected day in the air-bulk / sml-bulk plane.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df, x='air-bulk', y='sml-bulk',
    hue='Date',  # hue as 'Thies_Solar_Radiation_SMP6_AVG_interp' or 'Wind_Speed_U10', or 'Month', 'Date', ...
    palette='inferno', s=20, zorder=2, ax=ax,
)

# zero lines are drawn below the points (lower zorder)
ax.axvline(0, color='black', zorder=1)
ax.axhline(0, color='black', zorder=1)

ax.set_ylabel('SML-bulk', fontsize=15)
ax.set_xlabel('Air-bulk', fontsize=15)
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=15)
plt.show()

# as time series
# Both temperature differences of the selected day against time.
fig, ax = plt.subplots(figsize=(8, 6))
for difference in ('sml-bulk', 'air-bulk'):
    ax.scatter(df['Date_Time'], df[difference], label=difference)
ax.set_ylabel('T-difference', fontsize=15)
ax.set_xlabel('Time', fontsize=15)
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=15)
ax.legend()
plt.show()

# Locations
# Track of the selected day; the first sample is marked red, the last blue.
# Times are formatted as zero-padded HH:MM (e.g. 09:05 instead of 9:5).
clock_times = df['Date_Time'].dt.strftime('%H:%M')

plt.figure(figsize=(10, 6))
sc = plt.scatter(df['Longitude'], df['Latitude'],
                #  c = df['Date_Time'],
                c = 'skyblue',
                 label=f'Data for {date}', alpha=0.6)
plt.text(df['Longitude'].min(), df['Latitude'].max(), f'Start: {clock_times.iloc[0]} UTC', color = 'red', ha='left', va='center', fontsize=20)
plt.text(df['Longitude'].max(), df['Latitude'].max(), f'End: {clock_times.iloc[-1]} UTC', color = 'blue', ha='right', va='center', fontsize=20)
plt.title(f"Mission for {date}", fontsize = 25)
plt.scatter(df['Longitude'].iloc[0], df['Latitude'].iloc[0], s = 100, c = 'red')
plt.scatter(df['Longitude'].iloc[-1], df['Latitude'].iloc[-1], s = 100, c = 'blue')
# cbar = plt.colorbar(sc)
# cbar.set_label('CTD Temperature 30cm [°C]')
plt.xlabel('Longitude', fontsize = 20)
plt.ylabel('Latitude', fontsize = 20)
plt.grid(True)
# plt.legend()
plt.show()

# Temperature
# All selected temperature series of the day in one figure.
plt.figure(figsize=(8, 6))
for temp in temperatures:
    plt.scatter(df['Date_Time'], df[temp], label=f'{temp}', alpha=0.6)
# Decorate and show once, after every series has been drawn.
plt.title(f"Temperatures for {date}")
plt.xlabel('time')
plt.ylabel('Temperature [°C]')
plt.grid(True)
plt.legend()
plt.show()

 
# Radiation
# All selected radiation series of the day in one figure.
plt.figure(figsize=(8, 6))
for rad in radiation:
    plt.scatter(df['Date_Time'], df[rad], label=f'{rad}', alpha=0.6)
# Decorate and show once, after every series has been drawn.
plt.title(f"Radiation for {date}")
plt.xlabel('time')
plt.ylabel('radiation')
plt.grid(True)
plt.legend()
plt.show()

# Wind
# All selected wind series of the day in one figure.
plt.figure(figsize=(8, 6))
for p in wind:
    plt.scatter(df['Date_Time'], df[p], label=f'{p}', alpha=0.6)
# Decorate and show once, after every series has been drawn.
plt.title(f"Wind for {date}")
plt.xlabel('time')
plt.ylabel('windspeed or direction')
plt.grid(True)
plt.legend()
plt.show()

# Precipitation
# All precipitation columns of the day in one figure.
plt.figure(figsize =(8,6))
# iterating over a DataFrame yields its column labels
for precip in precipitation:
    plt.scatter(df['Date_Time'], df[precip], label = f'{precip}')
# Decorate and show once, after every series has been drawn.
plt.title(f"Precipitation for {date}")
plt.xlabel('time')
plt.ylabel('Precipitation')
plt.grid(True)
plt.legend()
plt.show()


In [32]:
# Histogram of the quality flags of Thies temperature sensor 5, one figure per
# day. Days without a single valid flag are reported instead of plotted.
for date in np.unique(allcruises['Date']):
    df = allcruises[allcruises['Date'] == date]
    quality_flags = df['Quality_Thies_Temperature_5_AVG']
    # Check for data explicitly instead of catching the ValueError that
    # plt.hist raises on all-NaN input; this also avoids leaving an empty
    # figure behind for such days.
    if quality_flags.notna().any():
        plt.figure()
        plt.hist(quality_flags)
        plt.title(f'Quality_Thies_Temperature_5_AVG {date}')
        plt.show()
    else:
        print(f'no Quality flags for {date}')

# all quality for Thies Temperature probably good

no Quality flags for 2022-05-05T00:00:00.000000000


In [26]:
# Split 2022-05-17 into the part before 11:00 and the part from 11:00 onwards.
df_598_17 = allcruises[allcruises['Date'] == '2022-05-17']
split_time = '2022-05-17 11:00'
df_598_17_1 = df_598_17[df_598_17['Date_Time'] < split_time]
# '>=' so that a sample stamped exactly 11:00:00 is not dropped from both halves
df_598_17_2 = df_598_17[df_598_17['Date_Time'] >= split_time]

In [19]:
 # plot all lat lons
# One map per day: sample positions coloured by the 'CTD_Temperature_30cm'
# column; the first sample is marked red, the last blue.
df = allcruises
grouped = df.groupby('Date')

for date, data in grouped:
    # zero-padded HH:MM strings (e.g. 09:05 instead of 9:5)
    clock_times = data['Date_Time'].dt.strftime('%H:%M')

    plt.figure(figsize=(8, 6))
    sc = plt.scatter(data['Longitude'], data['Latitude'],  c = data['CTD_Temperature_30cm'], label=f'Data for {date}', alpha=0.6)
    plt.text(data['Longitude'].min(), data['Latitude'].min(), f'Start: {clock_times.iloc[0]}', color = 'red', ha='left', va='center', fontsize=20)
    plt.text(data['Longitude'].max(), data['Latitude'].min(), f'End: {clock_times.iloc[-1]}', color = 'blue', ha='right', va='center', fontsize=20)
    plt.title(f"Longitude vs Latitude for {date}")
    plt.scatter(data['Longitude'].iloc[0], data['Latitude'].iloc[0], s = 100, c = 'red')
    plt.scatter(data['Longitude'].iloc[-1], data['Latitude'].iloc[-1], s = 100, c = 'blue')
    cbar = plt.colorbar(sc)
    cbar.set_label('CTD Temperature 30cm [°C]')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.grid(True)
    plt.legend()
    # plt.savefig(f"C:/Users/piago/Documents/Uni/5_Semester/Hiwi/Plots/grid_{date}.png")
    plt.show()

In [49]:
# Daily mean of the 'Wind_Speed_U10' column; the resulting Series is the
# cell output.
print('Mean Windspeed 10 m')
wind_u10 = allcruises['Wind_Speed_U10']
wind_u10.groupby(allcruises['Date']).mean()

Mean Windspeed 10 m


Date
2022-05-02    1.870873
2022-05-05    5.212753
2022-05-06    5.104095
2022-05-07    5.130642
2022-05-09    3.994729
2022-05-10    6.918919
2022-05-14    5.721792
2022-05-17    1.770685
2022-05-18    2.521153
2022-05-19    4.389318
2022-05-20    2.652267
2022-10-05    8.250623
2022-10-10         NaN
2022-10-12         NaN
2022-10-14         NaN
2022-10-17    7.682324
2022-10-21    5.408773
2022-10-22    8.333761
2022-10-23    7.008002
2023-03-02    5.119673
2023-03-03    6.670655
2023-03-08    3.129510
2023-03-09    2.657068
2023-03-10    9.028155
2023-03-11    8.529922
2023-03-17    3.988371
2023-03-18    3.856098
2023-03-19    5.402730
2023-03-20    6.640493
2023-07-21         NaN
2023-07-22         NaN
2023-07-23         NaN
2023-07-24         NaN
2023-07-27         NaN
2023-07-28         NaN
2023-07-29         NaN
2023-08-02         NaN
2023-08-03         NaN
2023-08-05         NaN
Name: Wind_Speed_U10, dtype: float64

In [None]:
# Define FFT function
def plot_fft(signal, sampling_interval, label):
n = len(signal)
fft_values = fft(signal)
fft_freq = fftfreq(n, d=sampling_interval)
plt.plot(fft_freq[:n // 2], np.abs(fft_values)[:n // 2], label=label)