# Historical data from Masfjorden & Lurefjorden

#### Historical data are available from

https://metadata.nmdc.no/metadata-api/landingpage/fe72be884c2dcba60b0211e27717aa23
    

This Jupyter Notebook shows how you can load data from netCDF files.

In [1]:
from datetime import datetime
import os
import sys
import numpy as np
import gsw
import matplotlib.pyplot as plt
import xarray as xr

# Working directory (this is a machine-specific absolute path; change it to a folder on your own computer)
os.chdir('C:/Users/miw005/OneDrive - University of Bergen/Undervisning2024') 

Load data from netCDF file

In [2]:
# Give the file path to the netCDF file (machine-specific absolute path; adjust it to your own computer)
file_path = 'C:/Users/miw005/OneDrive - University of Bergen/ScriptsAndData/Data/NMDC data/KB2023006004.nc'
data = xr.open_dataset(file_path) # load dataset using xarray

# Information about the data file (the 'summary' attribute of the dataset)
print(data.summary)


The file contains temperature, practical salinity, and oxygen measurements binned into 1 db pressure bins. The raw data was measured using a Seabird SBE 11plus V 5.2 CTD and post-processed with the Seasoft software package following IMR, Norway standard procedures. Only data from the downcast of the profile was used. If available, water sample data (DIC, TA, nutrients, Oxygen) collected during the cruise is included, as well as (uncalibrated) profiles of par, spar, transmission and flourescence.


In [3]:
# See variables, dimensions etc.
print(data)
#print(data.var)
#data.variables.keys() 

<xarray.Dataset>
Dimensions:                  (Pressure: 682, Nstations: 60, Niskin: 10)
Coordinates:
  * Pressure                 (Pressure) float64 1.0 2.0 3.0 ... 681.0 682.0
  * Nstations                (Nstations) int32 1 2 3 4 5 6 ... 55 56 57 58 59 60
  * Niskin                   (Niskin) int32 1 2 3 4 5 6 7 8 9 10
Data variables: (12/49)
    t                        (Pressure, Nstations) float64 ...
    t_flag                   (Pressure, Nstations) int16 ...
    sp                       (Pressure, Nstations) float64 ...
    sp_flag                  (Pressure, Nstations) int16 ...
    raw_o                    (Pressure, Nstations) float64 ...
    raw_o_flag               (Pressure, Nstations) int16 ...
    ...                       ...
    NO2                      (Niskin, Nstations) float64 ...
    NO2_flag                 (Niskin, Nstations) float64 ...
    PO4                      (Niskin, Nstations) float64 ...
    PO4_flag                 (Niskin, Nstations) float64 ...
  

In [4]:
# Collect the names of all data variables in the netCDF file and show them.
varis = list(data.data_vars)
print(varis)

['t', 't_flag', 'sp', 'sp_flag', 'raw_o', 'raw_o_flag', 'raw_c', 'raw_c_flag', 'raw_fl', 'raw_fl_flag', 'raw_par', 'raw_par_flag', 'raw_spar', 'raw_spar_flag', 'raw_trans', 'raw_trans_flag', 'sta', 'lon', 'lat', 'time', 'file', 'BTL_t_draw', 'BTL_t_draw_flag', 'BTL_p', 'BTL_p_flag', 'BTL_s', 'BTL_s_flag', 'BTL_t', 'BTL_t_flag', 'BTL_o', 'BTL_o_flag', 'BTL_fl', 'BTL_fl_flag', 'DIC', 'DIC_flag', 'Alc', 'Alc_flag', 'O2_Winkler', 'O2_Winkler_flag', 'O2_Winkler_doubles', 'O2_Winkler_doubles_flag', 'NO3', 'NO3_flag', 'NO2', 'NO2_flag', 'PO4', 'PO4_flag', 'Si', 'Si_flag']


In [6]:
# Dimensions
p = data['Pressure'].values # pressure levels as a numpy array
#s = np.array(data['Nstations'])
#n = np.array(data['Niskin'])

print('number of depth levels: ' + str(p.size))
#print('number of stations: ' + str(len(s)))
#print('number of niskins: ' + str(len(n)))


number of depth levels: 682



#### Function for loading CTD data from the netCDF file to a dictionary compatible with GFPy

In [7]:
def CTDdata_from_netCDF(filepath, bottledata):
    
    """
    Read CTD data from a netCDF file (historical data) and store it in a
    dictionary compatible with the GFPy package.
    
    Input: 
        filepath - path to netCDF file with CTD data
        bottledata - 'y' to include the bottle (water sample) data in the
                     dictionary; any other value (e.g. 'n') leaves it out
        
    Output:
        CTD_cruise - dictionary with one entry per station, keyed by the
                     station number read from the 'sta' variable. Each entry
                     is itself a dictionary with the CTD data of that station.
                     If a station number occurs more than once in the file,
                     the last occurrence is kept.
    
    """
    
    # Bottle variables to copy: (name in the netCDF file, key in the output dictionary)
    bottle_variables = (
        ('BTL_p', 'btl_depth'),
        ('O2_Winkler', 'btl_ox'),
        ('DIC', 'btl_DIC'),
        ('Alc', 'btl_Alc'),
        ('NO3', 'btl_NO3'),
        ('NO2', 'btl_NO2'),
        ('PO4', 'btl_PO4'),
        ('Si', 'btl_Si'),
    )

    CTD_cruise = {} # Dictionary to store CTD data    

    # Open the data set in a 'with' block so the file is closed again once all
    # values have been copied into numpy arrays (important when this function
    # is called for many files in a row).
    with xr.open_dataset(filepath) as data:
        p = np.array(data['Pressure']) # get pressure dimension
        all_station_numbers = np.array(data['sta']) # array with all station numbers
        varis = set(data.data_vars) # names of all variables available from the cruise
        has_oxygen = 'o' in varis # otherwise only raw oxygen ('raw_o') is read
        cruise = data.cruise

        # 'sta' is the column index of the station, 'STA' the station number
        for sta, STA in enumerate(all_station_numbers):
            ctd_d = {}

            # Get time
            date_string = str(np.array(data['time'][sta]))
            cast_time = datetime.strptime(date_string, '%Y-%m-%d %H:%M')

            # Get lat, lon
            lat = float(data['lat'][sta])
            lon = float(data['lon'][sta])

            # Get CTD data of this station for all pressure levels
            tdata = np.array(data['t'][:, sta])
            sdata = np.array(data['sp'][:, sta])

            # Keep only the pressure levels that have a temperature value
            nanremove = ~np.isnan(tdata)
            pres = p[nanremove]
            t = tdata[nanremove]
            sp = sdata[nanremove]

            # Calculate SA, CT, SIGMA and height
            sa = gsw.SA_from_SP(sp, pres, lon, lat)
            ct = gsw.CT_from_t(sa, t, pres)
            sigma0 = gsw.sigma0(sa, ct)
            z = gsw.z_from_p(pres, lat)

            # Save variables in dictionary (gfpy compatible)
            ctd_d['cruise'] = cruise
            ctd_d['st'] = STA
            ctd_d['time'] = cast_time
            ctd_d['LAT'] = lat
            ctd_d['LON'] = lon
            ctd_d['p'] = pres
            ctd_d['z'] = z
            # NOTE(review): gsw.z_from_p returns height (negative below the sea
            # surface), so this value is 10 m above the deepest sample - confirm
            # that this is the convention the GFPy functions expect.
            # np.nanmin raises on an empty array, so a station without any
            # valid temperature sample gets NaN instead.
            ctd_d['BottomDepth'] = np.nanmin(z) + 10 if np.size(z) else np.nan
            ctd_d['CT'] = ct
            ctd_d['SA'] = sa
            ctd_d['SIGTH'] = sigma0
            ctd_d['S'] = sp
            ctd_d['T'] = t

            # Oxygen data: use 'o' if available, otherwise the raw oxygen data
            if has_oxygen:
                ctd_d['OX'] = np.array(data['o'][:, sta])[nanremove]
                ctd_d['OX unit'] = 'umol/kg'
            else:
                ctd_d['OX raw'] = np.array(data['raw_o'][:, sta])[nanremove]
                ctd_d['OX raw unit'] = 'mL/L'

            # Add the bottle variables that are available in this file
            if bottledata == 'y':
                for nc_name, key in bottle_variables:
                    if nc_name in varis:
                        ctd_d[key] = np.array(data[nc_name][:, sta])

            CTD_cruise[STA] = ctd_d
        
    return CTD_cruise




In [8]:
# Use the function above to load the data into a dictionary (which can be used together with the GFPy functions).
# The dictionary has one entry per station; bottledata='y' also includes the bottle data.
CTD_cruise = CTDdata_from_netCDF(file_path, bottledata='y')


In [9]:
# The keys of the cruise dictionary are the station numbers
print(CTD_cruise.keys())

dict_keys([159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218])


In [10]:
# Pick one station and show which fields are stored for it
station = 200
CTD_cruise[station].keys()

dict_keys(['cruise', 'st', 'time', 'LAT', 'LON', 'p', 'z', 'BottomDepth', 'CT', 'SA', 'SIGTH', 'S', 'T', 'OX raw', 'OX raw unit', 'btl_depth', 'btl_ox', 'btl_DIC', 'btl_Alc', 'btl_NO3', 'btl_NO2', 'btl_PO4', 'btl_Si'])

The data stored in the "CTD_cruise" dictionary can now be used together with the functions from the GFPy package. However, errors might occur (e.g. due to updates that are not compatible).

One way to work around or solve this is to:
1. go to the source code: https://github.com/jakobdoerr/GFPy/blob/master/GFPy/Ocean.py
2. search for the function you want to use
3. copy the code into your script & edit as needed to make the function work



Want to look at several cruises?






In [11]:
CTD_Master = {}

# File path to the folder with several netCDF files (machine-specific; adjust it to your own computer)
data_dir = 'C:/Users/miw005/OneDrive - University of Bergen/ScriptsAndData/Data/NMDC data'

# Only netCDF files are loaded, so other files in the folder do not break the
# loop. Sorting makes the loading order independent of the order in which the
# operating system lists the files.
files = sorted(f for f in os.listdir(data_dir) if f.lower().endswith('.nc'))

for file_name in files:
    path = os.path.join(data_dir, file_name)
    test_CTD = CTDdata_from_netCDF(path, bottledata='y')

    if not test_CTD: # no stations in this file, so there is no cruise name to read
        continue

    # Every station of a file carries the same cruise name; read it from the first one
    first_station = next(iter(test_CTD))
    CTD_Master[test_CTD[first_station]['cruise']] = test_CTD


In [12]:
# The keys of the master dictionary are the cruise names
CTD_Master.keys()

dict_keys(['DFN2021460', 'GOS2012115', 'GOS2014117', 'GOS2017114', 'GOS2018111', 'GOS2019114', 'GOS2022112', 'GOS2023001003', 'GOS2023001013', 'HB2023009005', 'HM2011622', 'HM2013624', 'HM2015620', 'HM2016619', 'KB2019602', 'KB2019604', 'KB2020603', 'KB2021604', 'KB2021605', 'KB2021618', 'KB2021620', 'KB2022602', 'KB2022605', 'KB2022614', 'KB2022623', 'KB2023006004', 'KB2023006015'])

In [13]:
# Station numbers available for one cruise (here 'KB2021605')
CTD_Master['KB2021605'].keys()

dict_keys([232, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383])

In [14]:
# Fields stored for one station (here station 220) of cruise 'KB2021605'
CTD_Master['KB2021605'][220].keys()

dict_keys(['cruise', 'st', 'time', 'LAT', 'LON', 'p', 'z', 'BottomDepth', 'CT', 'SA', 'SIGTH', 'S', 'T', 'OX', 'OX unit', 'btl_depth', 'btl_ox', 'btl_DIC', 'btl_Alc', 'btl_NO3', 'btl_NO2', 'btl_PO4', 'btl_Si'])