Temperature and Salinity data processing and QC
collect files to go into the gridding, eg
copy ..\..\cloudstor\SOTS-Temp-Raw-Data\netCDF\IMOS*FV01*SOFS-10*.nc .
mkdir resample
python3 ocean_dp\processing\down_sample.py IMOS*.nc
This creates the netCDF file like
netcdf IMOS_DWM-SOTS_COPST_20210420_SOFS_FV02_SOFS-10-2021-SBE37SMP-ODO-RS232-03709513-125m-mean_END-20220512_C-20220706 {
dimensions:
TIME = 9299 ;
variables:
double TIME(TIME) ;
TIME:long_name = "time" ;
TIME:units = "days since 1950-01-01 00:00:00 UTC" ;
TIME:calendar = "gregorian" ;
TIME:axis = "T" ;
TIME:standard_name = "time" ;
TIME:valid_max = 90000. ;
TIME:valid_min = 0. ;
double NOMINAL_DEPTH ;
NOMINAL_DEPTH:axis = "Z" ;
NOMINAL_DEPTH:long_name = "nominal depth" ;
NOMINAL_DEPTH:positive = "down" ;
NOMINAL_DEPTH:reference_datum = "sea surface" ;
NOMINAL_DEPTH:standard_name = "depth" ;
NOMINAL_DEPTH:units = "m" ;
NOMINAL_DEPTH:valid_max = 12000. ;
NOMINAL_DEPTH:valid_min = -5. ;
double LATITUDE ;
LATITUDE:axis = "Y" ;
LATITUDE:long_name = "latitude" ;
LATITUDE:reference_datum = "WGS84 geographic coordinate system" ;
LATITUDE:standard_name = "latitude" ;
LATITUDE:units = "degrees_north" ;
LATITUDE:valid_max = 90. ;
LATITUDE:valid_min = -90. ;
double LONGITUDE ;
LONGITUDE:axis = "X" ;
LONGITUDE:long_name = "longitude" ;
LONGITUDE:reference_datum = "WGS84 geographic coordinate system" ;
LONGITUDE:standard_name = "longitude" ;
LONGITUDE:units = "degrees_east" ;
LONGITUDE:valid_max = 180. ;
LONGITUDE:valid_min = -180. ;
float CNDC(TIME) ;
CNDC:_FillValue = NaNf ;
CNDC:comment = "Conductivity [S/m]" ;
CNDC:units = "S/m" ;
CNDC:calibration_SerialNumber = "9513" ;
CNDC:calibration_CalibrationDate = "17-Nov-20" ;
CNDC:calibration_UseG_J = 1. ;
CNDC:calibration_G = -0.9750336 ;
CNDC:calibration_H = 0.1390885 ;
CNDC:calibration_I = -0.0003036082 ;
CNDC:calibration_J = 4.247957e-05 ;
CNDC:calibration_CPcor = -9.57e-08 ;
CNDC:calibration_CTcor = 3.25e-06 ;
CNDC:calibration_WBOTC = 4.084195e-07 ;
CNDC:calibration_Slope = 1. ;
CNDC:calibration_Offset = 0. ;
CNDC:name = "Water conductivity" ;
CNDC:standard_name = "sea_water_electrical_conductivity" ;
CNDC:long_name = "sea_water_electrical_conductivity" ;
CNDC:valid_min = 0.f ;
CNDC:valid_max = 40.f ;
CNDC:coordinates = "TIME LATITUDE LONGITUDE NOMINAL_DEPTH" ;
CNDC:ancillary_variables = "CNDC_quality_control CNDC_standard_error CNDC_number_of_observations" ;
float CNDC_standard_error(TIME) ;
CNDC_standard_error:_FillValue = NaNf ;
CNDC_standard_error:comment = "sample bin standard deviation" ;
float CNDC_number_of_observations(TIME) ;
CNDC_number_of_observations:_FillValue = NaNf ;
CNDC_number_of_observations:comment = "number of samples" ;
byte CNDC_quality_control(TIME) ;
CNDC_quality_control:_FillValue = 99b ;
CNDC_quality_control:comment = "maximum of quality flags of input data" ;
This can be used directly in MatLAB
% time variable
time = ncread(fn, 'TIME') + datetime(1950,1,1);
% read the temperature from the file and some of its metadata
temp = ncread(fn, 'TEMP');
temp_qc = ncread(fn, 'TEMP_quality_control');
temp_name = ncreadatt(fn, 'TEMP', 'long_name');
temp_units = ncreadatt(fn, 'TEMP', 'units');
msk = temp_qc <= 2; % create a mask for only good data
plot(time(msk), temp(msk)); % plot only good data
This can include any other data sources (PAR, FLNTUS, pCO2, ...)
python3 ocean_dp\dbms\load_sqlite_db.py resample\IMOS*.nc
python3 ocean_dp\dbms\create_from_sqlite_db.py SOFS-9-2020.sqlite
python3 ocean_dp\processing\calc_depth.py SOFS-10-2021.sqlite.nc
python3 ocean_dp\processing\calc_mld.py SOFS-10-2021.sqlite.nc
This can be used directly in MatLAB,
fn = 'SOFS-10-2021.sqlite.nc'; % contains the file to use
var = 'TEMP'; % just for convienence
% time variable
time = ncread(fn, 'TIME') + datetime(1950,1,1);
% pressure variable (PRES_ALL has presssure for every instrument)
pres = ncread(fn, 'PRES_ALL');
temp = ncread(fn, var);
var_name = ncreadatt(fn, var, 'long_name');
var_units = ncreadatt(fn, var, 'units');
temp_idx = ncread(fn, ['IDX_' var]);
nominal_depth = ncread(fn, 'NOMINAL_DEPTH');
temp_depth = nominal_depth(temp_idx+1);
pres_temp = pres(:, temp_idx+1); % contains a pressure at each timestamp
instrument = ncread(fn, 'INSTRUMENT');
instrument_temp = instrument(:,temp_idx+1)'; % contains the instrument the data came from
files = dir("*.sqlite.nc"); % get list of files to use
figure(1); clf;
std_level = [0:5:100, 125:25:500, 550:50:2000, 2100:100:5500];
std_level_gap = diff(std_level);
std_level(1) = 0.5;
edges = std_level - [0 std_level_gap]/2;
edges(1) = -10;
% create storage for output variables
time_flat = [];
temp_flat = [];
pres_flat = [];
mld = [];
time_n = [];
var='PSAL';
for a = 1:numel(files)
fn = [files(a).folder '/' files(a).name];
disp(files(a).name);
% time variable
time = ncread(fn, 'TIME') + datetime(1950,1,1);
% pressure variable (PRES_ALL has presssure for every instrument)
pres = ncread(fn, 'PRES_ALL');
try
% temperature variable
temp = ncread(fn, var);
var_name = ncreadatt(fn, var, 'long_name');
var_units = ncreadatt(fn, var, 'units');
temp_idx = ncread(fn, ['IDX_' var]);
nominal_depth = ncread(fn, 'NOMINAL_DEPTH');
temp_depth = nominal_depth(temp_idx+1);
% create a flat array of time, temperature and pressure
% so we have a vector of times, temperature, pressure observations
time_mat = repmat(time, size(temp_depth, 1), 1);
time_flat = [time_flat; reshape(time_mat, 1, [])'];
temp_flat = [temp_flat; reshape(temp, [] ,1)];
pres_temp = pres(:, temp_idx+1);
pres_flat = [pres_flat; reshape(pres_temp, [] ,1)];
mld_n = nan(size(time));
try
mld_n = ncread(fn, 'MLD');
catch
end
mld = [mld; mld_n];
time_n = [time_n; time];
catch
end
end
time_grid = min(time_flat):hours(24):max(time_flat);
%pres_flat(pres_flat>4500) = nan;
% find bin for each depth
d_disc = discretize(pres_flat, edges);
% find bin for each time
t_disc = discretize(time_flat, time_grid);
% mask only good current data, within time and depth grid
msk = not(isnan(d_disc) | isnan(t_disc));% | isnan(temp_flat));
% create a vector of subscripts within the time/depth_grid that the sample goes into
subs = [t_disc(msk) d_disc(msk)];
% find mean within each bin
temp_binned = accumarray(subs, temp_flat(msk), [size(time_grid,2) size(edges,2)], @mean, nan);
pres_binned = accumarray(subs, pres_flat(msk), [size(time_grid,2) size(edges,2)], @mean, nan);
% take list of time gridded files,
% grid into 'standard' depth levels, and standard times
files = dir("SOFS*.sqlite.nc");
figure(1); clf;
figure(2); clf;
% setup standard level grid
std_level = [0:5:100, 125:25:500, 550:50:2000, 2100:100:5500];
std_level_gap = diff(std_level);
std_level(1) = 0.5;
edges = std_level - [0 std_level_gap]/2;
edges(1) = -10;
% storage of output flat arrays
time_flat = [];
temp_flat = [];
pres_flat = [];
mld = [];
time_n = [];
var='TEMP';
for a = 1:numel(files)
fn = [files(a).folder '/' files(a).name];
disp(files(a).name);
% time variable
time = ncread(fn, 'TIME') + datetime(1950,1,1);
% pressure variable (PRES_ALL has presssure for every instrument)
pres = ncread(fn, 'PRES_ALL');
try
% temperature variable
temp = ncread(fn, var);
var_name = ncreadatt(fn, var, 'long_name');
var_units = ncreadatt(fn, var, 'units');
temp_idx = ncread(fn, ['IDX_' var]);
nominal_depth = ncread(fn, 'NOMINAL_DEPTH');
temp_depth = nominal_depth(temp_idx+1);
% create a flat array of time, temperature and pressure
% so we have a vector of times, temperature, pressure observations
time_mat = repmat(time, size(temp_depth, 1), 1);
time_flat = [time_flat; reshape(time_mat, 1, [])'];
temp_flat = [temp_flat; reshape(temp, [] ,1)];
pres_temp = pres(:, temp_idx+1);
pres_flat = [pres_flat; reshape(pres_temp, [] ,1)];
mld_n = nan(size(time));
try
mld_n = ncread(fn, 'MLD');
catch
end
mld = [mld; mld_n];
time_n = [time_n; time];
catch
end
end
time_grid = min(time_flat):hours(24)*10:max(time_flat);
%pres_flat(pres_flat>4500) = nan;
% find bin for each depth
d_disc = discretize(pres_flat, edges);
% find bin for each time
t_disc = discretize(time_flat, time_grid);
% mask only good current data, within time and depth grid
msk = not(isnan(d_disc) | isnan(t_disc));% | isnan(temp_flat));
% create a vector of subscripts within the time/depth_grid that the sample goes into
subs = [t_disc(msk) d_disc(msk)];
% find mean within each bin
temp_binned = accumarray(subs, temp_flat(msk), [size(time_grid,2) size(edges,2)], @mean, nan);
pres_binned = accumarray(subs, pres_flat(msk), [size(time_grid,2) size(edges,2)], @mean, nan);
% create a scatter plot of the binned data
figure(1); clf
for i=1:numel(std_level)
if ~all(isnan(temp_binned(:, i)))
scatter(time_grid, std_level(i)'-zeros(size(temp_binned(:, i))), 5, temp_binned(:, i), 'filled'); hold on
end
end
f=figure(1);
axis 'ij'
if (strcmp(var, 'TEMP'))
colormap('jet');
end
a=gca;
a.YTick=[10 20 50 100 200 500 1000 2000 5000];
a.YAxis.MinorTickValues = std_level;
grid on
ylim([0.4 6000]);
%a.YScale='log';
a.FontSize=8;
ylabel('pressure (dbar)');
xlim([datetime(2009,1,1) datetime(2022,10,1)])
f.Units = 'pixels';
f.Position(3:4) = [800 600];
c = colorbar;
c.Label.String = [strrep(var_name, '_', ' ') ' (' strrep(var_units, 'degrees_', '\circ ') ')'];
ylim([0 550]);
caxis([8 15]);
% create a binned mix-layer-depth
t_mld_disc = discretize(time_n, time_grid);
msk_mld = not(isnan(mld) | isnan(t_mld_disc));
mld_binned = accumarray(t_mld_disc(msk_mld), mld(msk_mld), [size(time_grid, 2) 1], @mean, nan);
plot(time_grid, mld_binned, 'Color', [0.5 0.5 0.5]);
% interpolate vertcially each timestep
interp_level = 5:10:500;
temp_interp = nan([size(interp_level, 2) size(temp_binned, 1)]);
for i=1:size(temp_binned,1)
good_data = ~isnan(temp_binned(i, :));
if (sum(good_data) > 2)
temp_interp(:, i) = interp1(std_level(good_data), temp_binned(i, good_data), interp_level, 'linear');
end
end
% create the vertical filled plot with contours
f=figure(2); clf
colormap('jet');
imagesc(datenum(time_grid), interp_level, temp_interp, 'AlphaData',~isnan(temp_interp)); axis 'ij'
datetick('x', 'keeplimits');
hold on
contour(datenum(time_grid), interp_level, temp_interp, [9 10 11 12], 'k');
c = colorbar;
c.Label.String = [strrep(var_name, '_', ' ') ' (' strrep(var_units, 'degrees_', '\circ ') ')'];
ylabel('pressure (dbar)');
f.Units = 'pixels';
f.Position(3:4) = [1800 600];
caxis([8 15]);
%print( figure(2), '-dpng', 'SOTS-Gridded-10d-Temp-contour.png' , '-r600');
%print( figure(1), '-dpng', ['SOTS-' var '-scatter-griddedx2.png'] , '-r600');
% out_file = 'SOFS-Gridded-All.nc';
%
% mySchema.Name = '/';
% mySchema.Format = 'classic';
% mySchema.Dimensions(1).Name = 'TIME';
% mySchema.Dimensions(1).Length = n_time;
% mySchema.Dimensions(2).Name = 'DEPTH';
% mySchema.Dimensions(2).Length = n_depth;
%
% mySchema.Variables(1).Name = 'TIME';
% mySchema.Variables(1).Dimensions(1).Name = 'TIME';
% mySchema.Variables(1).Dimensions(1).Length = n_time;
% mySchema.Variables(1).Datatype = 'double';
% mySchema.Variables(2).Name = 'DEPTH';
% mySchema.Variables(2).Dimensions(1).Name = 'DEPTH';
% mySchema.Variables(2).Dimensions(1).Length = n_depth;
% mySchema.Variables(2).Datatype = 'double';
% mySchema.Variables(3).Name = 'TEMP';
% mySchema.Variables(3).Dimensions(1).Name = 'TIME';
% mySchema.Variables(3).Dimensions(1).Length = n_time;
% mySchema.Variables(3).Dimensions(2).Name = 'DEPTH';
% mySchema.Variables(3).Dimensions(2).Length = n_depth;
% mySchema.Variables(3).Datatype = 'single';
%
% ncwriteschema(out_file, mySchema);
%
%
% ncwrite(out_file, 'TIME', datenum(time_grid) - datenum(1950,1,1));
% ncwrite(out_file, 'DEPTH', std_level);
% ncwrite(out_file, 'TEMP', temp_binned);