In [27]:
# Import relevant packages
import os
import numpy as np
import netCDF4 as nc
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import seaborn as sns
import datetime as dt
from netCDF4 import date2num, Dataset

In [2]:
# Print current directory
print(os.getcwd())
# Print list of files in directory
print(os.listdir())

/Users/joshblannin/OneDrive-UniversityofExeter (Archive)/Project/NN_DATA
['longhurst_v4_2010', 'Neural_network.ipynb', 'depth_cat.csv', '.DS_Store', 'mld_wClimatology.csv', 'Convert_to_netCDF.ipynb', 'nn_data.csv', 'pco2_complete.csv', 'models', 'sss_wClimatology.csv', 'sst_LR_flat.csv', 'data_investigation.ipynb', 'lhp_grid.csv', 'NN_data_with_regions_corrected.csv', 'variables_OLD.nc', 'Matching_data.ipynb', 'LHPs_original.csv', 'LHP_management.ipynb', '.ipynb_checkpoints', 'xco2_wClimatology.csv', 'chl_wClimatology.csv']


In [3]:
# Read chlorophyll-a data
chla_data = np.loadtxt('data/chl_wClimatology.csv', delimiter=',')
# Print shape of data as a check
print(f'Chlorophyll a data read - {chla_data.shape}')

Chlorophyll a data read - (69120, 360)


In [4]:
# Read mixed layer depth data
mld_data = np.loadtxt('data/mld_wClimatology.csv', delimiter=',')
# Print shape of data as a check
print(f'Mass Layer Depth data read - {mld_data.shape}')

Mass Layer Depth data read - (69120, 360)


In [5]:
# Read sea surface salinty data
sss_data = np.loadtxt('data/sss_wClimatology.csv', delimiter=',')
# Print shape of data as a check
print(f'Sea Surface Salinity data read - {sss_data.shape}')

Sea Surface Salinity data read - (69120, 360)


In [6]:
# Read sea surface temperature data
sst_data = np.loadtxt('data/sst_LR_flat.csv', delimiter=',')
# Print shape of data as a check
print(f'Sea Surface Temperature data read - {sst_data.shape}')

Sea Surface Temperature data read - (69120, 360)


In [7]:
# Read xCO2 data
xco2_data = np.loadtxt('data/xco2_wClimatology.csv', delimiter=',')
# Print shape of data as a check
print(f'Dry Mixing Ratio of CO2 data read - {xco2_data.shape}')

Dry Mixing Ratio of CO2 data read - (69120, 360)


In [8]:
# Read CO2 partial pressure data
pco2_data = np.loadtxt('data/pco2_SOCAT22.csv', delimiter=',')
# Print shape of data as a check
print(f'CO2 Partial Pressure data read - {pco2_data.shape}')

CO2 Partial Pressure data read - (69120, 360)


In [9]:
# Loop to organise data into 3D arrays
for r in range(0, pco2_data.shape[0], 180):
    print(f'{r} -> {r+179}')
    if r == 0:
        chla_stack = chla_data[r:r+180,:]
        mld_stack = mld_data[r:r+180,:]
        sss_stack = sss_data[r:r+180,:]
        sst_stack = sst_data[r:r+180,:]
        xco2_stack = xco2_data[r:r+180,:]
        pco2_stack = pco2_data[r:r+180,:]
    else:
        chla_stack = np.dstack((chla_stack,chla_data[r:r+180,:]))
        mld_stack = np.dstack((mld_stack,mld_data[r:r+180,:]))
        sss_stack = np.dstack((sss_stack,sss_data[r:r+180,:]))
        sst_stack = np.dstack((sst_stack,sst_data[r:r+180,:]))
        xco2_stack = np.dstack((xco2_stack,xco2_data[r:r+180,:]))
        pco2_stack = np.dstack((pco2_stack,pco2_data[r:r+180,:]))

# Print data stack shapes as a check
print(chla_stack.shape, mld_stack.shape, sss_stack.shape, sst_stack.shape, xco2_stack.shape, pco2_stack.shape)

0 -> 179
180 -> 359
360 -> 539
540 -> 719
720 -> 899
900 -> 1079
1080 -> 1259
1260 -> 1439
1440 -> 1619
1620 -> 1799
1800 -> 1979
1980 -> 2159
2160 -> 2339
2340 -> 2519
2520 -> 2699
2700 -> 2879
2880 -> 3059
3060 -> 3239
3240 -> 3419
3420 -> 3599
3600 -> 3779
3780 -> 3959
3960 -> 4139
4140 -> 4319
4320 -> 4499
4500 -> 4679
4680 -> 4859
4860 -> 5039
5040 -> 5219
5220 -> 5399
5400 -> 5579
5580 -> 5759
5760 -> 5939
5940 -> 6119
6120 -> 6299
6300 -> 6479
6480 -> 6659
6660 -> 6839
6840 -> 7019
7020 -> 7199
7200 -> 7379
7380 -> 7559
7560 -> 7739
7740 -> 7919
7920 -> 8099
8100 -> 8279
8280 -> 8459
8460 -> 8639
8640 -> 8819
8820 -> 8999
9000 -> 9179
9180 -> 9359
9360 -> 9539
9540 -> 9719
9720 -> 9899
9900 -> 10079
10080 -> 10259
10260 -> 10439
10440 -> 10619
10620 -> 10799
10800 -> 10979
10980 -> 11159
11160 -> 11339
11340 -> 11519
11520 -> 11699
11700 -> 11879
11880 -> 12059
12060 -> 12239
12240 -> 12419
12420 -> 12599
12600 -> 12779
12780 -> 12959
12960 -> 13139
13140 -> 13319
13320 -> 13499

In [12]:
# Define latitudes for netCDF
lats = [(y/10) for y in range(895,-905,-10)]
# Define longitudes for netCDF
lons = [(x/10) for x in range(-1795,1805,10)]

In [14]:
# Define dates for netCDF
dates = [] # Initialise empty list for storing dates
# Loop over years
for year in range(1990,2022):
    # Loop over months
    for month in range(1,13):
        # Append dates to list
        dates.append(dt.datetime(year,month,1))

# Convert dates to number of days since 01/01/1800
times = date2num(dates, 'days since 1800-01-01')
# Print length of times as a check
print(len(times)) # numeric values
# Print values as a check of order
print(times)

384
384


array([69396, 69427, 69455, 69486, 69516, 69547, 69577, 69608, 69639,
       69669, 69700, 69730, 69761, 69792, 69820, 69851, 69881, 69912,
       69942, 69973, 70004, 70034, 70065, 70095, 70126, 70157, 70186,
       70217, 70247, 70278, 70308, 70339, 70370, 70400, 70431, 70461,
       70492, 70523, 70551, 70582, 70612, 70643, 70673, 70704, 70735,
       70765, 70796, 70826, 70857, 70888, 70916, 70947, 70977, 71008,
       71038, 71069, 71100, 71130, 71161, 71191, 71222, 71253, 71281,
       71312, 71342, 71373, 71403, 71434, 71465, 71495, 71526, 71556,
       71587, 71618, 71647, 71678, 71708, 71739, 71769, 71800, 71831,
       71861, 71892, 71922, 71953, 71984, 72012, 72043, 72073, 72104,
       72134, 72165, 72196, 72226, 72257, 72287, 72318, 72349, 72377,
       72408, 72438, 72469, 72499, 72530, 72561, 72591, 72622, 72652,
       72683, 72714, 72742, 72773, 72803, 72834, 72864, 72895, 72926,
       72956, 72987, 73017, 73048, 73079, 73108, 73139, 73169, 73200,
       73230, 73261,

In [19]:
# Initialise netCDF to write
variables_netcdf = nc.Dataset('variables.nc', 'w', format='NETCDF4_CLASSIC')
# Define time, latitude, and longitude dimensions of netCDF
time_dim = variables_netcdf.createDimension('time', 384)
lat_dim = variables_netcdf.createDimension('lat', 180)
lon_dim = variables_netcdf.createDimension('lon', 360)

# Print dimension details as a check
for dim in variables_netcdf.dimensions.items():
    print(dim)

# Create latitude meta data
lat = variables_netcdf.createVariable('lat', np.float32, ('lat',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
# Create longitude meta data
lon = variables_netcdf.createVariable('lon', np.float32, ('lon',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
# Create time meta data
time = variables_netcdf.createVariable('time', np.float64, ('time',))
time.units = 'days since 1800-01-01'
time.long_name = 'time'

# Create chlorophyll variable and meta data 
chla_netcdf = variables_netcdf.createVariable('chl_a',np.float64,('lat','lon','time'))
chla_netcdf.standard_name = 'Chlorophyll-a concentration' # this is a CF standard name
chla_netcdf.units = 'mg m**-3'

# Create mixed layer depth variable and meta data 
mld_netcdf = variables_netcdf.createVariable('mld',np.float64,('lat','lon','time'))
mld_netcdf.standard_name = 'Mass Boundary Layer Depth' # this is a CF standard name
mld_netcdf.units = 'm'

# Create salinty variable and meta data 
sss_netcdf = variables_netcdf.createVariable('sss',np.float64,('lat','lon','time'))
sss_netcdf.standard_name = 'Sea Surface Salinity' # this is a CF standard name
sss_netcdf.units = 'psu'

# Create tempertature variable and meta data 
sst_netcdf = variables_netcdf.createVariable('sst',np.float64,('lat','lon','time'))
sst_netcdf.standard_name = 'Sea Surface Temperature' # this is a CF standard name
sst_netcdf.units = 'K'

# Create xCO2 variable and meta data 
xco2_netcdf = variables_netcdf.createVariable('xco2',np.float64,('lat','lon','time'))
xco2_netcdf.standard_name = 'Dry Mixing Ratio of CO2' # this is a CF standard name
xco2_netcdf.units = 'umol/mol'

# Create pCO2 variable and meta data 
pco2_netcdf = variables_netcdf.createVariable('pco2',np.float64,('lat','lon','time'))
pco2_netcdf.standard_name = 'Partial Pressure of CO2' # this is a CF standard name
pco2_netcdf.units = 'uatm'


lat[:] = np.asanyarray(lats) # Add latitude values
lon[:] = np.asanyarray(lons) # Add longitude values
time[:] = np.asarray(times) # Add time values

chla_netcdf[:,:,:] = chla_stack # Add chl-a data
mld_netcdf[:,:,:] = mld_stack # Add mld data
sss_netcdf[:,:,:] = sss_stack # Add sss data
sst_netcdf[:,:,:] = sst_stack # Add sst data
xco2_netcdf[:,:,:] = xco2_stack # Add xCO2 data 
pco2_netcdf[:,:,:] = pco2_stack # Add pCO2 data

# Print completion message and variable shapes as a check
print("-- Wrote data, chla_netcdf.shape is now ", chla_netcdf.shape)
print("-- Wrote data, mld_netcdf.shape is now ", mld_netcdf.shape)
print("-- Wrote data, sss_netcdf.shape is now ", sss_netcdf.shape)
print("-- Wrote data, sst_netcdf.shape is now ", sst_netcdf.shape)
print("-- Wrote data, xco2_netcdf.shape is now ", xco2_netcdf.shape)
print("-- Wrote data, pco2_netcdf.shape is now ", pco2_netcdf.shape)

('time', <class 'netCDF4._netCDF4.Dimension'>: name = 'time', size = 384)
('lat', <class 'netCDF4._netCDF4.Dimension'>: name = 'lat', size = 180)
('lon', <class 'netCDF4._netCDF4.Dimension'>: name = 'lon', size = 360)
-- Wrote data, chla_netcdf.shape is now  (180, 360, 384)
-- Wrote data, mld_netcdf.shape is now  (180, 360, 384)
-- Wrote data, sss_netcdf.shape is now  (180, 360, 384)
-- Wrote data, sst_netcdf.shape is now  (180, 360, 384)
-- Wrote data, xco2_netcdf.shape is now  (180, 360, 384)
-- Wrote data, pco2_netcdf.shape is now  (180, 360, 384)


In [20]:
# Print variables as a final check that all are available
print(variables_netcdf)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
    dimensions(sizes): time(384), lat(180), lon(360)
    variables(dimensions): float32 lat(lat), float32 lon(lon), float64 time(time), float64 chl_a(lat, lon, time), float64 mld(lat, lon, time), float64 sss(lat, lon, time), float64 sst(lat, lon, time), float64 xco2(lat, lon, time), float64 pco2(lat, lon, time)
    groups: 


In [21]:
# Close the Dataset.
variables_netcdf.close() 
# Print closed statement
print('Dataset is closed!')

Dataset is closed!
