In [1]:
# Import relevant packages
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import seaborn as sns
from netCDF4 import Dataset
from datetime import date, timedelta
import time

In [26]:
### Due to file size and number of files (200GB total)
### these data sets were stored on a flash drive for ease of accessibility

# Access data on flash drive
os.chdir('### Your Directory to daily SST data up to years ### ')
# Keep only the non-hidden entries (the folders that store data).
# os.listdir() returns names in arbitrary order, so sort them: the order of
# `folders` is the order in which the months are processed and stacked below.
# NOTE(review): name order is only chronological if the folder names are
# zero-padded (e.g. 1990_01 rather than 1990_1) - confirm against the drive.
folders = sorted(f for f in os.listdir() if not f.startswith('.'))
# Print folder names as check
print(folders)

In [None]:
# Location of one daily data file (01/01/1990), opened to inspect its layout
file = '### Your Directory to a single daily SST data (used was 1990-01-01)###'
# Open the dataset; it is left open because the following cells read from `data`
data = Dataset(file)
# List the names of the variables stored in the dataset
variable_names = data.variables.keys()
print(variable_names)

In [6]:
# Look up the 'time' variable and show the metadata attached to it
time_variable = data.variables['time']
print(time_variable)

<class 'netCDF4._netCDF4.Variable'>
int32 time(time)
    long_name: reference time of sst field
    standard_name: time
    axis: T
    units: seconds since 1981-01-01 00:00:00
    calendar: gregorian
    bounds: time_bnds
    comment: 
unlimited dimensions: time
current shape = (1,)
filling on, default _FillValue of -2147483647 used

In [7]:
# Pull the raw coordinate vectors out of the dataset (plain arrays, mask dropped)
lat_values, lons = (
    np.ma.getdata(data.variables[axis_name][:]) for axis_name in ('lat', 'lon')
)
# Latitudes are used in reverse order
lats = lat_values[::-1]

In [8]:
# Show the latitude vector first, then the longitude vector
for coordinate_values in (lats, lons):
    print(coordinate_values)

array([-179.975, -179.925, -179.875, ...,  179.875,  179.925,  179.975],
      dtype=float32)

In [None]:
### REDUCING RESOLUTION OF DATA OVER TIME AND SPACE ###
# For every entry in `folders` (one folder of daily files per month) this cell:
#   1. averages the daily 'analysed_sst' fields into one monthly mean, ignoring NaNs,
#   2. averages that monthly mean onto a 1x1 degree grid (180 rows x 360 columns),
#   3. saves the 1-degree grid as '<folder>_LR.csv' in the monthly-means directory.
# After the loop all monthly grids are stacked into `month_avg_stack` with shape
# (180, 360, number of folders). Uses `folders`, `lats` and `lons` from earlier cells.

# Upper latitude edge / lower longitude edge of every 1-degree cell
cell_top_lats = range(90, -90, -1)
cell_left_lons = range(-180, 180, 1)

# The coordinate vectors are the same for every month, so the rows and columns that
# belong to each 1-degree cell are worked out once here rather than once per cell
# per month.
# Latitude bands cover (latitude-1, latitude]; the last band also includes its lower
# edge so that a value lying exactly on it is still captured.
lat_rows = [
    np.flatnonzero(
        (lats <= latitude)
        & ((lats >= latitude - 1) if latitude == cell_top_lats[-1] else (lats > latitude - 1))
    )
    for latitude in cell_top_lats
]
# Longitude bands cover [longitude, longitude+1); the last band also includes its
# upper edge for the same reason.
lon_cols = [
    np.flatnonzero(
        (lons >= longitude)
        & ((lons <= longitude + 1) if longitude == cell_left_lons[-1] else (lons < longitude + 1))
    )
    for longitude in cell_left_lons
]

# One 1-degree grid per processed folder, in processing order
monthly_grids = []

# Loop over folders (years and months)
for i, folder in enumerate(folders):

    # Print loop index and folder name
    print(f'{i}:{folder} -', end=' ')

    # Access flash drive folder for desired year
    os.chdir(f'### Your Directory to daily SST data ###/{folder}')
    # List the data files (sorted so the processing order is deterministic)
    files = sorted(F for F in os.listdir() if not F.startswith('.'))
    # Print length of file list - should correspond to number of days in the month
    print(f'{len(files)}', end=' ')

    # An empty folder would otherwise silently reuse the previous month's data
    if not files:
        raise ValueError(f'No data files found in folder {folder!r}')

    # Running NaN-aware sum and count of the daily fields. This gives the same mean
    # as stacking every day and calling np.nanmean, but only ever holds one day in
    # memory and also works when the folder contains a single file.
    sst_sum = None
    sst_count = None

    # Loop over file names in list
    for j, file_name in enumerate(files):

        # Print index of loop to display each day of data being accessed
        print(f'{j}', end=',')

        # Read the sea surface temperature values; the context manager closes the
        # file as soon as the values have been copied into memory
        with Dataset(file_name) as daily_ds:
            sst_vals = np.ma.getdata(daily_ds.variables['analysed_sst'][:])

        # Reduce to a 2-dimensional float array (a copy, so NaN can be stored in it)
        sst_vals = np.squeeze(sst_vals).astype(np.float64)
        # Values below -32700 are treated as masked values and set to NaN
        sst_vals[sst_vals < -32700] = np.nan
        # Flip data to correct orientation (rows then line up with `lats`)
        sst_vals = np.flip(sst_vals, axis=0)

        # Accumulate only the valid (non-NaN) values
        valid = ~np.isnan(sst_vals)
        if sst_sum is None:
            sst_sum = np.zeros(sst_vals.shape, dtype=np.float64)
            sst_count = np.zeros(sst_vals.shape, dtype=np.int32)
        sst_sum += np.where(valid, sst_vals, 0.0)
        sst_count += valid

    # Monthly mean; positions with no valid day at all stay NaN
    month_sst_avg = np.full(sst_sum.shape, np.nan)
    np.divide(sst_sum, sst_count, out=month_sst_avg, where=sst_count > 0)

    # 1x1 degree grid, NaN until a cell mean is written into it
    one_deg_grid = np.full((len(lat_rows), len(lon_cols)), np.nan)

    # Loop over latitude bands - increments of 1-degree
    for lat_ind, rows in enumerate(lat_rows):
        band = month_sst_avg[rows, :]
        # Loop over longitude bands - increments of 1-degree
        for lon_ind, cols in enumerate(lon_cols):
            one_deg_square = band[:, cols]
            # Squares that are empty or entirely NaN keep their NaN; otherwise
            # store the mean of the values in the square (excluding NaNs)
            if not np.isnan(one_deg_square).all():
                one_deg_grid[lat_ind, lon_ind] = np.nanmean(one_deg_square)

    # Change directory to folder for storing monthly averages
    os.chdir('### Your Directory to store monthly means ###')
    # Save monthly averages to this folder
    np.savetxt(f'{folder}_LR.csv', one_deg_grid, delimiter=',')

    # Keep the grid; stacking once at the end avoids re-copying the stack every month
    monthly_grids.append(one_deg_grid)

    # Print the shape the stack has so far as a check
    print(f'{one_deg_grid.shape + (len(monthly_grids),)}')


# Stack the monthly grids along a third axis (always 3D, even for a single month)
month_avg_stack = np.dstack(monthly_grids)

# Print final complete message with 3D array shape as a check
print(f'Complete: {month_avg_stack.shape}')

In [None]:
# Flatten the (lat, lon, month) stack into 2D by placing each month's grid
# underneath the previous one: the result has shape (n_lat * n_months, n_lon).
# np.atleast_3d lets a single 2D month grid through as a stack of one month.
stack_3d = np.atleast_3d(month_avg_stack)
# Bring the month axis to the front, then merge it with the latitude axis
sst_LR_flat = np.moveaxis(stack_3d, 2, 0).reshape(-1, stack_3d.shape[1])

# Print shape of flat data as a check
print(sst_LR_flat.shape)
# Save flat data (written to the current working directory)
np.savetxt('sst_LR_flat_NEW.csv', sst_LR_flat, delimiter=',')