In [None]:
#Date 9-22-2022
#Author: Talia Kurtz

#Code Purpose:
# The purpose of this code is to get the ERA data into the format needed to input it into a SOM. The goal here is to first resample the ERA data from one hour to 6 hours. Once that step is done, the data needs
# to be subset to winter months only. For this code, the winter months are defined as Jan., Feb., March, April, Oct., Nov., and Dec. The resample used for going from 1 hour to 6 hours is .nearest() because
# we do not want a mean over the hours; we just want the measurements at 00z, 06z, 12z, and 18z for each day. Once sampled, each time step has its own mean removed and the data are then normalized. This is done by calculating
# the overall minimum and maximum across all time steps, subtracting the minimum from the maximum, and then dividing 100 by that value to get an MSLP factor that is multiplied by the data. The input data will be of the shape (hours, lat*lon).

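#What is saved from this code:
# The data_train array, which is used as the input when generating the SOM (its np.save call is currently commented out).
# The raw MSLP data from the ERA data, which is used when graphing the raw data for the SOMs (its to_netcdf call is currently commented out).
# The time values of the winter 6-hourly samples (som_time_data.npy).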

##################################################################################################################################################################################################################
#Imports for the Code:
#Imports
import xarray as xr
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

##################################################################################################################################################################################################################
# Load the raw, regridded ERA5 MSLP file. Nothing has been subsampled yet: the record is still hourly and spans all months.
PATH = '/Users/research/thesis_code/'
dy = xr.open_dataset(f'{PATH}e5.MSL_ERA_SOM__new1.nc')
##################################################################################################################################################################################################################
# Thin the record to one sample every 6 hours. .nearest() takes the closest existing sample for each 6-hourly label instead of averaging.
dx = dy.resample(time='6H').nearest()
##################################################################################################################################################################################################################
# Keep only the time steps that fall in the winter months (Jan-Apr and Oct-Dec).
winter_months = [1, 2, 3, 4, 10, 11, 12]
in_winter = dx.time.dt.month.isin(winter_months)
ds = dx.isel(time=in_winter)
##################################################################################################################################################################################################################
# Pull out of the winter-only 6-hourly dataset the pieces that the later steps need.
time_values = ds['time'].values
lat = ds['lat'].values
lon = ds['lon'].values

# MSL divided by 100 (presumably Pa -> hPa; confirm against the file's units attribute).
# mslpraw keeps the xarray coordinates; mslp_values is the same field as a plain NumPy array.
mslpraw = ds['MSL'] / 100  # NON-anomaly data: no mean has been removed from this field
mslp_values = (ds['MSL'] / 100).values

# Allocate the array that will house the 6-hour interval data: one row per time step, one column per (lat, lon) point.
nhour, nlat, nlon = (int(ds[name].size) for name in ('time', 'lat', 'lon'))
mslparr = np.empty((nhour, nlat * nlon))  # np.empty leaves the contents uninitialized until they are written
##################################################################################################################################################################################################################
#Place the raw MSLP data into the array (mslparr): every time step's (lat, lon) map is flattened into one row.
#The dimensions are put in (time, lat, lon) order by name and then reshaped in one vectorized step, so this does not
#depend on the dimension order stored in the file and avoids building a stacked index once per time step.
#Columns are lat-major (lon varies fastest), the same ordering that stack(point=["lat", "lon"]) produces.
mslparr[...] = mslpraw.transpose("time", "lat", "lon").values.reshape(mslparr.shape)
##################################################################################################################################################################################################################
#Calculate the anomaly data: the mean of each time step (one mean per row of mslparr, taken over all lat*lon points)
#is removed from that time step's data. keepdims=True keeps the means as a column so they broadcast across each row,
#and the subtraction is done in place on mslparr.
mslparr -= mslparr.mean(axis=1, keepdims=True)
##################################################################################################################################################################################################################
#Normalize the anomaly data with a single scale factor. The factor is 100 divided by the overall range
#(maximum minus minimum) of the anomalies across every time step and grid point.
#The extremes are taken over the whole array at once instead of being accumulated row by row from sentinel
#start values; nanmin/nanmax skip NaN entries so missing values cannot leave a placeholder in the result.
minmslp = np.nanmin(mslparr)
maxmslp = np.nanmax(mslparr)
print(maxmslp, minmslp)

#Warnings are silenced at the top of this notebook, so a zero or NaN range would otherwise quietly turn
#data_train into inf/NaN. Stop here with a clear message instead.
mslp_range = maxmslp - minmslp
if not np.isfinite(mslp_range) or mslp_range <= 0:
    raise ValueError(f"Cannot normalize MSLP data: max - min = {mslp_range}")

#Generate the MSLP factor that is multiplied onto the data to normalize it.
mslp_factor = 100. / mslp_range
print(mslp_factor)

#The data is now being normalized.
data_train = mslparr * mslp_factor
###################################################################################################################################################################################################################
#Saving the variables that we need for the SOM making and plotting 
#mslpraw.to_netcdf('/Users/research/thesis_code/SOM_MSLPraw_era_data.nc')

In [None]:
# Show the normalized SOM input array (rows are time steps, columns are lat*lon points) as this cell's output.
data_train

In [None]:
# The SOM input array is not written by default; uncomment the next line to save it as well.
#np.save('som_data_train.npy', data_train)
# Write the time values of the winter 6-hourly samples; the relative file name resolves against the current working directory.
np.save(file='som_time_data.npy', arr=time_values)