## Importing and setting up paths

In [1]:
import sys,os,importlib,gc
import xarray as xr
import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# NOTE(review): absolute, user-specific path -- the notebook only runs unchanged on the
# original author's machine. The relative paths used below ('../scripts', './') depend on it.
os.chdir('/Users/peterpfleiderer/Projects/tropical_cyclones/tc_emulator/results')

# make the project module importable, then reload it so that edits to the module
# are picked up without restarting the kernel
sys.path.append('../scripts')
import _weather_pattern_class; importlib.reload(_weather_pattern_class)

# `atl` is the analysis object that all following cells operate on
atl = _weather_pattern_class.weather_patterns(source='ERA5', working_directory='./')

## Prepare weather patterns

### prepare input

Here the raw data are loaded and some preprocessing is applied:

* select ASO
* compute standardized anomalies
* regrid to 2.5x2.5 over 90W-10W and 10N-30N

In [133]:
# load MSLP and VWS
# these fields were preprocessed using the scripts in input_data_preparation
# NOTE(review): absolute, user-specific data paths -- adjust when running elsewhere.
# `.loc['1950':'2020']` is a label-based slice along the first dimension (presumably time);
# the upper bound lies beyond the 1950-2019 range given in the file names.
# The third argument of add_data looks like a matplotlib colormap name -- confirm in _weather_pattern_class.
tmp = xr.load_dataset('/Users/peterpfleiderer/Projects/data/ERA5/ERA5_vws_1950-2019_daily_noTC3.nc')['vws'].loc['1950':'2020']
atl.add_data(tmp, 'VWS', 'PRGn_r')
# `tmp` is reused: from here on it refers to the MSLP field
tmp = xr.load_dataset('/Users/peterpfleiderer/Projects/data/ERA5/ERA5_msl_1950-2019_daily_noTC3.nc')['msl'].loc['1950':'2020']
atl.add_data(tmp, 'MSLP', 'RdBu_r')

In [134]:
# the input is transformed to standardized anomalies and only the ASO season is considered
# months 8, 9, 10 = August, September, October (ASO); one color is given per month
atl.preprocess_select_months(months=[8,9,10], month_colors=['#bfef45','#ffe119','#f58231'])
# ['1981','2010'] is presumably the reference period for the anomalies -- confirm in _weather_pattern_class
atl.preprocess_standardized_anomalies(['1981','2010'])
# np.arange excludes the stop value: lats are 10 ... 27.5 (8 points), lons are -90 ... -12.5 (32 points)
atl.preprocess_regrid(reference_lats=np.arange(10, 30, 2.5),reference_lons=np.arange(-90, -10, 2.5))
# presumably combines both variables into one feature vector per time step -- TODO confirm
atl.preprocess_create_vector(variables = ['VWS','MSLP'])
# the same name is passed to atl.load_input in the "load input" cell
atl.store_input('ERA5_VWS-MSLP_noTC3')

Reuse existing file: bilinear_40x120_8x32.nc
Reuse existing file: bilinear_40x120_8x32.nc


### load input

In [2]:
# reload the input that was saved under this name by the preprocessing cell
atl.load_input('ERA5_VWS-MSLP_noTC3')
# years 1979 ... 2018 (the stop value 2019 is exclusive)
years = np.arange(1979, 2019)
atl.set_split(years=years)

### Sammon mapping and kmeans clustering

The Sammon mapping can be useful for visualization purposes.\
Additionally, it is used here as input for a kmeans clustering.\
This kmeans clustering is later used as initialization for the SOM clustering.


In [147]:
import sys
from sklearn import manifold
sys.path.append('/Users/peterpfleiderer/Projects/git-packages/sammon')
from sammon import sammon

def mapping_sammon(X):
    """Project ``X`` onto two dimensions with ``sammon``.

    ``sammon`` is called with ``display=0`` and ``init='pca'``.

    Returns
    -------
    tuple
        ``(projection, extras)`` where ``projection`` is the first value
        returned by ``sammon`` and ``extras`` is a dict holding the second
        returned value under the key ``'error'``.
    """
    projection, stress = sammon(X, 2, display=0, init='pca')
    extras = {'error': stress}
    return projection, extras

# apply mapping_sammon through the wrapper under the tag 'mapping_sammon',
# then load the mapping by that tag and plot it
atl.mapping_wrapper(mapping_sammon, tag='mapping_sammon')
atl.load_mapping('mapping_sammon')
atl.plot_mapping()

In [148]:
from sklearn import cluster
def clustering_kmeans(X, n_clusters=20, n_init=100, random_state=1):
    """Cluster ``X`` with k-means and return the cluster centers.

    The resulting centers are used further down as initialization of the SOM.
    The SOM is created with a fixed ``random_seed``, so the k-means step is
    seeded as well; otherwise every re-run would give different centers and
    therefore a different SOM initialization.

    Parameters
    ----------
    X : array-like
        Samples to cluster, passed unchanged to ``KMeans.fit``.
    n_clusters : int, default 20
        Number of clusters. Should equal ``nrows*ncols`` of the SOM grid.
    n_init : int, default 100
        Number of k-means runs with different centroid seeds.
    random_state : int or None, default 1
        Seed for the centroid initialization. Pass ``None`` to get the
        previous, unseeded behaviour.

    Returns
    -------
    dict
        ``{'centers_map': cluster_centers_}`` of the fitted ``KMeans`` object.
    """
    clustering_ = cluster.KMeans(
        init='k-means++',
        n_clusters=n_clusters,
        n_init=n_init,
        random_state=random_state,
    )
    clustering_.fit(X)
    return {'centers_map':clustering_.cluster_centers_}

# 5x4 grid -> 20 nodes, the same number as the 20 clusters requested in clustering_kmeans
nrows,ncols = 5,4
atl.load_mapping('mapping_sammon')
# overwrite=True presumably recomputes even if a stored result exists -- confirm in _weather_pattern_class
atl.clustering_wrapper(clustering_kmeans, tag='clustering_kmeans20', overwrite=True)
# 'mapping_sammon_1979-2018' carries the year range that was set via atl.set_split (1979 ... 2018)
atl.define_plot_environment(pre_mapping='mapping_sammon', clustering='clustering_kmeans20', post_mapping='mapping_sammon_1979-2018', nrows=nrows, ncols=ncols)

{}
.//ERA5_VWS-MSLP_noTC3/mapping_sammon_1979-2018/clustering_kmeans20/mapping_sammon_1979-2018/grid_5x4


### SOM weather patterns

Weather patterns are obtained from **S**elf **O**rganizing **M**aps. \
As mentioned above, the kmeans clusters are used as initialization. \
This is a hack that avoids having to choose weather patterns from different initializations. \
It therefore seemed more robust to me, but there would be numerous equally reasonable alternatives.

In [159]:
from minisom import MiniSom
from sklearn.metrics import pairwise_distances

def SOM_(X):
	"""Train a MiniSom self-organizing map on ``X``, initialized with the kmeans cluster centers.

	The function reads the module-level names ``atl``, ``nrows``, ``ncols`` and ``tag``
	and modifies ``atl`` (plot environment, loaded mapping, ``_dir_lvl2``, ``_clust_labels``).
	One page per training round is written to ``training_network.pdf`` in ``atl._dir_lvl2``.

	Parameters
	----------
	X : array with a second axis; ``X.shape[1]`` is used as the SOM input length
	    and ``X`` is the training data passed to ``som.train_random``.

	Returns
	-------
	dict with
	    'centers_map'   : SOM weights reshaped to ``(nrows*ncols, X.shape[1])``
	    'other_results' : ``{'SOM': som}``, the trained MiniSom object
	"""
	# this is a bit of a hack:
	# I get the centers from my kmeans clusters
	# then I initialize a SOM object from MiniSom
	# I create some pca_weights that I directly replace by my cluster centers from the kmeans clustering

	# set up the kmeans plot environment and take a copy of its cluster centers
	# (presumably define_plot_environment is what fills atl._clust_centers -- confirm)
	atl.define_plot_environment(pre_mapping='mapping_sammon', clustering='clustering_kmeans20', post_mapping='mapping_sammon_1979-2018', nrows=nrows, ncols=ncols)
	init_centers = atl._clust_centers.copy()
	# NOTE(review): init_grid is not used anywhere below
	init_grid = atl._axes_grid
	# switch to the raw mapping and point the level-2 directory at this clustering (uses the global `tag`)
	atl.load_mapping('mapping_raw')
	atl._dir_lvl2 = atl._dir_lvl0+'/'+ 'mapping_raw'+atl._split_tag + '/' + tag
	atl.load_mapping_for_plotting('mapping_sammon_1979-2018')

	# SOM with a fixed random seed; the PCA weights only serve as placeholder and are replaced in the loop below
	som = MiniSom(nrows,ncols,X.shape[1], random_seed=1, sigma=1,learning_rate=0.01)
	som.pca_weights_init(atl._vector.values)
	# for each cluster label: look up the first grid position carrying that label and put the
	# i-th kmeans center there
	# (assumes the rows of init_centers are ordered like np.unique(atl._clust_labels) -- TODO confirm)
	for i,lab in enumerate(np.unique(atl._clust_labels)):
		som._weights[atl._axes_grid[atl._grid_labels==lab,0][0],atl._axes_grid[atl._grid_labels==lab,1][0]] = init_centers.values[i,:]

	with PdfPages(atl._dir_lvl2+'/training_network.pdf') as pdf:
		# 100 rounds of 10000 random training iterations each; the network is plotted BEFORE each
		# round, so page i shows the state after i*10000 iterations and the final state is not plotted
		for i in range(100):
			# label each row of atl._pre_mapping with the index of its nearest SOM node
			atl._clust_labels = pairwise_distances(som.get_weights().reshape((nrows*ncols,X.shape[1])),atl._pre_mapping[:,:]).argmin(axis=0)
			atl.map_cluster_centers(overwrite=True)
			fig,ax = plt.subplots(nrows=1, ncols=1, figsize=(4,3)); ax.axis('off')
			atl.plot_network_ax(ax)
			# close the figure after saving so that figures do not pile up in memory
			pdf.savefig(bbox_inches='tight'); plt.close()
			som.train_random(X, 10000, verbose=False)
			# NOTE(review): looks redundant if get_weights() returns the internal weight array -- confirm against MiniSom
			som._weights = som.get_weights()
			gc.collect()

	out = {'centers_map':som.get_weights().reshape((nrows*ncols,X.shape[1])), 'other_results':{'SOM':som}}
	return out

# nrows, ncols and tag are module-level names that SOM_ reads when it is called by the wrapper,
# so they have to be defined before clustering_wrapper runs
nrows,ncols = 5,4
tag = 'SOM_kmeansInit%sx%s_v1' % (nrows,ncols)
# SOM_ loads 'mapping_raw', so the raw mapping is set up first
atl.mapping_raw()
atl.clustering_wrapper(SOM_, tag=tag, overwrite=True)

{}
.//ERA5_VWS-MSLP_noTC3/mapping_sammon_1979-2018/clustering_kmeans20/mapping_sammon_1979-2018/grid_5x4


#### Load weather patterns

In [3]:
# same grid size and tag as in the SOM training cell, so that this cell can be run
# on its own after atl.load_input / atl.set_split
nrows,ncols = 5,4
tag = 'SOM_kmeansInit%sx%s_v1' % (nrows,ncols)
atl.define_plot_environment(pre_mapping='mapping_raw', clustering=tag, post_mapping='mapping_sammon_1979-2018', nrows=nrows, ncols=ncols)
# NOTE(review): relative path -- presumably resolved against the working directory set by os.chdir in the first cell
atl.stats_TC(file='tracks/tracks_ibtracks.csv', overwrite=False)

{'SOM': <minisom.MiniSom object at 0x7ff627ab2090>}
.//ERA5_VWS-MSLP_noTC3/mapping_raw_1979-2018/SOM_kmeansInit5x4_v1/mapping_sammon_1979-2018/grid_5x4


In [171]:
# frequency statistics and overview plots of the weather patterns
atl.stats_frequency()
atl.plot_freq()
atl.plot_fields()
atl.plot_label_trend()

# scatter plot
# legend_values are wind speeds in knots (see legend_name); 64/96/137 kn are presumably the
# Saffir-Simpson category 1/3/5 thresholds -- confirm
atl.plot_events(indicator='wind', legend_values=[64,96,137], legend_name='wind speed [kn]')
atl.plot_events(indicator='ACE')

# lagged stats
atl.plot_stats(indicator='ACE')
atl.plot_stats(indicator='genesis')