In [3]:
%load_ext autoreload
%autoreload 2
import numpy as np
import re
import time
%matplotlib
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import textwrap
import scipy.stats as stats

from scipy import stats
import scipy
from functools import partial

import matplotlib.pylab as pylab
import seaborn as sns
# Global plot styling shared by every matplotlib/seaborn figure in this notebook.
sns.set_style('whitegrid', {'legend.frameon': True})
sns.set_palette(sns.color_palette("Set1", 12))

fontsize = 12
# rcParams overrides. 'legend.fontsize' used to be listed twice (once with
# `fontsize`, once with 10); only the last entry of a dict literal survives,
# so the single entry below keeps the value that was actually in effect.
params = {
    'figure.figsize': (18, 15),
    'axes.labelsize': fontsize,
    'axes.titlesize': fontsize,
    'axes.edgecolor': "0.3",
    'xtick.labelsize': fontsize,
    'ytick.labelsize': fontsize,
    'legend.fontsize': 10,
    'font.size': fontsize,
    'font.family': 'serif',
}
pylab.rcParams.update(params)
plt.rc('axes', labelsize=fontsize)

#plt.style.use('ggplot')
%matplotlib inline

from tqdm.notebook import tqdm
import pandas as pd
from pathlib2 import Path
import os

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Using matplotlib backend: QtAgg


In [4]:
import sys
sys.path.append('../')

from metric_store import save_metrics, save_metric, load_metrics, get_metric_names, load_metric
from network_metrics import prepare_metric
from pipeline.pipeline import load_config

In [5]:
# Load the pipeline configuration object. Later cells read
# `config.metrics_plot_options` from it and pass `config` to the data/metric loaders.
# NOTE(review): the bare file name is presumably resolved relative to the
# current working directory — confirm against load_config.
config_name = "pipeline.config"
config = load_config(config_name)

In [None]:
# Output folder for the per-month histogram artifacts written further down
# (HTML plots and CSV tables); created together with any missing parents.
# NOTE(review): this cell carries no execution count in the saved notebook,
# yet the export cells depend on `result_dir` — make sure it runs first.
result_dir = config.metrics_plot_options['work_dir'] / 'results_histogram' / 'metric_per_month'
result_dir.mkdir(parents=True, exist_ok=True)

In [6]:
# Load the dataset described by `config` and derive the availability mask
# that is later handed to `prepare_metric` for every loaded metric.
from corr_network import load_data, get_available_mask
data = load_data(config)
available_mask = get_available_mask(data)

In [7]:
# List the metric names reported by `get_metric_names` for the given prefix
# (the keys of the mapping it returns) and display them.
prefix = 'probability_for_metrics'
metric_names = list(get_metric_names(config, prefix = prefix).keys())
metric_names

['probability_for_metrics/network_metrics/LCC_w',
 'probability_for_metrics/network_metrics/degree_w',
 'probability_for_metrics/network_metrics/EVC_w',
 'probability_for_metrics/network_metrics/closeness_w',
 'probability_for_metrics/network_metrics/LCC_0.9',
 'probability_for_metrics/network_metrics/degree_0.9',
 'probability_for_metrics/network_metrics/EVC_0.9',
 'probability_for_metrics/network_metrics/closeness_0.9',
 'probability_for_metrics/network_metrics/LCC_0.95',
 'probability_for_metrics/network_metrics/degree_0.95',
 'probability_for_metrics/network_metrics/EVC_0.95',
 'probability_for_metrics/network_metrics/closeness_0.95',
 'probability_for_metrics/diff_metrics/network_metrics/LCC_w',
 'probability_for_metrics/diff_metrics/network_metrics/degree_w',
 'probability_for_metrics/diff_metrics/network_metrics/EVC_w',
 'probability_for_metrics/diff_metrics/network_metrics/closeness_w',
 'probability_for_metrics/diff_metrics/network_metrics/LCC_0.9',
 'probability_for_metrics/d

In [17]:
# Metric keys to load. The matching 'probability_for_metrics/...' entries are
# not loaded here; they are computed from these raw metrics in a later cell.
metric_names = [
    'network_metrics/LCC_w',
    'network_metrics/LCC_0.9',
    'network_metrics/LCC_0.95',
]

metrics = []
for metric_name in metric_names:
    # Record the metric currently being processed in the plot options
    # (presumably consulted by the project helpers — TODO confirm).
    config.metrics_plot_options['metric_name'] = metric_name
    metric = prepare_metric(
        metric_name,
        load_metric(config, metric_name),
        available_mask,
    )
    print(metric_name, metric.shape)
    metrics.append(metric)

network_metrics/LCC_w (36, 69, 113960)
network_metrics/LCC_0.9 (36, 69, 113960)
network_metrics/LCC_0.95 (36, 69, 113960)


In [18]:
def compute_greater(metric_array):
    """Count, for every element, how many elements are strictly greater.

    The previous implementation ranked the values with an (unstable) argsort,
    so equal values received different, arbitrary ranks. Here tied values
    always share the same count and the result is deterministic.

    Parameters
    ----------
    metric_array : 1-D array-like of numbers (may contain NaN).

    Returns
    -------
    np.ndarray of dtype int32 with the same length as ``metric_array``.
    NaN entries are never counted as "greater" than anything; the value
    reported at a NaN position is the number of non-NaN elements, i.e. NaN
    still ranks below every valid value as it did before.
    """
    values = np.asarray(metric_array)
    nan_positions = np.isnan(values)
    n_valid = len(values) - int(nan_positions.sum())

    # np.sort places NaN at the end, so the first n_valid entries are the
    # valid values in ascending order.
    ascending_valid = np.sort(values)[:n_valid]

    # side='right' yields the number of valid values <= x; the remainder of
    # the valid values is strictly greater than x.
    n_not_greater = np.searchsorted(ascending_valid, values, side='right')
    n_greater = (n_valid - n_not_greater).astype('int32')
    n_greater[nan_positions] = n_valid
    return n_greater

def compute_probability_for_metrics(metric, dtype='float16'):
    """Turn every node's time series into rank-based probabilities.

    For each (lat, lon) node the value at a time step is replaced by the
    result of ``compute_greater`` for that step divided by the series length.

    Parameters
    ----------
    metric : 3-D np.ndarray indexed as (lat, lon, time).
    dtype : dtype of the returned array.

    Returns
    -------
    np.ndarray with the shape of ``metric``; positions that are NaN in
    ``metric`` are NaN in the result.
    """
    n_lat, n_lon, n_time = metric.shape
    missing = np.isnan(metric)
    prob = np.zeros((n_lat, n_lon, n_time), dtype=dtype)

    for lat, lon in np.ndindex(n_lat, n_lon):
        series = metric[lat, lon, :]
        # NOTE(review): the denominator is the full series length, NaN
        # samples included — confirm this is intended for partly-NaN nodes.
        prob[lat, lon, :] = compute_greater(series) / n_time

    prob[missing] = np.nan
    return prob


In [19]:
# Extend `metrics` / `metric_names` with the probability transform of each raw
# metric. Both lists are rebuilt from their first N_RAW_METRICS entries, which
# makes the cell idempotent: re-running it no longer appends duplicate arrays
# and names, so the `metrics[3:]` / `metric_names[3 + i]` pairing used by the
# histogram, plotting and export cells stays aligned.
N_RAW_METRICS = 3
raw_metrics = metrics[:N_RAW_METRICS]
raw_metric_names = metric_names[:N_RAW_METRICS]

metrics = raw_metrics + [
    compute_probability_for_metrics(raw_metric, dtype=np.float32)
    for raw_metric in raw_metrics
]
# Each derived name is the raw name under the 'probability_for_metrics/' prefix.
metric_names = raw_metric_names + [
    'probability_for_metrics/' + raw_name for raw_name in raw_metric_names
]

In [20]:
# NOTE(review): the working directory is switched to a hardcoded,
# machine-specific absolute path (presumably so the project-local imports
# below resolve). This will fail on any other machine — consider deriving the
# location from configuration instead.
import os
os.chdir(r"D:\PC\UNN\Science\Climate\programs\climate-networks\src")
from plot_network_metrics.utils import get_times_lats_lots
# get_cyclones_info / get_cyclones are imported but not used in the visible cells.
from cyclones_info.cyclones_info import get_cyclones_info, get_cyclones
# `all_times` is iterated per time step below; `all_lats` / `all_lons` are
# presumably the grid coordinates — TODO confirm against get_times_lats_lots.
all_times, all_lats, all_lons = get_times_lats_lots(config)

In [21]:
from collections import defaultdict

# Group time-step indices by month: characters 5-6 of every time stamp are
# used as the (two-character) month key.
month_ids = defaultdict(list)
for i, cur_time in enumerate(all_times):
    month_ids[cur_time[5:7]].append(i)

In [38]:
# Sanity check: the number of time stamps should match the size of the metric's last axis.
len(all_times), metric.shape

(113960, (36, 69, 113960))

In [80]:
# Slice of metrics[3] (the first computed probability metric) restricted to
# the time steps whose month key is "07"; all grid nodes are kept.
y = metrics[3][:, :, month_ids["07"]]

In [82]:
# Fraction of all entries of `y` that are below 0.5. NaN entries compare as
# False, so they count towards the denominator but never towards the hits.
z = (y < 0.5).flatten()
z.mean()

0.2109498307780215

In [71]:
# Share of NaN entries in the month slice `y`.
np.isnan(y).mean()

0.4690016103059581

In [64]:
# Absolute number of True entries in `z`.
# NOTE(review): the execution counts show this cell was last run before the
# cell that currently defines `z`, so the displayed value may stem from an
# older definition — re-run top to bottom to confirm.
z.sum()

7689246

In [65]:
# Cross-check against the histogram table: sum of the "07" counts from row 50 onward.
# NOTE(review): `df` is only created by the histogram cell further down, so
# this cell raises NameError on a fresh Restart & Run All.
df["07"][50:].sum()

7689246

In [83]:
# Per-month histograms of every probability metric (metrics[3:]).
# The result is one DataFrame per metric: a 'bin_center' column plus one
# column of counts per month key.
bins = np.linspace(0, 1, 101)
# The edges are fixed, so the centers are the same for every metric and month
# and are computed once instead of once per histogram.
bin_centers = (bins[1:] + bins[:-1]) * .5

df_metrics = []
# tqdm wraps the list itself (not an enumerate object) so that it knows the
# total and shows real progress instead of "0it".
for metric in tqdm(metrics[3:], desc='metric'):
    month_counts = {}
    for month, ids in tqdm(month_ids.items(), desc='month'):
        month_metric = metric[:, :, ids]
        # Drop NaN entries before binning.
        month_metric = month_metric[~np.isnan(month_metric)]
        counts, _ = np.histogram(month_metric, bins)
        month_counts[month] = counts

    # Build the table in one step; column order is 'bin_center' followed by
    # the months in `month_ids` order.
    df = pd.DataFrame({'bin_center': bin_centers, **month_counts})
    df_metrics += [df]

0it [00:00, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

In [153]:
import plotly.express as px
import plotly.graph_objects as go

# Month names indexed by month number; index 0 is a placeholder so that
# months[int("01")] is "January".
months = ["Unknown",
          "January",
          "February",
          "March",
          "April",
          "May",
          "June",
          "July",
          "August",
          "September",
          "October",
          "November",
          "December"]

palette = sns.color_palette("husl", 12)
# seaborn yields RGB components as floats in [0, 1], while an 'rgb(r, g, b)'
# colour string expects 0-255 channel values. Scale the channels; otherwise
# every trace is drawn with channel values below 1, i.e. practically black.
palette_str = [
    'rgb({}, {}, {})'.format(*(int(round(255 * channel)) for channel in color))
    for color in palette
]

# One interactive figure per probability metric, one line per month,
# written as a standalone HTML file into `result_dir`.
for i, metric in enumerate(metrics[3:]):
    df = df_metrics[i]
    fig = go.Figure()
    for j, (month, ids) in enumerate(month_ids.items()):
        fig.add_trace(
            go.Scatter(
                x=df['bin_center'],
                y=df[month],
                name=f'Month: {month} = {months[int(month)]}',
                mode='lines',
                line={'color': palette_str[j]},
            )
        )

    # The layout does not depend on the month, so it is set once per figure
    # instead of once per trace.
    fig.update_layout(
        title=f"{metric_names[3 + i]}",
        autosize=True,
        xaxis_range=[0, 1],
        font=dict(
            family="Computer Modern",
            size=18,
            color="RebeccaPurple"
        )
    )

    # '/' in the metric name is replaced so the name is usable as a file name.
    file_name = result_dir / ("metric_per_month_" + metric_names[3 + i].replace('/', '$') + ".html")
    fig.write_html(str(file_name))

In [154]:
# Write every per-month histogram table next to its HTML plot, using the same
# file stem with a .csv extension.
for i, metric in enumerate(metrics[3:]):
    df = df_metrics[i]
    file_name = result_dir / f"metric_per_month_{metric_names[3 + i].replace('/', '$')}.csv"
    df.to_csv(file_name)

In [140]:
# Display the output path assigned most recently.
file_name

WindowsPath('Z:/Research/Climate/data/ERA5/ERA5_MSL_1982_2020_3h_0.75/results_histogram/metric_per_month/probability_for_metrics$network_metrics$LCC_0.95.html')