In [1]:
import os
from datetime import datetime
import numpy as np
from netCDF4 import Dataset
import netCDF4
import matplotlib.pyplot as plt
from wrf import to_np, getvar, CoordPair, vertcross, latlon_coords, interplevel, get_cartopy,  xy_to_ll, ll_to_xy, smooth2d
import wrf
import matplotlib.patches as mpatches
import matplotlib.colors as mcolors
from matplotlib.cm import get_cmap
from matplotlib.colors import LinearSegmentedColormap
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.feature import NaturalEarthFeature
import glob
import xarray as xr
import pandas as pd
from scipy.ndimage import gaussian_filter
from matplotlib.cm import get_cmap
from statistics import multimode, mode
from scipy.ndimage import label, find_objects
import scipy.stats as stats
import pickle

In [5]:
# Load every saved array whose filename matches the pattern below into pv_list.
# NOTE(review): the absolute scratch path is machine-specific; consider a
# configurable data directory.
pv_file_pattern = '/scratch/sawyer/wwrf/2017-01-09/ensemble_data/pv/pv_p3_2nd_*.npy'

# glob returns matches in arbitrary (filesystem-dependent) order; sort so that
# pv_list[i] refers to the same file on every run.
file_paths = sorted(glob.glob(pv_file_pattern))

# One array per matching file, in the same order as file_paths.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, which
# is only safe for trusted files. If these are plain numeric arrays it can
# probably be dropped; confirm before changing.
pv_list = [np.load(file, allow_pickle=True) for file in file_paths]
    

In [14]:
# Largest value within the first 20 entries along axis 0 of the first loaded array.
first_member = pv_list[0]
print(np.max(first_member[0:20, :, :]))

70.0897


In [7]:
# For each loaded array, report its standard deviation, its mean, and the
# mean + 2*sigma and mean + 3*sigma thresholds.
# NOTE(review): mu, sigma, two_sigma and three_sigma stay bound to the values
# of the LAST array once the loop ends; confirm that any later cell reading
# them really wants that member's statistics.
for member_values in pv_list:
    mu = np.mean(member_values)
    sigma = np.std(member_values)
    two_sigma = mu + 2 * sigma
    three_sigma = mu + 3 * sigma

    for label, value in (
        ('sigma: ', sigma),
        ('mu: ', mu),
        ('two_sigma: ', two_sigma),
        ('three_sigma: ', three_sigma),
    ):
        print(label, value)

sigma:  62.724888
mu:  27.11414
two_sigma:  152.56391525268555
three_sigma:  215.28880310058594
sigma:  67.28661
mu:  28.49776
two_sigma:  163.07098770141602
three_sigma:  230.35760116577148
sigma:  54.654087
mu:  24.378834
two_sigma:  133.68700790405273
three_sigma:  188.34109497070312
sigma:  67.04408
mu:  28.229536
two_sigma:  162.31770133972168
three_sigma:  229.36178398132324
sigma:  67.93078
mu:  27.073492
two_sigma:  162.93504905700684
three_sigma:  230.8658275604248
sigma:  48.78898
mu:  24.76281
two_sigma:  122.34076690673828
three_sigma:  171.12974548339844
sigma:  49.853542
mu:  24.244278
two_sigma:  123.95136260986328
three_sigma:  173.80490493774414
sigma:  59.286934
mu:  25.651485
two_sigma:  144.2253532409668
three_sigma:  203.51228713989258
sigma:  69.05544
mu:  28.505266
two_sigma:  166.61615180969238
three_sigma:  235.67159461975098
sigma:  58.128277
mu:  25.377426
two_sigma:  141.63397979736328
three_sigma:  199.76225662231445
sigma:  50.144363
mu:  26.21419
two_sigm

In [None]:
# Bootstrap the mean of ensemble_ivt: repeatedly resample it with replacement
# (same size as the original) and record each resample's mean.
# NOTE(review): `ensemble_ivt` is not defined in the cells visible here;
# confirm where it is built before running this cell on a fresh kernel.
n_iterations = 5000  # Number of bootstrap samples to create
sample_size = len(ensemble_ivt)  # Size of a bootstrap sample
bootstrap_seed = 42  # Fixed seed so the resampling is reproducible run to run

# A dedicated, seeded generator keeps the result independent of any other
# code that touches NumPy's global random state.
rng = np.random.default_rng(bootstrap_seed)

# Bootstrap procedure
bootstrap_means = np.empty(n_iterations)
for i in range(n_iterations):
    bootstrap_sample = rng.choice(ensemble_ivt, size=sample_size, replace=True)
    bootstrap_means[i] = np.mean(bootstrap_sample)


In [None]:
# Summarise the bootstrap distribution: its mean, its spread (used here as the
# standard error), and a percentile-based 95% interval.
std_err_of_bootstrap_means = np.std(bootstrap_means)
mean_of_bootstrap_means = np.mean(bootstrap_means)

# The 2.5th and 97.5th percentiles bracket the central 95% of the bootstrap means.
conf_interval = np.percentile(bootstrap_means, q=(2.5, 97.5))

print(
    f"Mean of bootstrap means: {mean_of_bootstrap_means:.2f}",
    f"Standard error of bootstrap means: {std_err_of_bootstrap_means:.2f}",
    f"95% confidence interval: ({conf_interval[0]:.2f}, {conf_interval[1]:.2f})",
    sep="\n",
)


In [None]:
# Value below which 99% of the ensemble_ivt samples fall.
ivt_99th_percentile = np.percentile(a=ensemble_ivt, q=99)
print(ivt_99th_percentile)

In [None]:
# Histogram of ensemble_ivt with a fitted normal curve, the tail beyond
# mu + 2*sigma shaded, and the mu + 2*sigma / mu + 3*sigma thresholds marked.
# NOTE(review): `ensemble_ivt` is not defined in the cells visible here;
# confirm where it is built before running this cell on a fresh kernel.

# Fit the normal curve to the data that is actually plotted. Previously this
# cell reused whatever `mu`/`sigma` were left over from the earlier loop over
# pv_list (i.e. the last array only), which need not describe ensemble_ivt.
mu = np.mean(ensemble_ivt)
sigma = np.std(ensemble_ivt)
threshold_2sigma = mu + 2 * sigma
threshold_3sigma = mu + 3 * sigma

fig, ax = plt.subplots()
ax.hist(ensemble_ivt, bins=25, density=True, alpha=0.6, color='b')

# Normal pdf evaluated across the x-range the histogram chose.
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
ax.plot(x, stats.norm.pdf(x, mu, sigma), 'k', linewidth=2)

# Shade the area under the curve for values greater than mu + 2*sigma.
x_fill = np.linspace(threshold_2sigma, xmax, 100)
ax.fill_between(x_fill, stats.norm.pdf(x_fill, mu, sigma), color='yellow', alpha=0.5)

# Dashed vertical lines at mu + 2*sigma (yellow) and mu + 3*sigma (red).
for threshold, color in ((threshold_2sigma, 'y'), (threshold_3sigma, 'r')):
    ax.axvline(threshold, color=color, linestyle='dashed', linewidth=1)

# Label each threshold at half the axis height. The labels say mu + k*sigma
# because that is the value shown (the old "2\sigma = ..." text was mislabeled).
label_y = ax.get_ylim()[1] * 0.5
ax.annotate(rf"$\mu + 2\sigma = {threshold_2sigma:.0f}$", (threshold_2sigma, label_y),
            color='black', horizontalalignment='right')
ax.annotate(rf"$\mu + 3\sigma = {threshold_3sigma:.0f}$", (threshold_3sigma, label_y),
            color='black', horizontalalignment='left')

# mu and sigma stacked in the top-right corner; offsets are in points from
# that corner, so the second entry sits just below the first.
for corner_text, y_offset in ((rf"$\mu = {mu:.0f}$", -5), (rf"$\sigma = {sigma:.0f}$", -20)):
    ax.annotate(
        corner_text,
        xy=(1, 1),
        xycoords='axes fraction',
        xytext=(-5, y_offset),
        textcoords='offset points',
        color='black',
        horizontalalignment='right',
        verticalalignment='top'
    )

ax.set_xlabel('IVT (kg m$^{-1}$ s$^{-1}$)')
ax.set_ylabel('Probability Density')
plt.show()

In [None]:
# Zoomed view of the ensemble_ivt histogram: same fitted normal curve and
# mu + 2*sigma / mu + 3*sigma markers, with the axes restricted to the upper tail.
# NOTE(review): `ensemble_ivt` is not defined in the cells visible here;
# confirm where it is built before running this cell on a fresh kernel.

# Fit the normal curve to the data that is actually plotted. Previously this
# cell reused whatever `mu`/`sigma` were left over from the earlier loop over
# pv_list (i.e. the last array only), which need not describe ensemble_ivt.
mu = np.mean(ensemble_ivt)
sigma = np.std(ensemble_ivt)
threshold_2sigma = mu + 2 * sigma
threshold_3sigma = mu + 3 * sigma

# Hard-coded zoom window for the tail view.
zoom_x_max = 1000
zoom_y_max = 0.00075

fig, ax = plt.subplots()
ax.hist(ensemble_ivt, bins=25, density=True, alpha=0.6, color='b')

# Normal pdf evaluated across the x-range the histogram chose (before zooming).
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
ax.plot(x, stats.norm.pdf(x, mu, sigma), 'k', linewidth=2)

# Shade the area under the curve for values greater than mu + 2*sigma.
x_fill = np.linspace(threshold_2sigma, xmax, 100)
ax.fill_between(x_fill, stats.norm.pdf(x_fill, mu, sigma), color='yellow', alpha=0.5)

# Zoom in: the x-axis starts at mu (not just before 2 sigma) and both upper
# limits are the fixed values above.
ax.set_xlim(mu, zoom_x_max)
ax.set_ylim(0, zoom_y_max)

# Dashed vertical lines at mu + 2*sigma (yellow) and mu + 3*sigma (red).
for threshold, color in ((threshold_2sigma, 'y'), (threshold_3sigma, 'r')):
    ax.axvline(threshold, color=color, linestyle='dashed', linewidth=1)

# Label each threshold at half the (zoomed) axis height. The labels say
# mu + k*sigma because that is the value shown (the old "2\sigma = ..." text
# was mislabeled).
label_y = ax.get_ylim()[1] * 0.5
ax.annotate(rf"$\mu + 2\sigma = {threshold_2sigma:.0f}$", (threshold_2sigma, label_y),
            color='black', horizontalalignment='right')
ax.annotate(rf"$\mu + 3\sigma = {threshold_3sigma:.0f}$", (threshold_3sigma, label_y),
            color='black', horizontalalignment='left')

# mu and sigma stacked in the top-right corner; offsets are in points from
# that corner, so the second entry sits just below the first.
for corner_text, y_offset in ((rf"$\mu = {mu:.0f}$", -5), (rf"$\sigma = {sigma:.0f}$", -20)):
    ax.annotate(
        corner_text,
        xy=(1, 1),
        xycoords='axes fraction',
        xytext=(-5, y_offset),
        textcoords='offset points',
        color='black',
        horizontalalignment='right',
        verticalalignment='top'
    )

ax.set_xlabel('IVT (kg m$^{-1}$ s$^{-1}$)')
ax.set_ylabel('Probability Density')
plt.show()