In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import os

In [None]:
# Fix the seed of NumPy's global generator so the random draws below are repeatable.
np.random.seed(seed=5)

In [None]:
# Run size: number of playas to simulate and number of Monte Carlo iterations.
n_playa = 71_848  # 71848 is all of them; use a smaller value for a quick run
n_iter = 1_000

In [None]:
# When True, a previously saved './frac_inundated.npy' (if present) is loaded
# instead of re-running the simulation. Set to False to force a fresh run.
reload_run = True

# Load data

In [None]:
# Read the predictions table.
pred_df = pd.read_csv('../data/all_preds.csv')

# Rebuild the date column: 418 month-end stamps starting 1984-03, repeated once
# per block of 418 rows.
# NOTE(review): assumes rows are grouped by playa, 418 consecutive rows each,
# in chronological order -- confirm against the file that produced the CSV.
pred_df['date'] = np.tile(
    pd.date_range('1984-03', periods=418, freq='M'),
    len(pred_df) // 418,
)

# Index by (playa id, date) so later cells can group by the 'date' level.
pred_df = pred_df.set_index(['id', 'date'])

# Simulate inundation and make plots

In [None]:
def simulate_inundation(pred_df, n_playa, n_iter, n_time=418):
    """Simulate the fraction of playas inundated at each time step.

    Each iteration draws one Bernoulli outcome per (playa, time step), using
    the values in ``pred_df['pred']`` as success probabilities, and averages
    the outcomes across playas to get one simulated time series.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Must contain a 'pred' column with exactly ``n_playa * n_time``
        probabilities. The column is reshaped to ``(n_playa, n_time)``, so
        each playa's ``n_time`` values must be contiguous.
    n_playa : int
        Number of playas represented in ``pred_df``.
    n_iter : int
        Number of independent simulation iterations.
    n_time : int, default 418
        Number of time steps per playa.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_time, n_iter)``; column ``i`` holds the
        fraction of playas inundated at each time step in iteration ``i``.

    Raises
    ------
    ValueError
        If the 'pred' column cannot be reshaped to ``(n_playa, n_time)`` or
        contains values that are not valid probabilities.
    """
    # The probability matrix does not change between iterations: build it once.
    probs = pred_df['pred'].values.reshape([n_playa, n_time])

    # Pre-fill with -1 so any column that was never written is easy to spot.
    # (The builtin ``float`` replaces ``np.float``, which NumPy 1.24 removed.)
    frac_inundated = np.full((n_time, n_iter), -1.0, dtype=float)

    for i in range(n_iter):
        binary_run = np.random.binomial(n=1, p=probs)

        # Sanity checks on the draw. Range checks are used rather than
        # requiring that both a 0 and a 1 occur, so small or degenerate
        # inputs (e.g. a debug subset that draws all zeros) are accepted.
        assert binary_run.shape == (n_playa, n_time)
        assert binary_run.min() >= 0
        assert binary_run.max() <= 1

        # Mean over playas (axis 0) gives the inundated fraction per time step.
        frac_inundated[:, i] = np.mean(binary_run, axis=0)
        if i % 100 == 0:
            print(i, 'done')

    return frac_inundated


In [None]:
# Run the simulation unless a cached result may be (and can be) reused.
if not (reload_run and os.path.exists('./frac_inundated.npy')):
    # For a partial run, keep only the rows of the first n_playa playas
    # (418 rows per playa); otherwise pass the full table through.
    frac_inundated = simulate_inundation(
        pred_df.iloc[0:(418*n_playa)] if n_playa < 71848 else pred_df,
        n_playa,
        n_iter,
    )
else:
    # NOTE(review): the cached array is loaded as-is; nothing here checks that
    # it was produced with the current n_playa / n_iter settings.
    frac_inundated = np.load('./frac_inundated.npy')
        

In [None]:
# Summaries across simulation iterations (axis=1), one value per time step.
inundation_sd = np.std(frac_inundated, axis=1)
inundation_mean_of_means = np.mean(frac_inundated, axis=1)

# np.percentile takes q on a 0-100 scale, so the 97.5th / 2.5th percentiles
# are q=97.5 and q=2.5 (not 0.975 / 0.025).
inundation_975 = np.percentile(frac_inundated, 97.5, axis=1)
inundation_025 = np.percentile(frac_inundated, 2.5, axis=1)

inundation_min = np.min(frac_inundated, axis=1)
inundation_max = np.max(frac_inundated, axis=1)

In [None]:
# Spot check: mean of row 5 of the simulation results.
frac_inundated[5, :].mean()

In [None]:
# Spot check: largest value in row 5 of the simulation results.
frac_inundated[5, :].max()

In [None]:
# Spot check: distribution of the values in row 5 of the simulation results.
plt.hist(x=frac_inundated[5, :])

In [None]:
# Plot the observed fraction inundated against the simulated mean, per date.
dates = pred_df.index.get_level_values(1)[:418]
fig, ax = plt.subplots(figsize=[15,3.5])

# Observed series: mean of the 'true' column over all playas for each date.
ax.plot(
    dates,
    pred_df['true'].groupby('date').mean().values,
    label='True',
    linewidth=1.25,
)

# Simulated series: per-date mean across iterations.
# (The min/max envelope and the 2.5% / 97.5% bands are deliberately not drawn;
# the spreads were not very interesting.)
ax.plot(
    dates,
    inundation_mean_of_means,
    label='Predicted',
    color='red',
    linewidth=1.25,
)

ax.legend(loc=(0.47, 0.74), prop={'size':14})
ax.set_xlabel("Date", size=15)
ax.set_ylabel("Fraction inundated", size=15)
ax.set_xlim(dt.datetime(1984,1,1), dt.datetime(2019,1,1))

# Vertical lines at 2011 and 2015 separate the labelled periods.
ax.axvline(dt.datetime(2015,1,1), color='black')
ax.axvline(dt.datetime(2011,1,1), color='black')
ax.text(dt.datetime(2011,3,1), 0.135, 'Validation', size=15)
ax.text(dt.datetime(2015,3,1), 0.135, 'Test', size=15)
ax.text(dt.datetime(1984,3,1), 0.135, 'Train', size=15)

plt.show()


In [None]:
# Cache the simulation results on disk so a later run can load them
# instead of simulating again.
np.save(file='./frac_inundated.npy', arr=frac_inundated)