### Import Libraries

In [35]:
from wrapper_functions import *

In [2]:
from typing import Any, Callable, Dict, List, Optional, Union, Tuple

import os
import gc
import time
import pickle
import functools

import multiprocessing as mp

# from google.colab import files
# from google.colab import 

import numpy as np
import tensorflow as tf

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import edward2 as ed
import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

dtype = tf.float32
import gpflow as gpf
import logging

logging.getLogger('tensorflow').setLevel(logging.ERROR)  # suppress pfor warnings
# Verify versions.
print(f'TensorFlow version: {tf.__version__}. Expected: 2.7.0')
print(f'TensorFlow Probability version: {tfp.__version__}. Expected: 0.15.0')
tf.test.gpu_device_name()

os.getcwd()

TensorFlow version: 2.10.0. Expected: 2.7.0
TensorFlow Probability version: 0.18.0. Expected: 0.15.0


'/Users/liyanran/Desktop/Research/Rachel/pop_ensemble/code'

In [4]:
def posterior_heatmap_2d(plot_data, X,
                         cmap='inferno_r',
                         #norm=None, norm_method="percentile",
                         save_addr=''):
    """Plots colored 2d heatmap using scatterplot.

    Args:
        plot_data: (np.ndarray) plot data whose color to visualize over
            2D surface, shape (N, ).
        X: (np.ndarray) locations of the plot data, shape (N, 2).
        X_monitor: (np.ndarray or None) Locations to plot data points to.
        cmap: (str) Name of color map.
        norm: (BoundaryNorm or None) Norm values to adjust color map.
            If None then a new norm will be created according to norm_method.
        norm_method: (str) The name of method to compute norm values.
            See util.visual.make_color_norm for detail.
        save_addr: (str) Address to save image to.

    Returns:
        (matplotlib.colors.BoundaryNorm) A color norm object for color map
            to be passed to a matplotlib.pyplot function.
    """
#     if save_addr:
#         pathlib.Path(save_addr).parent.mkdir(parents=True, exist_ok=True)
#         plt.ioff()

#     if not norm:
#         norm = make_color_norm(plot_data, method=norm_method)

    # 2d color plot using scatter
    plt.figure(figsize=(12, 8))
    plt.scatter(x=X[:, 0], y=X[:, 1],
                s=3,
                c=plot_data, cmap=cmap)#, norm=norm)
    cbar = plt.colorbar()

    #     plot monitors "lon", "lat"
#     if isinstance(X_monitor, np.ndarray):
#         plt.scatter(x=X_monitor[:, 0], y=X_monitor[:, 1],
#                     s=10, c='black')

    # adjust plot window
#     plt.xlim((np.nanmin(X[:, 0]), np.nanmax(X[:, 0])))
#     plt.ylim((np.nanmin(X[:, 1]), np.nanmax(X[:, 1])))

    if save_addr:
        plt.savefig(save_addr, bbox_inches='tight')
        plt.close()
        plt.ion()
    else:
        plt.show()

    #return norm

### Read in Data

In [5]:
training2010 = pd.read_csv('../data/merged_fb_census_data_280922.csv')
coordinate = np.asarray(training2010[["lon", "lat"]].values.tolist()).astype(np.float32)
training2010=training2010.fillna(0)

In [6]:
models = ['acs', 'pep', 'worldpop','fb']
# base_preds_train = tf.stack([training2010[m] for m in models], axis=-1).astype(np.float32)
# base_preds_test = tf.stack([training2010[m] for m in models], axis=-1).astype(np.float32)


# standardize
X_train1 = np.asarray(training2010[["lon", "lat"]].values.tolist()).astype(np.float32)
X_test1 = np.asarray(training2010[["lon", "lat"]].values.tolist()).astype(np.float32)
X_valid = np.concatenate((X_train1, X_test1), axis=0)
X_centr = np.nanmean(X_valid, axis=0)
X_scale = np.nanmax(X_valid, axis=0) - np.nanmin(X_valid, axis=0)

X_train1 = (X_train1 - X_centr) / X_scale
X_test1 = (X_test1 - X_centr) / X_scale

Y_train = np.expand_dims(training2010["census"], 1).astype(np.float32)

Y_test = np.expand_dims(training2010["census"], 1).astype(np.float32)

base_preds_train = tf.stack([training2010[m].astype(np.float32) for m in models], axis=-1)
base_preds_test = tf.stack([training2010[m].astype(np.float32) for m in models], axis=-1)
display(base_preds_train.shape, base_preds_test.shape)


print("2010 center and scale: ", X_centr, X_scale)

TensorShape([3108, 4])

TensorShape([3108, 4])

2010 center and scale:  [-90.83139   37.882298] [124.135445  48.820534]


In [7]:
X_train1

array([[ 0.03371918, -0.10942537],
       [ 0.02501395, -0.14633153],
       [ 0.04377531, -0.12306847],
       ...,
       [-0.15887196,  0.06978671],
       [-0.13572185,  0.12344731],
       [-0.11056888,  0.12210351]], dtype=float32)

In [8]:
from sklearn.model_selection import train_test_split
X, y = np.asarray(training2010[["lon", "lat"]].values.tolist()).astype(np.float32),np.asarray(training2010["census"]).astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

# Default Configs

In [9]:
# GP configs.
y_noise_std = 0.1  # @param
hidden_units = 128  # @param
lengthscale=1.  # @param
l2_regularizer=0.1  # @param

DEFAULT_GP_CONFIG = dict(lengthscale=lengthscale,
                         l2_regularizer=l2_regularizer, 
                         hidden_units=hidden_units, 
                         y_noise_std=y_noise_std)

In [10]:
# BNE model configs.
estimate_mean = "True" # @param ["True", "False"]
estimate_variance = "False" # @param ["True", "False"]
estimate_skewness = "False" # @param ["True", "False"]
variance_prior_mean=0. # @param
skewness_prior_mean=0. # @param

estimate_mean = eval(estimate_mean)
estimate_variance = eval(estimate_variance)
estimate_skewness = eval(estimate_skewness)

DEFAULT_BNE_CONFIG = dict(estimate_mean=estimate_mean,
                          estimate_variance=estimate_variance,
                          estimate_skewness=estimate_skewness,
                          variance_prior_mean=variance_prior_mean,
                          skewness_prior_mean=skewness_prior_mean)

# MAP configs.
map_step_size=0.1 # @param
map_num_steps=10_000 # @param

DEFAULT_MAP_CONFIG = dict(learning_rate=map_step_size,
                          num_steps=map_num_steps)

# MCMC configs.
mcmc_step_size=0.1 # @param
mcmc_sample_size=500 # @param
mcmc_num_steps=10_000 # @param
mcmc_burnin=2_500 # @param
mcmc_nchain=10 # @param
mcmc_seed=0 # @param

DEFAULT_MCMC_CONFIG = dict(step_size=mcmc_step_size, 
                           num_steps=mcmc_sample_size, 
                           burnin=mcmc_burnin, 
                           nchain=mcmc_nchain, 
                           seed=mcmc_seed)

In [11]:
# Model configs.
y_noise_std = 0.1  # @param
lengthscale=1.  # @param
l2_regularizer=0.1  # @param

# MCMC configs.
map_step_size=0.1 # @param
map_num_steps=10_000 # @param

mcmc_step_size=0.1 # @param
mcmc_num_steps=10_000 # @param

# Posterior configs.
bma_n_samples_train = 100 # @param
bma_n_samples_test = 200 # @param
bma_n_samples_eval = 1000  # @param

bma_seed = 0  # @param
bne_seed = 0 # @param

# Assemble into configs.
bma_model_config = DEFAULT_GP_CONFIG.copy()
map_config = DEFAULT_MAP_CONFIG.copy()
mcmc_config = DEFAULT_MCMC_CONFIG.copy()

bma_model_config.update(dict(lengthscale=lengthscale,
                             l2_regularizer=l2_regularizer,
                             y_noise_std=y_noise_std))

map_config.update(dict(learning_rate=map_step_size,
                       num_steps=map_num_steps))

mcmc_config.update(dict(step_size=mcmc_step_size, 
                        num_steps=mcmc_num_steps))

### Model Configs

In [12]:
# Optimization configs. 
# Consider reduce below parameters / set to `False` if MCMC is taking too long:
# mcmc_num_steps, mcmc_burnin, mcmc_nchain, mcmc_initialize_from_map.
map_step_size=5e-4   # @param
map_num_steps=10_000  # @param

mcmc_step_size=1e-4 # @param
mcmc_num_steps=1000 # @param

mcmc_nchain=1 # @param
mcmc_burnin=100 # @param
bne_mcmc_initialize_from_map="True" # @param ["False", "True"]

bne_mcmc_initialize_from_map = eval(bne_mcmc_initialize_from_map)


In [13]:
# BMA parameters.
y_noise_std = 0.01  # Note: Changed from 0.1 # @param
bma_gp_lengthscale = 1. # @param
bma_gp_l2_regularizer = 0.1 # @param

bma_n_samples_train = 100 # @param
bma_n_samples_eval = 250 # @param
bma_n_samples_test = 250 # @param
bma_seed = 0 # @param


In [14]:
# BNE parameters.
bne_gp_lengthscale = 4 # 5. # @param
bne_gp_l2_regularizer = 5 # 15 # @param
bne_variance_prior_mean = -2.5 # @param
bne_skewness_prior_mean = -2.5 # @param
bne_seed = 0 # @param

In [15]:
bma_config=dict(gp_lengthscale=bma_gp_lengthscale,
                gp_l2_regularizer=bma_gp_l2_regularizer,
                y_noise_std=y_noise_std,
                map_step_size=map_step_size,
                map_num_steps=map_num_steps,
                mcmc_step_size=mcmc_step_size,
                mcmc_num_steps=mcmc_num_steps,
                mcmc_initialize_from_map=False,
                n_samples_eval=bma_n_samples_eval,
                n_samples_train=bma_n_samples_train,
                n_samples_test=bma_n_samples_test,
                seed=bma_seed)

bne_config = dict(gp_lengthscale=bne_gp_lengthscale,
                  gp_l2_regularizer=bne_gp_l2_regularizer,
                  variance_prior_mean=bne_variance_prior_mean,
                  skewness_prior_mean=bne_skewness_prior_mean,
                  map_step_size=map_step_size,
                  map_num_steps=map_num_steps,
                  mcmc_step_size=mcmc_step_size,
                  mcmc_num_steps=mcmc_num_steps,
                  mcmc_nchain=mcmc_nchain,
                  mcmc_burnin=mcmc_burnin,
                  mcmc_initialize_from_map=bne_mcmc_initialize_from_map,
                  seed=bne_seed)

## Bayesian Model Averaging

A Bayesian ensemble model where ensemble weights $w_k's$ are parameterized by Gaussian process priors:

$y \sim N(\mu(x), \sigma^2)$ 

$\mu(x) = \sum_{k=1}^K w_k(x) * m_k(x) \quad$  where $\{m_k\}_{k=1}^K$ are base model predictions.

$w(x) = softmax(f(x)) \qquad\;\;\;$ where $w=[w_1, \dots, w_K]$ and $f=[f_1, \dots, f_K]$

$f \stackrel{i.i.d.}{\sim} GaussianProcess(0, k)$


In [17]:
# Model configs.
y_noise_std = 0.1  # @param
lengthscale=1.  # @param
l2_regularizer=0.1  # @param

# MCMC configs.
map_step_size=0.1 # @param
map_num_steps=10_000 # @param

mcmc_step_size=0.1 # @param
mcmc_num_steps=10_000 # @param

# Posterior configs.
bma_n_samples_train = 100 # @param
bma_n_samples_test = 200 # @param
bma_n_samples_eval = 1000  # @param

bma_seed = 0  # @param
bne_seed = 0 # @param

# Assemble into configs.
bma_model_config = DEFAULT_GP_CONFIG.copy()
map_config = DEFAULT_MAP_CONFIG.copy()
mcmc_config = DEFAULT_MCMC_CONFIG.copy()

bma_model_config.update(dict(lengthscale=lengthscale,
                             l2_regularizer=l2_regularizer,
                             y_noise_std=y_noise_std))

map_config.update(dict(learning_rate=map_step_size,
                       num_steps=map_num_steps))

mcmc_config.update(dict(step_size=mcmc_step_size, 
                        num_steps=mcmc_num_steps))

### Build Model

In [19]:
bma_prior, bma_gp_config = bma_dist(X_train1, 
                                    base_preds_train, 
                                    **bma_model_config)

bma_model_config.update(bma_gp_config)

# Check if the model graph is specified correctly.
bma_prior.resolve_graph()

  f"The initializer {self.__class__.__name__} is unseeded "


(('gp_weights', ()), ('y', ('gp_weights',)))

In [20]:
bma_prior

<tfp.distributions.JointDistributionNamedAutoBatched 'JointDistributionNamedAutoBatched' batch_shape=[] event_shape={gp_weights: [128, 4], y: [3108, 1]} dtype={gp_weights: float32, y: float32}>

In [21]:
display(map_config,mcmc_config,bma_model_config)

{'learning_rate': 0.1, 'num_steps': 10000}

{'step_size': 0.1, 'num_steps': 10000, 'burnin': 2500, 'nchain': 10, 'seed': 0}

{'lengthscale': 1.0,
 'l2_regularizer': 0.1,
 'hidden_units': 128,
 'y_noise_std': 0.1,
 'units': 4,
 'seed': 0}

### Run MCMC

In [22]:
mcmc_nchain = 10
bma_gp_w_samples = run_posterior_inference(model_dist=bma_prior, 
                                           model_config=bma_model_config,
                                           Y=Y_train, 
                                           map_config=map_config,
                                           mcmc_config=mcmc_config)


bma_joint_samples = make_bma_samples(X_test1, Y_test, base_preds_test, 
                                     bma_weight_samples=bma_gp_w_samples[0],
                                     bma_model_config=bma_model_config, 
                                     n_samples=bma_n_samples_eval, 
                                     seed=bne_seed,
                                     y_samples_only=False)



Running MAP:	10129749573632.0...13029293056.0...12917168128.0...12850932736.0...12814694400.0...12798589952.0...12791627776.0...13083279360.0...13074496512.0...12848691200.0...Done.
Running MCMC:	Acceptance Ratio: 0.32959994673728943


In [23]:
bma_ensemble_weights = bma_joint_samples['ensemble_weights']

In [24]:
training2010[["lon", "lat"]]

Unnamed: 0,lon,lat
0,-86.645649,32.540091
1,-87.726272,30.738314
2,-85.397327,31.874030
3,-87.125260,32.999024
4,-86.562711,33.990440
...,...,...
3103,-108.878999,41.656512
3104,-110.570974,43.713556
3105,-110.553036,41.289323
3106,-107.679282,43.909060


In [36]:
ensemble_weights_val = tf.reduce_mean(bma_ensemble_weights, axis=0)
#coordinate = np.asarray(training2010[["lon", "lat"]].values.tolist()).astype(np.float32)
weights_dict = {
    "acs": ensemble_weights_val[:, 0],
    "pep": ensemble_weights_val[:,1],
    "worldpop": ensemble_weights_val[:, 2],
    "fb": ensemble_weights_val[:, 3]
}

color_norm_weights = make_color_norm(
    list(weights_dict.values())[1],   
    method="percentile")

for base_model_name in models:
    posterior_heatmap_2d(weights_dict[base_model_name], coordinate,
                         cmap='viridis',
                         #norm=color_norm_weights, norm_method="percentile",
                         save_addr='')
                         #save_addr='./pic/'+base_model_name)

NameError: name 'make_color_norm' is not defined

! conda install geopandas==0.3.0
! conda install pyshp==1.2.10
! conda install shapely==1.6.3

In [32]:
! conda install plotly
! conda install shapely

Collecting plotly
  Downloading plotly-5.10.0-py2.py3-none-any.whl (15.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.2/15.2 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting tenacity>=6.2.0
  Downloading tenacity-8.1.0-py3-none-any.whl (23 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.10.0 tenacity-8.1.0


In [33]:
import plotly.figure_factory as ff

fips = ['06021', '06023', '06027',
        '06029', '06033', '06059',
        '06047', '06049', '06051',
        '06055', '06061']
values = range(len(fips))

fig = ff.create_choropleth(fips=fips, values=values)
fig.layout.template = None
fig.show()

ImportError: geopandas, pyshp and shapely must be installed for this figure factory.

Run the following commands to install the correct versions of the following modules:

```
$ pip install geopandas==0.3.0
$ pip install pyshp==1.2.10
$ pip install shapely==1.6.3
```
If you are using Windows, follow this post to properly install geopandas and dependencies:http://geoffboeing.com/2014/09/using-geopandas-windows/

If you are using Anaconda, do not use PIP to install the packages above. Instead use conda to install them:

```
$ conda install plotly
$ conda install geopandas
```

In [None]:
from mpl_toolkits.basemap import Basemap #导入Basemap
import matplotlib.pyplot as plt  #导入 matplotlib.pyplot
fig=plt.figure(figsize=(16, 8)) #表示figure 的大小为宽、长（单位为英寸）
#lat_1：南纬度+4，lat_2=北纬度-4，lon_0=-100，设定经度的中心
m= Basemap(llcrnrlon=-130, llcrnrlat=20, urcrnrlon=-65, urcrnrlat=49,projection='lcc',lat_1=24, lat_2=45, lon_0=-100)
m.drawcountries(linewidth=1.5) # 开始画上国家
m.drawcoastlines()  #把海岸线画上
m.drawmapboundary(fill_color = 'blue')# 首先给地球涂上蓝色的一层
m.drawstates()        # 绘制州
m.drawcounties()      # 绘制县，这里好象没有县的划分。
m.fillcontinents(color = 'yellow', lake_color = 'aqua')# 再给大陆涂上黄色,给江河湖泊涂上水蓝的颜色
plt.show()
fig.savefig('../test/America.jpg',dpi=600) 

