In [174]:
import os
import plotly.express as px
import numpy as np
import h3
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import cartopy.crs as ccrs

# Select the database and AWS profile through environment variables. They are
# set before the `mirrorverse` import below -- presumably `read_data_w_cache`
# relies on them to locate the data; TODO confirm against mirrorverse.
os.environ['HAVEN_DATABASE'] = 'haven'
os.environ['AWS_PROFILE'] = 'admin'

from mirrorverse.utils import read_data_w_cache

In [175]:
# Colormap handed to `ax.quiver` by the `quiver` helper below.
cmap = plt.cm.RdBu
# Base map extent per catch-region index (the value `catch_region_map`
# returns). `plot_it` passes the list to `ax.set_extent`, in the order
# [left lon, right lon, bottom lat, top lat].
# NOTE(review): `catch_region_map` can also return 6 (prefix '142') and -1
# (no matching prefix); neither key exists here, so `plot_it` raises KeyError
# for those tags.
extents_map = {
    0: [-167, -153, 52, 58],
    1: [-172, -150, 50, 58],
    2: [-155, -140, 55, 62],
    3: [-155, -144, 57, 62],
    4: [-148, -130, 52, 62],
    5: [-137, -130, 51, 58],
}

def catch_region_map(tag_key):
    """Return the catch-region index encoded by the prefix of ``tag_key``.

    The index is the position of the first matching prefix in the list below;
    -1 is returned when ``tag_key`` starts with none of them.
    """
    prefixes = ('172', '202', '159', '205', '210', '229', '142')
    matches = (
        position
        for position, prefix in enumerate(prefixes)
        if tag_key.startswith(prefix)
    )
    return next(matches, -1)

def setup(ax, extent):
    ax.set_extent(extent, crs=ccrs.PlateCarree())
    ax.coastlines()

def quiver(ax, df, col):
    """Draw the arrows stored in ``df`` on ``ax``, colored by ``df[col]``.

    Arrow origins come from the 'x'/'y' columns and components from 'u'/'v'.
    The color scale is symmetric around zero: its limits are plus/minus the
    largest absolute value found in ``df[col]``.

    Returns whatever ``ax.quiver`` returns.
    """
    values = df[col]
    bound = max(values.max(), -values.min())

    arrow_style = dict(
        transform=ccrs.PlateCarree(),
        cmap=cmap,
        norm=Normalize(vmin=-bound, vmax=bound),
        width=0.005,
        scale=12,
        linestyle=[':'],
    )
    return ax.quiver(df['x'], df['y'], df['u'], df['v'], values, **arrow_style)

def plot_it(df, val, stretch=(0, 0, 0, 0)):
    """Plot the arrows in ``df`` on a map of their catch region, colored by ``val``.

    Parameters
    ----------
    df : DataFrame
        Needs the columns 'catch_region', 'x', 'y', 'u', 'v' and ``val``. The
        catch region of the FIRST row selects the base extent from
        ``extents_map``.
    val : str
        Name of the column used to color the arrows.
    stretch : sequence of 4 numbers, optional
        Added element-wise to the base extent
        ([left lon, right lon, bottom lat, top lat]) to widen or shift the view.

    Returns
    -------
    The matplotlib figure holding the map and a horizontal colorbar.

    Raises
    ------
    IndexError
        If ``df`` has no rows.
    KeyError
        If the catch region has no entry in ``extents_map``.
    """
    regions = df['catch_region'].values
    if len(regions) == 0:
        # Same exception type as indexing an empty array, with a usable message.
        raise IndexError('plot_it got an empty DataFrame: there is nothing to plot')

    catch_region = regions[0]
    if catch_region not in extents_map:
        raise KeyError(f'no map extent configured for catch_region={catch_region!r}')
    extent = [edge + delta for edge, delta in zip(extents_map[catch_region], stretch)]

    fig, ax = plt.subplots(
        figsize=(10, 5), ncols=1, nrows=1,
        subplot_kw={'projection': ccrs.PlateCarree()},
    )
    setup(ax, extent)
    arrows = quiver(ax, df, val)

    # Dedicated axes for the colorbar, in figure coordinates.
    cbar_axes = fig.add_axes([0.12, 0.89, 0.35, 0.02])
    fig.colorbar(arrows, cax=cbar_axes, orientation='horizontal')

    fig.subplots_adjust(hspace=0.1, wspace=0.1)
    fig.subplots_adjust(top=0.8)
    return fig

def _h3_centers(cells):
    """Return ``(lat, lon)`` float arrays with the center of each H3 cell."""
    # Decode each cell once; the reshape keeps an (n, 2) shape for empty input.
    centers = np.array([h3.h3_to_geo(cell) for cell in cells], dtype=float).reshape(-1, 2)
    return centers[:, 0], centers[:, 1]


def prep_models(new, old):
    """Join the selected rows of two model outputs and compute their score gap.

    Parameters
    ----------
    new, old : DataFrame
        Both need '_selected', 'tag_key', '_decision', '_train' and
        'log_likelihood'. ``new`` additionally needs 'origin_h3_index',
        'next_h3_index' and 'time'. Neither input is modified.

    Returns
    -------
    DataFrame
        One row per ('_train', '_decision', 'tag_key') present in the selected
        rows of both inputs, with columns 'tag_key', '_decision', '_train',
        'log_likelihood_new', 'x', 'y', 'u', 'v', 'catch_region', 'time',
        'log_likelihood_old' and 'new - old'. 'x'/'y' are the lon/lat of the
        origin cell center and 'u'/'v' the lon/lat offset to the next cell
        center.
    """
    # Explicit copy: the columns added below then belong to a frame we own,
    # which avoids pandas' SettingWithCopyWarning on the filtered slice.
    new = new[new['_selected']].copy()
    old = old[old['_selected']]

    origin_lat, origin_lon = _h3_centers(new['origin_h3_index'])
    next_lat, next_lon = _h3_centers(new['next_h3_index'])

    new['x'] = origin_lon
    new['y'] = origin_lat
    new['u'] = next_lon - origin_lon
    new['v'] = next_lat - origin_lat

    new['catch_region'] = new['tag_key'].apply(catch_region_map)

    new = new[[
        'tag_key', '_decision', '_train', 'log_likelihood',
        'x', 'y', 'u', 'v', 'catch_region', 'time'
    ]]
    old = old[[
        'tag_key', '_decision', '_train', 'log_likelihood'
    ]]
    df = new.merge(
        old, on=['_train', '_decision', 'tag_key'],
        suffixes=('_new', '_old'), how='inner'
    )
    df['new - old'] = df['log_likelihood_new'] - df['log_likelihood_old']
    return df

In [177]:
def _load_inference(query):
    """Fetch ``query`` via ``read_data_w_cache`` and add a 'log_likelihood' column.

    'log_likelihood' is the natural log of the 'probability' column.
    """
    frame = read_data_w_cache(query)
    frame['log_likelihood'] = np.log(frame['probability'])
    return frame


v1 = _load_inference(
    "select * from movement_model_inference_m6_a1_v5 where run_id = '9b1d4ef26b4a7c90c682eee8390b9495932e3129cda6dcd1c7907341460c0a37'"
)

v2 = _load_inference(
    "select * from movement_model_inference_m6_a1_v6 where run_id = '986d5ab0bee4dd2dee27120fab138ac28f82d69266821ff153c2d4c33308d54e'"
)

v3 = _load_inference(
    "select * from movement_model_inference_m6_a2_v5 where run_id = '4e7e3c752e83a5121ae226a255039d8b1494a8532b726fad875a884e8b41cd93'"
)

v4 = _load_inference(
    "select * from movement_model_inference_m6_a2_v6 where run_id = '105688304ba63a2343eefa82762780a34db30735bdc36338008698ae2325f174'"
)

# NOTE(review): unlike the four queries above, this one has no run_id filter,
# so every run stored in the table is loaded -- confirm that is intended.
v5 = _load_inference(
    "select * from movement_model_inference_m8_a3_v1"
)

# Uniform baseline built on the rows of v2: every row gets odds 1, so each row
# of an (_individual, _decision) group ends up with probability 1 / group size.
v0 = v2.copy().assign(
    odds=1.0,
    sum_odds=lambda frame: frame.groupby(['_individual', '_decision'])['odds'].transform('sum'),
    probability=lambda frame: frame['odds'] / frame['sum_odds'],
    log_likelihood=lambda frame: np.log(frame['probability']),
)

In [178]:
def compare_models(new, old, color, agg):
    """Bar chart of the per-tag log-likelihood change between two models.

    Only rows flagged '_selected' are used. Rows of ``old`` and ``new`` are
    paired on ('_train', 'tag_key', '_decision'); the difference
    ``log_likelihood_new - log_likelihood_old`` is then aggregated with
    ``agg`` per ('_train', 'tag_key', ``color``), where ``color`` is a column
    taken from ``old``.

    Returns the plotly figure: one bar per tag, colored by ``color`` and
    faceted into rows by '_train'.
    """
    keys = ['_train', 'tag_key', '_decision']

    old_rows = old.loc[old['_selected'], keys + ['log_likelihood', color]]
    new_rows = new.loc[new['_selected'], keys + ['log_likelihood']]

    paired = old_rows.merge(new_rows, on=keys, how='inner', suffixes=('_old', '_new'))
    paired['new - old'] = paired['log_likelihood_new'] - paired['log_likelihood_old']

    per_tag = (
        paired
        .groupby(['_train', 'tag_key', color])['new - old']
        .agg(agg)
        .reset_index()
        .sort_values('tag_key')
    )
    return px.bar(
        per_tag,
        x='tag_key', y='new - old', color=color,
        facet_row='_train'
    )

In [None]:
new, old = v5, v0

# Individuals with at least 10 distinct decisions in the new model's rows.
_filter = new.groupby(['_individual'])['_decision'].nunique().reset_index()
_filter = _filter.loc[_filter['_decision'] >= 10, ['_individual']]

# Months to keep. `.dt.month` only yields 1-12, so this list keeps every row
# with a valid timestamp; narrow it to analyse a subset of months.
months = list(range(13))

# Apply the same individual and month restrictions to both models.
new = (
    new.merge(_filter)
    .assign(month=lambda frame: frame['time'].dt.month)
    .loc[lambda frame: frame['month'].isin(months)]
)
old = (
    old.merge(_filter)
    .assign(month=lambda frame: frame['time'].dt.month)
    .loc[lambda frame: frame['month'].isin(months)]
)

sel = prep_models(new, old)

In [None]:
# New model, rows flagged `_selected` and not `_train`: mean log-likelihood
# per individual, then the mean of those per-individual means.
(
    new.loc[new['_selected'] & ~new['_train']]
    .groupby('_individual')['log_likelihood']
    .mean()
    .mean()
)

In [None]:
# New model, rows flagged `_selected` and not `_train`: mean log-likelihood
# over all rows (every row weighted equally).
new.loc[new['_selected'] & ~new['_train'], 'log_likelihood'].mean()

In [None]:
# Old model, rows flagged `_selected` and not `_train`: mean log-likelihood
# per individual, then the mean of those per-individual means.
(
    old.loc[old['_selected'] & ~old['_train']]
    .groupby('_individual')['log_likelihood']
    .mean()
    .mean()
)

In [None]:
# Old model, rows flagged `_selected` and not `_train`: mean log-likelihood
# over all rows (every row weighted equally).
old.loc[old['_selected'] & ~old['_train'], 'log_likelihood'].mean()

In [None]:
# exp of a difference of two log-scale numbers, i.e. the ratio of the
# corresponding values on the original scale.
# NOTE(review): both constants look like hand-copied outputs (presumably mean
# log-likelihoods from the cells above) -- confirm, and prefer computing them.
np.exp(-1.59+ 1.7422378714788238)

In [None]:
# Mean change in log-likelihood (new - old) per tag, colored by 'normed_fl'.
# The aggregation is passed as the string 'mean': pandas already maps np.mean
# to its own groupby mean, and passing the NumPy callable emits a FutureWarning
# because that mapping is going away. The string pins the current behavior.
compare_models(new, old, 'normed_fl', 'mean').show()

In [None]:
tag_key = '229206'
# Added to the region's base extent, in the order:
# left lon, right lon, bottom lat, top lat
stretch = [-5, 5, -1, 3]
df = sel.loc[sel['tag_key'] == tag_key]
plot_it(df, 'new - old', stretch).show()

In [None]:
# Time and region of the `_selected` rows of the tag plotted above, in
# chronological order.
(
    v2.loc[v2['_selected'] & (v2['tag_key'] == tag_key), ['time', 'region']]
    .sort_values('time')
)

In [None]:
# v1: mean log-likelihood per calendar month over rows flagged `_selected`
# and not `_train`; marker size is the number of rows in that month.
v1['month'] = v1['time'].dt.month
px.scatter(
    v1.loc[v1['_selected'] & ~v1['_train']]
    .groupby('month')['log_likelihood']
    .agg(['mean', 'count'])
    .reset_index(),
    x='month', y='mean', size='count',
)

In [None]:
# v4: mean log-likelihood per calendar month over rows flagged `_selected`
# and not `_train`; marker size is the number of rows in that month.
v4['month'] = v4['time'].dt.month
px.scatter(
    v4.loc[v4['_selected'] & ~v4['_train']]
    .groupby('month')['log_likelihood']
    .agg(['mean', 'count'])
    .reset_index(),
    x='month', y='mean', size='count',
)

In [None]:
# v3: mean log-likelihood per calendar month over rows flagged `_selected`
# and not `_train`; marker size is the number of rows in that month.
v3['month'] = v3['time'].dt.month
px.scatter(
    v3.loc[v3['_selected'] & ~v3['_train']]
    .groupby('month')['log_likelihood']
    .agg(['mean', 'count'])
    .reset_index(),
    x='month', y='mean', size='count',
)

In [None]:
# exp(-1.8) is about 0.165.
# NOTE(review): -1.8 is presumably a mean log-likelihood read off one of the
# monthly plots above, converted here to a probability -- confirm.
np.exp(-1.8)

In [None]:
# exp(-1.3) is about 0.273.
# NOTE(review): -1.3 is presumably a mean log-likelihood read off one of the
# monthly plots above, converted here to a probability -- confirm.
np.exp(-1.3)