In [18]:
import os 
os.environ['AWS_PROFILE'] = 'admin'
os.environ['HAVEN_DATABASE'] = 'haven'

import plotly.express as px 
import h3 
import numpy as np
import pandas as pd

from mirrorverse.utils import read_data_w_cache

In [61]:
sql = '''
select 
    time, epoch, h3_index, n_depth_bin, probability
from 
    chinook_depth_full_inference_3_1_18_2
where 
    time in (
        CAST('2022-02-15' as DATE),
        CAST('2022-08-15' as DATE)
    )
'''
data = read_data_w_cache(sql)
print(data.shape)
data.head()

(530256, 5)


Unnamed: 0,time,epoch,h3_index,n_depth_bin,probability
0,2022-08-15,1660528800,841d907ffffffff,0.9,0.000265
1,2022-08-15,1660600800,841d059ffffffff,0.7,0.006713
2,2022-08-15,1660528800,8422d01ffffffff,0.1,0.403569
3,2022-08-15,1660528800,8422d01ffffffff,0.6,0.020322
4,2022-08-15,1660528800,8422d01ffffffff,0.8,0.000881


In [62]:
base = (
    data[['n_depth_bin']].drop_duplicates()
    .merge(
        data[['epoch', 'time']].drop_duplicates(), how='cross'
    )
    .merge(
        data[['h3_index']].drop_duplicates(), how='cross'
    )
)
print(base.shape)
base.head()

(721440, 4)


Unnamed: 0,n_depth_bin,epoch,time,h3_index
0,0.9,1660528800,2022-08-15,841d907ffffffff
1,0.9,1660528800,2022-08-15,841d059ffffffff
2,0.9,1660528800,2022-08-15,8422d01ffffffff
3,0.9,1660528800,2022-08-15,841db13ffffffff
4,0.9,1660528800,2022-08-15,841d937ffffffff


In [63]:
data = base.merge(data, on=['h3_index', 'n_depth_bin', 'time', 'epoch'], how='left').fillna(0)
print(data.shape)

(721440, 5)


In [64]:
data['lat'] = data['h3_index'].apply(lambda x: h3.h3_to_geo(x)[0])
data['lon'] = data['h3_index'].apply(lambda x: h3.h3_to_geo(x)[1])
data['hour'] = pd.to_datetime(data['epoch'], unit='s').dt.hour
data.head()

Unnamed: 0,n_depth_bin,epoch,time,h3_index,probability,lat,lon,hour
0,0.9,1660528800,2022-08-15,841d907ffffffff,0.000265,53.170394,-155.14384,2
1,0.9,1660528800,2022-08-15,841d059ffffffff,0.000345,55.353474,-137.262742,2
2,0.9,1660528800,2022-08-15,8422d01ffffffff,0.000166,54.106284,-166.214213,2
3,0.9,1660528800,2022-08-15,841db13ffffffff,0.000264,54.913712,-151.168514,2
4,0.9,1660528800,2022-08-15,841d937ffffffff,0.000269,52.174484,-154.991788,2


In [65]:
#df = base.merge(data, on=['epoch', 'hour', 'time', 'n_depth_bin', 'h3_index'], how='left').fillna(0)
df = data.groupby(['hour', 'n_depth_bin', 'time'])['probability'].mean().reset_index().sort_values('hour')
df['n_depth_bin'] = df['n_depth_bin'].astype(str)
px.bar(
    df,
    x='hour', y='probability', color='n_depth_bin', facet_col='time',
    category_orders={'n_depth_bin': sorted(df['n_depth_bin'].unique(), reverse=True)},
    title='A vs B Season Risk'
)

In [None]:
import plotly.express as px

df = data[
        (data['time'] == data['time'].max())
        & (data['n_depth_bin'] > 0.1)
        & (data['n_depth_bin'] < 0.4)
    ]
df = (
    df.groupby(['lon', 'lat', 'h3_index', 'epoch'])['probability'].sum().reset_index()
    .groupby(['lon', 'lat', 'h3_index'])['probability'].mean().reset_index()
)
df = df[df['probability'] > 10 ** -10]

# Ensure your DataFrame is named df and contains 'lon', 'lat', and 'probability'
fig = px.scatter_mapbox(
    df,
    lat='lat',
    lon='lon',
    color='probability',  # Color points by probability
    size_max=10,  # Adjust as needed
    zoom=3,  # Adjust zoom level
    mapbox_style="carto-positron",  # Choose a map style,
)

fig.show()


In [76]:
data.sort_values(['lon', 'lat'])

Unnamed: 0,n_depth_bin,epoch,time,h3_index,probability,lat,lon,hour
902,0.9,1660528800,2022-08-15,840d8b1ffffffff,0.000000,64.710683,-169.938255,2
2405,0.9,1660600800,2022-08-15,840d8b1ffffffff,0.000000,64.710683,-169.938255,22
3908,0.9,1660586400,2022-08-15,840d8b1ffffffff,0.000000,64.710683,-169.938255,18
5411,0.9,1660572000,2022-08-15,840d8b1ffffffff,0.000000,64.710683,-169.938255,14
6914,0.9,1644890400,2022-02-15,840d8b1ffffffff,0.000000,64.710683,-169.938255,2
...,...,...,...,...,...,...,...,...
714794,0.2,1660554000,2022-08-15,84129c1ffffffff,0.117343,52.332028,-128.417532,9
716297,0.2,1660568400,2022-08-15,84129c1ffffffff,0.181616,52.332028,-128.417532,13
717800,0.2,1644886800,2022-02-15,84129c1ffffffff,0.154602,52.332028,-128.417532,1
719303,0.2,1660525200,2022-08-15,84129c1ffffffff,0.240239,52.332028,-128.417532,1


In [81]:
sql = '''
select 
    epoch, h3_index, n_depth_bin, probability
from 
    chinook_depth_full_inference_3_1_18_2
where 
    h3_index = '84129c1ffffffff' /*'840d8b1ffffffff'*/
'''
data = read_data_w_cache(sql)
data['time'] = pd.to_datetime(data['epoch'], unit='s')
print(data.shape)
data.head()

(52560, 5)


Unnamed: 0,epoch,h3_index,n_depth_bin,probability,time
0,1642219200,84129c1ffffffff,0.5,0.098952,2022-01-15 04:00:00
1,1658635200,84129c1ffffffff,0.6,0.037848,2022-07-24 04:00:00
2,1668240000,84129c1ffffffff,0.2,0.148989,2022-11-12 08:00:00
3,1669838400,84129c1ffffffff,0.4,0.189928,2022-11-30 20:00:00
4,1663012800,84129c1ffffffff,0.2,0.235868,2022-09-12 20:00:00


In [82]:
px.line(
    data.sort_values('time'), x='time', y='probability', color='n_depth_bin'
)

In [85]:
data['date'] = data['time'].dt.date

df = data.groupby(['date', 'n_depth_bin'])['probability'].agg(['min', 'max']).reset_index()
px.line(
    df, x='date', y='max', color='n_depth_bin'
)

In [86]:
data['date'] = data['time'].dt.date

df = data.groupby(['date', 'n_depth_bin'])['probability'].agg(['min', 'max']).reset_index()
px.line(
    df, x='date', y='min', color='n_depth_bin'
)

In [117]:
sql = '''
select 
    time, epoch, h3_index, n_depth_bin, probability
from 
    chinook_depth_full_inference_3_1_18_2
where 
    time in (
        CAST('2022-07-15' as DATE)
    )
'''
data = read_data_w_cache(sql)
print(data.shape)
data.head()

(265128, 5)


Unnamed: 0,time,epoch,h3_index,n_depth_bin,probability
0,2022-07-15,1657886400,841d23bffffffff,0.5,0.07401
1,2022-07-15,1657886400,841d23bffffffff,0.9,0.000157
2,2022-07-15,1657861200,8413931ffffffff,0.3,0.119707
3,2022-07-15,1657918800,841d347ffffffff,0.9,0.000459
4,2022-07-15,1657904400,840c48dffffffff,0.2,0.170877


In [118]:
data['hour'] = (pd.to_datetime(data['epoch'], unit='s').dt.hour - 9) % 24
data.head()

Unnamed: 0,time,epoch,h3_index,n_depth_bin,probability,hour
0,2022-07-15,1657886400,841d23bffffffff,0.5,0.07401,3
1,2022-07-15,1657886400,841d23bffffffff,0.9,0.000157,3
2,2022-07-15,1657861200,8413931ffffffff,0.3,0.119707,20
3,2022-07-15,1657918800,841d347ffffffff,0.9,0.000459,12
4,2022-07-15,1657904400,840c48dffffffff,0.2,0.170877,8


In [None]:
df = data.groupby(['h3_index', 'n_depth_bin', 'hour'])['probability'].mean().reset_index()
df = df[
    (df['n_depth_bin'] > 0.1)
    & (df['n_depth_bin'] < 0.3)
    & (df['hour'] >= 4)
    & (df['hour'] < 12)
]
df = df.groupby(['h3_index', 'hour'])['probability'].sum().reset_index()
df['min_probability'] = df.groupby('h3_index')['probability'].transform('min')
df['max_probability'] = df.groupby('h3_index')['probability'].transform('max')
df = df[df['probability'] == df['min_probability']]
df = df.drop_duplicates(['h3_index', 'probability'])
df['lat'] = df['h3_index'].apply(lambda x: h3.h3_to_geo(x)[0])
df['lon'] = df['h3_index'].apply(lambda x: h3.h3_to_geo(x)[1])


# Ensure your DataFrame is named df and contains 'lon', 'lat', and 'probability'
fig = px.scatter_mapbox(
    df,
    lat='lat',
    lon='lon',
    color='hour',  # Color points by probability
    size_max=10,  # Adjust as needed
    zoom=3,  # Adjust zoom level
    mapbox_style="carto-positron",  # Choose a map style,
)

fig.show()

In [123]:
# Ensure your DataFrame is named df and contains 'lon', 'lat', and 'probability'
fig = px.scatter_mapbox(
    df,
    lat='lat',
    lon='lon',
    color='probability',  # Color points by probability
    size_max=10,  # Adjust as needed
    zoom=3,  # Adjust zoom level
    mapbox_style="carto-positron",  # Choose a map style,
)

fig.show()

In [124]:
# Ensure your DataFrame is named df and contains 'lon', 'lat', and 'probability'

df['diff'] = df['max_probability'] - df['min_probability']
fig = px.scatter_mapbox(
    df,
    lat='lat',
    lon='lon',
    color='diff',  # Color points by probability
    size_max=10,  # Adjust as needed
    zoom=3,  # Adjust zoom level
    mapbox_style="carto-positron",  # Choose a map style,
)

fig.show()

In [133]:
sql = '''
with risk as (
    select 
        time, 
        epoch,
        h3_index,
        sum(probability) as risk
    from 
        chinook_depth_full_inference_3_1_18_2
    where 
        n_depth_bin > 0.1 
        and n_depth_bin < 0.3
    group by 
        1, 2, 3
)
select 
    month(time) as month,
    h3_index,
    min(risk) as min_risk_month,
    max(risk) as max_risk_month
from 
    risk
group by 
    1, 2
'''
data = read_data_w_cache(sql)
print(data.shape)
data.head()

(16380, 4)


Unnamed: 0,month,h3_index,min_risk_month,max_risk_month
0,7,841dab7ffffffff,0.146919,0.242775
1,8,841db0dffffffff,0.127581,0.231189
2,6,841d261ffffffff,0.131612,0.226665
3,3,841d099ffffffff,0.083714,0.173527
4,12,841d2b9ffffffff,0.107321,0.180377


In [134]:
data['min_risk_min'] = data.groupby('h3_index')['min_risk_month'].transform('min')
data['min_risk_max'] = data.groupby('h3_index')['min_risk_month'].transform('max')
data['max_risk_max'] = data.groupby('h3_index')['min_risk_month'].transform('max')
data['max_risk_min'] = data.groupby('h3_index')['min_risk_month'].transform('min')

data = data[data['min_risk_min'] == data['min_risk_month']]
data = data.drop_duplicates(['h3_index', 'min_risk_month'])
data['lat'] = data['h3_index'].apply(lambda x: h3.h3_to_geo(x)[0])
data['lon'] = data['h3_index'].apply(lambda x: h3.h3_to_geo(x)[1])
print(data.shape)
data.head()

(1365, 10)


Unnamed: 0,month,h3_index,min_risk_month,max_risk_month,min_risk_min,min_risk_max,max_risk_max,max_risk_min,lat,lon
18,2,841d805ffffffff,0.049897,0.109032,0.049897,0.177062,0.177062,0.049897,53.752588,-151.270452
48,2,841da83ffffffff,0.060193,0.129922,0.060193,0.170639,0.170639,0.060193,53.593934,-146.10663
55,2,84228bdffffffff,0.058541,0.120088,0.058541,0.168067,0.168067,0.058541,53.028478,-162.549214
69,2,841d2d3ffffffff,0.065643,0.147337,0.065643,0.130892,0.130892,0.065643,53.577601,-133.681643
96,4,840cd19ffffffff,0.120907,0.273244,0.120907,0.216546,0.216546,0.120907,56.614557,-163.360389


In [136]:
fig = px.scatter_mapbox(
    data,
    lat='lat',
    lon='lon',
    color='month',  # Color points by probability
    size_max=10,  # Adjust as needed
    zoom=3,  # Adjust zoom level
    mapbox_style="carto-positron",  # Choose a map style,
)
fig.show()

In [139]:
fig = px.scatter_mapbox(
    data,
    lat='lat',
    lon='lon',
    color='min_risk_min',  # Color points by probability
    size_max=10,  # Adjust as needed
    zoom=3,  # Adjust zoom level
    mapbox_style="carto-positron",  # Choose a map style,
)
fig.show()

In [138]:
data['min_diff'] = data['min_risk_max'] - data['min_risk_min']
fig = px.scatter_mapbox(
    data,
    lat='lat',
    lon='lon',
    color='min_diff',  # Color points by probability
    size_max=10,  # Adjust as needed
    zoom=3,  # Adjust zoom level
    mapbox_style="carto-positron",  # Choose a map style,
)
fig.show()

In [140]:
def get_season(month):
    if month <= 4:
        return 'A'
    else:
        return 'B'

data['season'] = data['month'].apply(get_season)

fig = px.scatter_mapbox(
    data,
    lat='lat',
    lon='lon',
    color='season',  # Color points by probability
    size_max=10,  # Adjust as needed
    zoom=3,  # Adjust zoom level
    mapbox_style="carto-positron",  # Choose a map style,
)
fig.show()