In [None]:
import os
import plotly.express as px
import numpy as np
import boto3
import json
import pandas as pd

# Environment for the data-access layer; set before the mirrorverse import below.
# NOTE(review): presumably HAVEN_DATABASE is read by read_data_w_cache and
# AWS_PROFILE selects the boto3 credentials profile — confirm that 'admin'
# is really needed for this read-only analysis.
os.environ.update(
    HAVEN_DATABASE='haven',
    AWS_PROFILE='admin',
)

from mirrorverse.utils import read_data_w_cache

In [None]:
# Pull every row of the experiment table, then show its shape and first rows.
# Later cells read the columns run_id, epoch, train_loss and val_loss from it.
data = read_data_w_cache('select * from movement_model_experiment_10_1_10')
print(data.shape)
data.head()

In [None]:
# Row with the smallest val_loss over all runs and epochs:
# bri = its run_id, loss = its val_loss, epoch = the epoch it occurred at.
bri, loss, epoch = (
    data
    .sort_values('val_loss')
    .loc[:, ['run_id', 'val_loss', 'epoch']]
    .values[0]
)
bri, loss, epoch

In [None]:
# Training-loss curve per run; rows are ordered by run and epoch before plotting.
px.line(
    data.sort_values(['run_id', 'epoch']),
    x='epoch',
    y='train_loss',
    color='run_id',
)

In [None]:
# Validation-loss curve per run; rows are ordered by run and epoch before plotting.
px.line(
    data.sort_values(['run_id', 'epoch']),
    x='epoch',
    y='val_loss',
    color='run_id',
)

In [None]:
# Collect the 'model' section of every config.json stored under the prefix,
# keyed by the run_id recorded inside each config file.
s3_client = boto3.client('s3')
bucket = 'mimic-log-odds-models'
# NOTE(review): this prefix ('10-1-2') does not match the experiment table name
# queried above ('10_1_10') — confirm they refer to the same set of runs.
prefix = 'movement-model-10-1-2'

# A single list_objects_v2 call returns at most 1000 keys, so walk every page;
# otherwise configs beyond the first page would be silently missing.
paginator = s3_client.get_paginator('list_objects_v2')

configs = {}
found_any = False
for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
    # Pages with no matching objects carry no 'Contents' entry.
    for obj in page.get('Contents', []):
        found_any = True
        key = obj['Key']
        if not key.endswith('config.json'):
            continue
        raw = s3_client.get_object(Bucket=bucket, Key=key)['Body'].read()
        config = json.loads(raw.decode('utf-8'))
        # Configs without a 'model' section are skipped.
        if 'model' in config:
            configs[config['run_id']] = config['model']

if not found_any:
    print("No objects found.")

In [None]:
# Model config of the run with the lowest val_loss (bri, from the cell above).
# Raises KeyError if no config.json was loaded from S3 for that run_id.
configs[bri]

In [None]:
# Expand the run_id prefix 'c6a' to the first full run_id that starts with it
# (IndexError if nothing matches).
cid = data.loc[data['run_id'].str.startswith('c6a'), 'run_id'].unique()[0]
print(cid)

In [None]:
# Model config of the run whose id starts with 'c6a' (cid, from the cell above).
configs[cid]

In [None]:
# run_ids of every config whose 'dropout' value is strictly positive.
nris = [
    run_id
    for run_id, config in configs.items()
    if config['dropout'] > 0
]

In [None]:
# One row per run: the config's fields (minus 'optimizer_kwargs' and 'layers')
# plus run_id, joined to that run's metrics at epoch 250.
# NOTE(review): 250 is presumably the final training epoch — confirm.
rows = [
    {
        **{
            key: value
            for key, value in config.items()
            if key not in ['optimizer_kwargs', 'layers']
        },
        'run_id': run_id,
    }
    for run_id, config in configs.items()
]
cdf = pd.DataFrame(rows).merge(
    data.loc[data['epoch'] == 250],
    on='run_id',
    how='inner',  # keeps only runs that have both a config and an epoch-250 row
)
cdf

In [None]:
# run_id with the lowest val_loss among the runs trained with batch_size 7500.
(
    cdf.loc[cdf['batch_size'] == 7500]
    .sort_values('val_loss')['run_id']
    .values[0]
)

In [None]:
# Median val_loss for each learning rate.
cdf.groupby('learning_rate')['val_loss'].median()

In [None]:
# Summary statistics of val_loss for runs with layer_size 16, num_layers 3, dropout 1.
cdf.loc[
    (cdf['layer_size'] == 16) & (cdf['num_layers'] == 3) & (cdf['dropout'] == 1),
    'val_loss',
].describe()

In [None]:
# val_loss (colour) over the layer_size x num_layers grid, for runs with dropout == 1.
px.scatter(
    cdf.loc[cdf['dropout'] == 1],
    x='layer_size',
    y='num_layers',
    color='val_loss',
)

In [None]:
# val_loss (colour) over the layer_size x num_layers grid, for runs with dropout == 1.
# NOTE(review): identical to the preceding cell — likely a leftover; consider removing.
px.scatter(
    cdf.loc[cdf['dropout'] == 1],
    x='layer_size',
    y='num_layers',
    color='val_loss',
)

In [None]:
# Gap between validation and training loss for each run (val_loss minus train_loss).
cdf['diff'] = cdf['val_loss'].sub(cdf['train_loss'])

In [None]:
# val-minus-train gap (colour) over dropout x num_layers, for layer_size 32.
px.scatter(
    cdf.loc[cdf['layer_size'] == 32],
    x='dropout',
    y='num_layers',
    color='diff',
)

In [None]:
# val-minus-train gap against num_layers, for layer_size 32.
px.scatter(
    cdf.loc[cdf['layer_size'] == 32],
    x='num_layers',
    y='diff',
)

In [None]:
# val_loss against batch_size, for layer_size 16.
px.scatter(
    cdf.loc[cdf['layer_size'] == 16],
    x='batch_size',
    y='val_loss',
)

In [None]:
# Train vs. validation loss as a function of dropout, restricted to runs with
# num_layers == 4, layer_size == 24 and learning_rate == 0.001.
# Losses are averaged per (dropout, learning_rate, batch_size) group, so each
# dropout value can show several points (one per batch_size).
df = (
    cdf.loc[(cdf['num_layers'] == 4) & (cdf['layer_size'] == 24)]
    .groupby(['dropout', 'learning_rate', 'batch_size'])[['val_loss', 'train_loss']]
    .mean()
    .reset_index()
)
df = df.loc[df['learning_rate'] == 0.001]

# Two views of the same table, each exposing its loss under the common name
# 'loss' and tagged with the split it came from.
tdf = df.rename(columns={'train_loss': 'loss'}).assign(case='train')
vdf = df.rename(columns={'val_loss': 'loss'}).assign(case='val')

# Stack them into long form so plotly can colour by split.
df = pd.concat([frame[['case', 'loss', 'dropout']] for frame in (tdf, vdf)])
px.scatter(
    df,
    x='dropout',
    y='loss',
    color='case',
)

In [None]:
# Rebinds `data` to the inference rows of a single run and adds the log of
# each row's probability as column 'll'.
# NOTE(review): this replaces the experiment table loaded at the top of the
# notebook, so the earlier cells cannot be re-run after this one without
# reloading — a distinct variable name would avoid that.
data = read_data_w_cache(
    "select * from movement_model_inference_m9_a2_v9 where run_id = '985acb97fdf84aaef5f7076a7c63bf93a7c6ee6703e71e7ffbf4b24743a0a773'"
)
data['ll'] = np.log(data['probability'])  # -inf (with a warning) where probability is 0
print(data.shape)
data.head()

In [None]:
# Mean log-likelihood over rows with _train false and _selected true:
# averaged within each _individual first, then across individuals, so every
# individual carries equal weight.
(
    data.loc[~data['_train'] & data['_selected']]
    .groupby('_individual')[['ll']]
    .mean()  # one mean per individual
    .mean()  # mean of those per-individual means
)

In [None]:
# Restrict to rows with stay_put == 0 and renormalise: within each
# (_individual, _decision) group the odds are divided by the group's total, so
# 'probability' sums to 1 over the remaining rows, and 'll' is its log.
# The explicit .copy() makes `move` an independent frame; assigning columns to
# a boolean-filtered slice of `data` would otherwise raise
# SettingWithCopyWarning.
move = data[data['stay_put'] == 0].copy()
move['sum_odds'] = move.groupby(['_individual', '_decision'])['odds'].transform('sum')
move['probability'] = move['odds'] / move['sum_odds']
move['ll'] = np.log(move['probability'])

In [None]:
# Same statistic as for the full table, on the renormalised stay_put == 0 rows:
# mean 'll' per _individual over rows with _train false and _selected true,
# then the mean of those per-individual means.
(
    move.loc[~move['_train'] & move['_selected']]
    .groupby('_individual')[['ll']]
    .mean()  # one mean per individual
    .mean()  # mean of those per-individual means
)

In [None]:
# exp of a difference of two logs, i.e. a ratio on the original scale.
# NOTE(review): -1.59 and -1.74 look like hand-copied mean log-likelihoods —
# confirm their source and consider computing them from the frames instead.
np.exp(1.74 - 1.59)

In [None]:
np.log(1/7)

In [None]:
# exp of a difference of two logs, i.e. a ratio on the original scale.
# NOTE(review): 1.95 and 1.12 are hand-typed constants — confirm their source.
np.exp(-1.12 + 1.95)