In [60]:
%matplotlib inline

import os 
import json 

import boto3
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from mirrorverse.utils import read_data_w_cache

# Runtime configuration passed through environment variables.
# AWS_PROFILE selects the credentials profile boto3 uses; HAVEN_DATABASE is
# presumably read by the mirrorverse data helpers -- TODO confirm.
os.environ.update({
    'HAVEN_DATABASE': 'haven',
    'AWS_PROFILE': 'admin',
})

# Five hex colours available to the plotting cells.
COLOR_PALETTE = [
    '#648FFF',
    '#785EF0',
    '#DC267F',
    '#FE6100',
    '#FFB000',
]

In [67]:
# Collect the `model` section of every run's config.json stored under the
# experiment prefix, keyed by the run_id recorded inside each config.
s3_client = boto3.client('s3')
bucket = 'mimic-log-odds-models'
version = 7
prefix = f'movement-model-m3-a4-v{version}'

# A single list_objects_v2 call returns at most 1,000 keys. Walk every page so
# that runs are not silently dropped once the prefix holds more objects.
paginator = s3_client.get_paginator('list_objects_v2')

configs = {}
found_any_object = False
for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
    # Pages for an empty prefix carry no 'Contents' entry at all.
    for obj in page.get('Contents', []):
        found_any_object = True
        key = obj['Key']
        if not key.endswith('config.json'):
            continue
        raw_config = s3_client.get_object(Bucket=bucket, Key=key)['Body'].read()
        config = json.loads(raw_config.decode('utf-8'))
        # Configs without a 'model' section are skipped.
        if 'model' in config:
            configs[config['run_id']] = config['model']

if not found_any_object:
    print("No objects found.")

def _config_to_row(run_id, model_config):
    """Flatten one run's model config into a flat record of hyperparameters.

    NOTE(review): the parsing relies on the layer-name format, which is not
    visible here. The first layer name is read as one prefix character
    followed by an integer width, and a second layer starting with 'Dropout'
    has only its final character read, as tenths. Confirm against the code
    that writes config.json.
    """
    layers = model_config['layers']
    second_layer = layers[1]
    if second_layer.startswith('Dropout'):
        dropout = float(second_layer[-1]) / 10
    else:
        dropout = 0.0
    return {
        'run_id': run_id,
        'num_layers': model_config['num_layers'],
        'neurons': int(layers[0][1:]),
        'dropout': dropout,
        'learning_rate': model_config['optimizer_kwargs']['learning_rate'],
    }


# One record per run; configs that do not declare 'num_layers' are skipped.
rows = []
for run_id, config in configs.items():
    if 'num_layers' not in config:
        continue
    rows.append(_config_to_row(run_id, config))

configs_data = pd.DataFrame(rows)

# Per-epoch metrics for this experiment version, ordered by run and then epoch.
query = f'select * from movement_model_experiment_m3_a4_v{version}'
metric_columns = ['run_id', 'loss', 'val_loss', 'train_loss', 'epoch']
epoch_metrics = read_data_w_cache(query).sort_values(['run_id', 'epoch'])

# No join key is given, so the merge uses the columns the two frames share.
results = pd.merge(epoch_metrics[metric_columns], configs_data)
results.head()

Unnamed: 0,run_id,loss,val_loss,train_loss,epoch,num_layers,neurons,dropout,learning_rate
0,1aa0eaa4a1915ceacce96f275d442e29a81b390f65df08...,0.617224,0.582149,0.576533,1,3,24,0.0,0.0005
1,1aa0eaa4a1915ceacce96f275d442e29a81b390f65df08...,0.535804,0.49407,0.487063,2,3,24,0.0,0.0005
2,1aa0eaa4a1915ceacce96f275d442e29a81b390f65df08...,0.44089,0.400777,0.393606,3,3,24,0.0,0.0005
3,1aa0eaa4a1915ceacce96f275d442e29a81b390f65df08...,0.359286,0.331911,0.325293,4,3,24,0.0,0.0005
4,1aa0eaa4a1915ceacce96f275d442e29a81b390f65df08...,0.301966,0.283729,0.278332,5,3,24,0.0,0.0005


In [68]:
# Last recorded (non-null) value of each column per run, ranked from lowest to
# highest validation loss. Assumes `results` is still ordered by epoch within
# each run, so "last" means the final epoch.
summary_columns = [
    'val_loss',
    'train_loss',
    'loss',
    'epoch',
    'neurons',
    'dropout',
    'num_layers',
    'learning_rate',
]
final = (
    results
    .groupby('run_id')[summary_columns]
    .last()
    .reset_index()
    .sort_values('val_loss')
)
final.head()

Unnamed: 0,run_id,val_loss,train_loss,loss,epoch,neurons,dropout,num_layers,learning_rate
2,49ee9ca7158e7620df5cc726ab286e7f5069184aeb9579...,0.146438,0.147221,0.147675,100,24,0.0,3,0.0005
15,d1168ada822f9ef7a21d41564cfa5c2711ddfd6e958731...,0.146463,0.144543,0.145188,100,24,0.0,4,0.0005
1,1c58cfa7ceac02b9ae8081123c9a992f25ba22396f6325...,0.146603,0.144998,0.145684,100,24,0.0,4,0.0005
3,5b6f3c5b7285e321c9664c70209ded0da4216c16c8f2ca...,0.146746,0.147589,0.147961,100,32,0.0,3,0.0005
18,f22d0a4346d3d2ea07507749f086cc792d54439be99228...,0.146836,0.147958,0.148443,100,24,0.0,4,0.0005


In [69]:
# run_id of the run with the lowest final validation loss.
ranked_by_val_loss = final.sort_values('val_loss')
x = ranked_by_val_loss['run_id'].iloc[0]
x

'49ee9ca7158e7620df5cc726ab286e7f5069184aeb9579da8694ebd91d43b3ec'

In [56]:
# Training loss against validation loss, one point per row of `final`.
# Title and readable axis labels are set so the figure stands on its own.
px.scatter(
    final,
    x='train_loss',
    y='val_loss',
    title='Validation loss vs. training loss per run',
    labels={'train_loss': 'Training loss', 'val_loss': 'Validation loss'},
)

In [58]:
# Learning curves (validation and training loss per epoch) for the run with
# the lowest validation loss in `final`.
x = final.sort_values('val_loss')['run_id'].iloc[0]
subset = results.loc[results['run_id'] == x]

# Reshape to long form: one 'loss' column plus a 'case' label per curve.
# `.assign` builds each frame without mutating it after creation.
val = (
    subset[['epoch', 'val_loss']]
    .rename(columns={'val_loss': 'loss'})
    .assign(case='val')
)
train = (
    subset[['epoch', 'train_loss']]
    .rename(columns={'train_loss': 'loss'})
    .assign(case='train')
)

# The title names the plotted run so the figure is interpretable on its own.
px.line(
    pd.concat([val, train]),
    x='epoch',
    y='loss',
    color='case',
    title=f'Loss per epoch for run {x}',
    labels={'epoch': 'Epoch', 'loss': 'Loss', 'case': 'Split'},
)

In [36]:
# Validation loss against learning rate, one point per row of `final`.
# Title and readable axis labels are set so the figure stands on its own.
px.scatter(
    final,
    x='learning_rate',
    y='val_loss',
    title='Validation loss vs. learning rate per run',
    labels={'learning_rate': 'Learning rate', 'val_loss': 'Validation loss'},
)

In [21]:
# Minimum of each listed column per run, ordered by the lowest validation loss
# reached at any epoch. Unlike `final`, this is not the last-epoch value.
per_run_minimum = {
    'val_loss': 'min',
    'neurons': 'min',
    'num_layers': 'min',
}
results.groupby('run_id').agg(per_run_minimum).sort_values('val_loss')

Unnamed: 0_level_0,val_loss,neurons,num_layers
run_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
df367dfbf91812f56661b8bbd7baeafd736a7eb34532d57882105296530d0bda,0.145759,24,3
893d6e332bf40e4248340cb63f59c4aeaf2959c2efa711c0289f7d889761aaba,0.145801,24,4
f64f30f7229e8ce0585f42b2dd819143b83ce84b022b2ff99e4469af6820c695,0.146128,32,2
f53cf9f2e88d5eb323a3c5588cf4f9e9283c2f78fb0b45da13e1eff463718042,0.146212,24,4
dba93e11aa2c03f2d1fd48e215da9f354c665d2c0ecd8df06f63142388b8250a,0.146226,16,3
2b70b971566e8cfc934fd9ef2ca15b25a84e446fad4151f953cac948b6a6d14b,0.146297,32,3
0860e23fc2410790a51395ea76e9207c3f3c4c2f1d5c65aac8e6f294d3f81433,0.146473,24,3
d4d280ab098e1c1ab8df05de9c8766a0c86b15813b409cf16889e4b3e15ddba5,0.146585,16,2
20c46e4f8826d62d4aa4fb8678d07bf41defddb798fad61ce49a8fad7e801dbc,0.146601,32,2
c98bb56a7d5c4ce058e26c97889da98188b2551a65afe407371388cd3c5fd166,0.146674,32,4


In [18]:
# All per-epoch rows for one run, selected by its full run_id.
target_run_id = 'c98bb56a7d5c4ce058e26c97889da98188b2551a65afe407371388cd3c5fd166'
results.loc[results['run_id'].eq(target_run_id)]

Unnamed: 0,run_id,loss,val_loss,train_loss,epoch,num_layers,neurons,dropout,learning_rate
1700,c98bb56a7d5c4ce058e26c97889da98188b2551a65afe4...,0.424944,0.273521,0.263701,1,4,32,0.0,0.002
1701,c98bb56a7d5c4ce058e26c97889da98188b2551a65afe4...,0.198154,0.167105,0.17296,2,4,32,0.0,0.002
1702,c98bb56a7d5c4ce058e26c97889da98188b2551a65afe4...,0.169635,0.158877,0.163846,3,4,32,0.0,0.002
1703,c98bb56a7d5c4ce058e26c97889da98188b2551a65afe4...,0.162709,0.152768,0.159165,4,4,32,0.0,0.002
1704,c98bb56a7d5c4ce058e26c97889da98188b2551a65afe4...,0.159066,0.150203,0.156651,5,4,32,0.0,0.002
1705,c98bb56a7d5c4ce058e26c97889da98188b2551a65afe4...,0.156991,0.148731,0.154978,6,4,32,0.0,0.002
1706,c98bb56a7d5c4ce058e26c97889da98188b2551a65afe4...,0.156373,0.148199,0.154078,7,4,32,0.0,0.002
1707,c98bb56a7d5c4ce058e26c97889da98188b2551a65afe4...,0.155366,0.147683,0.153226,8,4,32,0.0,0.002
1708,c98bb56a7d5c4ce058e26c97889da98188b2551a65afe4...,0.154513,0.147297,0.152853,9,4,32,0.0,0.002
1709,c98bb56a7d5c4ce058e26c97889da98188b2551a65afe4...,0.154201,0.147178,0.15204,10,4,32,0.0,0.002


In [59]:
# Smallest and largest validation loss in `final` among runs that share the
# same hyperparameter combination.
hyperparameters = ['neurons', 'learning_rate', 'num_layers', 'dropout']
val_loss_by_setting = final.groupby(hyperparameters)['val_loss']
val_loss_by_setting.agg(['min', 'max'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,min,max
neurons,learning_rate,num_layers,dropout,Unnamed: 4_level_1,Unnamed: 5_level_1
16,0.0005,2,0.0,0.153319,0.154265
16,0.0005,3,0.0,0.147758,0.150471
16,0.0005,4,0.0,0.147656,0.152456
16,0.001,2,0.0,0.150853,0.151664
16,0.001,3,0.0,0.146226,0.14699
16,0.001,4,0.0,0.147178,0.148499
16,0.002,2,0.0,0.146709,0.148849
16,0.002,3,0.0,0.147061,0.149432
16,0.002,4,0.0,0.148406,0.149354
24,0.0005,2,0.0,0.149959,0.15073
