In [1]:
import os
if os.path.basename(os.getcwd())=='experiments':
    os.chdir("./..")

In [2]:
import wandb
import pandas as pd
from utils.runs_and_configs import *
from utils.data_preparation import *
import json

## 1) Best runs trained on full data

In [3]:
runs = get_runs()
filtered_runs = filter_runs_by_summary(runs, 'accuracy', 0.2)
filtered_runs = list(set(filtered_runs) | set(filter_runs_by_summary(runs, "mean_target_probability", 0.1)))
filtered_runs = filter_runs_by_tags(filtered_runs, "sigma_0_fillna_0")
filtered_runs = filter_runs_by_group(filtered_runs, ["no_training"], included=False)
filtered_runs = filter_runs_by_data_config(filtered_runs, "dataset_type", "full", "train")


In [4]:
get_runs_df(filtered_runs)

Unnamed: 0,name,id,group,time,tags,accuracy,mean_target_probability,RMSE,_step,default_learning_rate
0,dulcet-water-170,38mzdlbm,set_parameters_as_constant_sigma_0_per_customer,2022-08-17T13:37:37,"[accuracy_not_improving, model_converged, mu_0...",0.197871,0.10963,3.826672,28,1
1,rich-cloud-160,26kh3ojr,set_parameters_as_constant_sigma_0_per_customer,2022-08-17T13:14:27,"[accuracy_improving, model_converged, mu_0_per...",0.276032,0.1349,2.762907,2,1
2,worldly-snowball-164,lmt0pngf,set_parameters_as_constant_sigma_0_per_customer,2022-08-17T13:22:39,"[accuracy_not_improving, model_converged, mu_0...",0.541418,0.095609,1.958038,6,1
3,cerulean-monkey-174,27ht7beq,set_parameters_as_constant_sigma_0_per_customer,2022-08-17T13:49:17,"[accuracy_improving, model_converged, mu_0_per...",0.431317,0.13529,2.17835,2,1
4,true-wind-161,3ga9mh7l,set_parameters_as_constant_sigma_0_per_customer,2022-08-17T13:15:33,"[accuracy_not_improving, model_converged, mu_0...",0.43355,0.134744,2.148547,1,1
5,comfy-thunder-180,2fxy7qi9,set_parameters_as_constant_sigma_0_per_customer,2022-08-17T14:01:11,"[accuracy_not_improving, model_converged, mu_0...",0.49234,0.118603,2.066513,12,1
6,trim-energy-182,e4p391ug,set_parameters_as_constant_sigma_0_per_customer,2022-08-17T14:05:38,"[accuracy_improving, model_converged, mu_0_per...",0.427006,0.088914,2.084355,2,1
7,fine-hill-177,34tzeg6k,set_parameters_as_constant_sigma_0_per_customer,2022-08-17T13:55:41,"[accuracy_not_improving, model_converged, mu_0...",0.409452,0.106869,2.217667,12,1


In [5]:
configs = get_runs_configs(filtered_runs)

In [6]:
for config in configs:
    clean_config(config, new_data_info=stratified_data_info(), debug=False)
    set_evaluation(config, max_iter=100, every=5)

In [7]:
safe_configs_to_file(configs, "best_trained_on_full")

In [8]:
configs = read_configs_from_file("best_trained_on_full")

In [9]:
len(configs), configs[0]

(8,
 {'max_iter': 100,
  'data_info': {'train': {'dataset_type': 'train-stratified'},
   'test': {'dataset_type': 'test-stratified'}},
  'update_order': ['sigma_c', 'mu_c', 'mu_a', 'eta_r'],
  'Nc_isconstant': True,
  'mu_0_isconstant': True,
  'sigma_0_isconstant': True,
  'eta_kept_isconstant': True,
  'mean_mu_a_isconstant': False,
  'mean_mu_c_isconstant': True,
  'evaluation_iterations': [1,
   6,
   11,
   16,
   21,
   26,
   31,
   36,
   41,
   46,
   51,
   56,
   61,
   66,
   71,
   76,
   81,
   86,
   91,
   96],
  'beta_sigma_c_isconstant': True,
  'mean_eta_big_isconstant': False,
  'alpha_sigma_c_isconstant': True,
  'mean_eta_kept_isconstant': True,
  'variance_mu_a_isconstant': False,
  'variance_mu_c_isconstant': True,
  'mean_eta_small_isconstant': False,
  'variance_eta_big_isconstant': False,
  'variance_eta_kept_isconstant': True,
  'variance_eta_small_isconstant': False,
  'sigma_0_inverse_square_isconstant': True,
  'parent_run': 'dulcet-water-170',
  'default

## 2) Best runs trained on stratified data

In [19]:
runs = get_runs()
filtered_runs = filter_runs_by_summary(runs, 'accuracy', 0.19)
filtered_runs = list(set(filtered_runs) | set(filter_runs_by_summary(runs, "mean_target_probability", 0.09)))
filtered_runs = filter_runs_by_tags(filtered_runs, "sigma_0_fillna_0")
filtered_runs = filter_runs_by_group(filtered_runs, ["no_training", "different_learning_rates_best_on_full"], included=False)
filtered_runs = filter_runs_by_data_config(filtered_runs, "dataset_type", "train-stratified", "train")


In [20]:
get_runs_df(filtered_runs)

Unnamed: 0,name,id,group,time,tags,accuracy,mean_target_probability,RMSE,_step,default_learning_rate,notes
0,crisp-blaze-228,3q4c97r4,set_parameters_as_constant_stratified_data,2022-08-19T15:48:19,"[accuracy_improving, model_converged, mu_0_per...",0.150039,0.092585,4.185834,2,1,"cvcc, statified dataset, sigma_0 initiated per..."
1,scarlet-night-216,3jvjsnlg,set_parameters_as_constant_stratified_data,2022-08-19T15:31:31,"[accuracy_not_improving, model_converged, mu_0...",0.241686,0.063839,3.523315,6,1,"ccvv, statified dataset, sigma_0 initiated per..."
2,stoic-smoke-222,1o01dum0,set_parameters_as_constant_stratified_data,2022-08-19T15:40:47,"[accuracy_not_improving, model_converged, mu_0...",0.196338,0.065829,3.754678,10,1,"cvvc, statified dataset, sigma_0 initiated per..."
3,brisk-sun-223,2qf4lgui,set_parameters_as_constant_stratified_data,2022-08-19T15:43:00,"[accuracy_improving, model_converged, mu_0_per...",0.210935,0.095127,3.711304,2,1,"cccv, statified dataset, sigma_0 initiated per..."
4,peach-fog-224,2q3d4mh5,set_parameters_as_constant_stratified_data,2022-08-19T15:43:44,"[accuracy_improving, model_converged, mu_0_per...",0.206098,0.06001,3.654038,2,1,"ccvc, statified dataset, sigma_0 initiated per..."
5,radiant-silence-206,1j6oo7ms,set_parameters_as_constant_stratified_data,2022-08-19T15:16:58,"[accuracy_improving, model_converged, mu_0_per...",0.232357,0.072263,3.606114,11,1,"cvvv, statified dataset, sigma_0 initiated per..."
6,sunny-field-208,35g5euxu,set_parameters_as_constant_stratified_data,2022-08-19T15:19:13,"[accuracy_not_improving, model_converged, mu_0...",0.211627,0.095064,3.702542,1,1,"cccc, statified dataset, sigma_0 initiated per..."


In [25]:
configs = get_runs_configs(filtered_runs)
for config in configs:
    clean_config(config, new_data_info=stratified_data_info(), debug=False)
    set_evaluation(config, max_iter=100, evaluation_iterations=[1,2,6,8,10,15,20,50,100])

In [26]:
safe_configs_to_file(configs, "best_trained_on_stratified_train")
configs = read_configs_from_file("best_trained_on_stratified_train")

In [27]:
len(configs), configs[0]

(7,
 {'max_iter': 100,
  'data_info': {'train': {'dataset_type': 'train-stratified'},
   'test': {'dataset_type': 'test-stratified'}},
  'update_order': ['sigma_c', 'mu_c', 'mu_a', 'eta_r'],
  'Nc_isconstant': True,
  'mu_0_isconstant': True,
  'sigma_0_isconstant': True,
  'eta_kept_isconstant': True,
  'mean_mu_a_isconstant': False,
  'mean_mu_c_isconstant': True,
  'evaluation_iterations': [1, 2, 6, 8, 10, 15, 20, 50, 100],
  'beta_sigma_c_isconstant': True,
  'mean_eta_big_isconstant': True,
  'alpha_sigma_c_isconstant': True,
  'mean_eta_kept_isconstant': True,
  'variance_mu_a_isconstant': False,
  'variance_mu_c_isconstant': True,
  'mean_eta_small_isconstant': True,
  'variance_eta_big_isconstant': True,
  'variance_eta_kept_isconstant': True,
  'variance_eta_small_isconstant': True,
  'sigma_0_inverse_square_isconstant': True,
  'parent_run': 'crisp-blaze-228',
  'default_learning_rate': 1})