This notebook documents part 2 of the study on the complementarity of image and demographic information: how well the latent space extracted from autoencoders predicts mode choice and trip generation.

In [1]:
import sys
sys.path.append("models/")

%load_ext autoreload
%autoreload 2
from collections import OrderedDict
import os
import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd
import pickle as pkl
import numpy as np

import itertools
import glob

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn import linear_model
from sklearn.metrics import r2_score, mean_squared_error
import statsmodels.api as sm


from dataloader import SurveyDataset, load_aggregate_travel_behavior, load_demo
from M1_util_train_test import load_model, test
import linear_reg
import mnl
from setup import out_dir, data_dir, image_dir, model_dir, proj_dir


In [2]:
# --- Run configuration ---------------------------------------------------

# Passed to load_aggregate_travel_behavior when the trip data is read.
data_version = "1571"

# The fields below are combined into the file name of the saved latent-space
# pickle that is loaded in the next section.
model_type = "AE"
zoomlevel = "zoom15"
output_dim = 3          # the file name uses output_dim**2 * 2048 as latent size
v2 = 1
model_run_date = "2208"

sampling = "s"

# Target columns read from the trip-behavior table; later cells index the
# resulting array by position, so the order here matters.
variable_names = ["active", "auto", "mas", "pt", "trpgen"]

demo_variables = [
    "tot_population",
    "pct25_34yrs",
    "pct35_50yrs",
    "pctover65yrs",
    "pctwhite_alone",
    "pct_nonwhite",
    "pctblack_alone",
    "pct_col_grad",
    "avg_tt_to_work",
    "inc_per_capita",
]


# Load Model Embeddings

In [3]:
# File name pattern: <model>_<zoom>_<latent size>_<v2>_<run date>.pkl
latent_path = (
    proj_dir + "latent_space/"
    + f"{model_type}_{zoomlevel}_{output_dim**2*2048}_{v2}_{model_run_date}.pkl"
)

# The file holds three consecutively pickled objects, read back in the order
# they were written.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open(latent_path, "rb") as f:
    encoder_output = pkl.load(f)
    im = pkl.load(f)
    ct = pkl.load(f)

In [4]:
# Aggregate Embeddings
# Average every embedding row that shares the same label in `ct`, producing one
# row per distinct label, ordered by ascending label.
# NOTE(review): later cells mask these rows with df_pivot['train_test'], which
# presumably requires df_pivot to list the same labels in the same sorted
# order -- confirm against load_aggregate_travel_behavior.
unique_ct = sorted(set(ct))
ct = np.array(ct)
aggregate_embeddings = np.array(
    [np.mean(encoder_output[ct == label], axis=0) for label in unique_ct]
)

# Load Trip Behavior

In [5]:
file = "origin_trip_behavior.csv"
df_pivot = load_aggregate_travel_behavior(file, data_version)

# Boolean row mask: True rows form the test split, False rows the train split.
train_test_index = df_pivot['train_test'].astype(bool).to_numpy()

# Columns follow `variable_names`; the first four are kept for the
# multi-output targets used by the MNL model, dropping the last column.
y = df_pivot[variable_names].to_numpy()
y_train, y_test = y[~train_test_index, :4], y[train_test_index, :4]

In [6]:
# Split the aggregated embeddings with the same mask used for the targets.
x_train, x_test = (
    aggregate_embeddings[~train_test_index],
    aggregate_embeddings[train_test_index],
)

In [7]:
# Single-target vectors for the linear models. Column positions follow
# `variable_names`: 0 = active, 1 = auto, 3 = pt, last = trpgen.
auto_train, auto_test = y[~train_test_index, 1], y[train_test_index, 1]

pt_train, pt_test = y[~train_test_index, 3], y[train_test_index, 3]

active_train, active_test = y[~train_test_index, 0], y[train_test_index, 0]

trpgen_train, trpgen_test = y[~train_test_index, -1], y[train_test_index, -1]


# 1. Linear Regression

### 1.1 Auto Share

In [9]:
# Linear Regression without Regularization
# Baseline for the penalized fits below; results are printed, not written to CSV.
lr = linear_model.LinearRegression()
lr.fit(x_train, auto_train)

r2_train = lr.score(x_train, auto_train)
r2_test = lr.score(x_test, auto_test)
print("Train R2: %.4f \t Test R2: %.4f" % (r2_train, r2_test))

Train R2: 1.0000 	 Test R2: 0.4236


In [10]:
# Lasso
# Sweep the L1 penalty for auto share. Each setting is printed and appended as
# one row to SAE_A_LR.csv.
# NOTE: the grid starts at alpha = 0 (no penalty); scikit-learn advises against
# fitting Lasso with alpha = 0, and warnings appear in this cell's recorded output.
alphas = (1e-4)*np.array([0,0.1,0.2,0.4,0.6,0.8,1,2,3,4,5,6,7,8,10,20,50])
for a in alphas:
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, auto_train)

    # Score once and reuse the values for both the console and the CSV row.
    r2_train = lasso.score(x_train, auto_train)
    r2_test = lasso.score(x_test, auto_test)
    n_nonzero = np.sum(lasso.coef_ != 0)

    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f \t Nonzero coef: %d"
          % (a, r2_train, r2_test, n_nonzero))

    with open(out_dir+"SAE_A_LR.csv", "a") as f:
        f.write("%.2E,%.6f,%s,%.4f,%.4f,%s,%d,%d\n"
                % (0, a, 'auto', r2_train, r2_test, 'lasso', n_nonzero, len(lasso.coef_)))

  lasso.fit(x_train, auto_train)
  model = cd_fast.enet_coordinate_descent(


Parameter: 0.00e+00 Train R2: 1.0000 	 Test R: 0.2702 	 Nonzero coef: 18432


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-05 Train R2: 0.9963 	 Test R: 0.2729 	 Nonzero coef: 3586


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-05 Train R2: 0.9873 	 Test R: 0.3301 	 Nonzero coef: 2557


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-05 Train R2: 0.9595 	 Test R: 0.4343 	 Nonzero coef: 1760


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-05 Train R2: 0.9237 	 Test R: 0.4861 	 Nonzero coef: 1322


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-05 Train R2: 0.8862 	 Test R: 0.5296 	 Nonzero coef: 1039


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-04 Train R2: 0.8492 	 Test R: 0.5658 	 Nonzero coef: 846


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-04 Train R2: 0.7193 	 Test R: 0.6318 	 Nonzero coef: 367


  model = cd_fast.enet_coordinate_descent(


Parameter: 3.00e-04 Train R2: 0.6512 	 Test R: 0.6293 	 Nonzero coef: 207


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-04 Train R2: 0.6096 	 Test R: 0.6256 	 Nonzero coef: 133


  model = cd_fast.enet_coordinate_descent(


Parameter: 5.00e-04 Train R2: 0.5818 	 Test R: 0.6194 	 Nonzero coef: 89


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-04 Train R2: 0.5610 	 Test R: 0.6049 	 Nonzero coef: 75


  model = cd_fast.enet_coordinate_descent(


Parameter: 7.00e-04 Train R2: 0.5448 	 Test R: 0.5915 	 Nonzero coef: 67


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-04 Train R2: 0.5336 	 Test R: 0.5764 	 Nonzero coef: 59


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-03 Train R2: 0.5184 	 Test R: 0.5539 	 Nonzero coef: 45


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-03 Train R2: 0.4647 	 Test R: 0.5048 	 Nonzero coef: 21
Parameter: 5.00e-03 Train R2: 0.3726 	 Test R: 0.3697 	 Nonzero coef: 11


In [None]:
# Ridge
# Sweep the L2 penalty for auto share; results are printed only (no CSV row).
ridge_alphas = (1e-3)*np.array([0,0.1,1,2,3,4,5,6,7,8,10,20,50])
for a in ridge_alphas:
    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, auto_train)

    r2_train = ridge.score(x_train, auto_train)
    r2_test = ridge.score(x_test, auto_test)
    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f" % (a, r2_train, r2_test))

### 1.2 PT Share

In [8]:
# Lasso
# Sweep the L1 penalty for PT share. Each setting is printed and appended as
# one row to SAE_A_LR.csv.
# NOTE: the grid starts at alpha = 0 (no penalty); scikit-learn advises against
# fitting Lasso with alpha = 0, and warnings appear in this cell's recorded output.
alphas = (1e-4)*np.array([0,0.1,0.2,0.4,0.6,0.8,1,2,3,4,5,6,7,8,10,20,50])
for a in alphas:
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, pt_train)

    # Score once and reuse the values for both the console and the CSV row.
    r2_train = lasso.score(x_train, pt_train)
    r2_test = lasso.score(x_test, pt_test)
    n_nonzero = np.sum(lasso.coef_ != 0)

    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f \t Nonzero coef: %d"
          % (a, r2_train, r2_test, n_nonzero))

    with open(out_dir+"SAE_A_LR.csv", "a") as f:
        f.write("%.2E,%.6f,%s,%.4f,%.4f,%s,%d,%d\n"
                % (0, a, 'pt', r2_train, r2_test, 'lasso', n_nonzero, len(lasso.coef_)))

  lasso.fit(x_train, pt_train)
  model = cd_fast.enet_coordinate_descent(


Parameter: 0.00e+00 Train R2: 1.0000 	 Test R: -0.0445 	 Nonzero coef: 18432


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-05 Train R2: 0.9830 	 Test R: 0.1063 	 Nonzero coef: 2438


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-05 Train R2: 0.9457 	 Test R: 0.2017 	 Nonzero coef: 1632


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-05 Train R2: 0.8490 	 Test R: 0.3262 	 Nonzero coef: 985


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-05 Train R2: 0.7537 	 Test R: 0.3918 	 Nonzero coef: 680


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-05 Train R2: 0.6800 	 Test R: 0.4334 	 Nonzero coef: 460


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-04 Train R2: 0.6250 	 Test R: 0.4445 	 Nonzero coef: 324


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-04 Train R2: 0.4799 	 Test R: 0.4441 	 Nonzero coef: 102


  model = cd_fast.enet_coordinate_descent(


Parameter: 3.00e-04 Train R2: 0.4334 	 Test R: 0.4154 	 Nonzero coef: 59


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-04 Train R2: 0.4091 	 Test R: 0.3995 	 Nonzero coef: 41


  model = cd_fast.enet_coordinate_descent(


Parameter: 5.00e-04 Train R2: 0.3894 	 Test R: 0.3838 	 Nonzero coef: 33


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-04 Train R2: 0.3767 	 Test R: 0.3765 	 Nonzero coef: 28


  model = cd_fast.enet_coordinate_descent(


Parameter: 7.00e-04 Train R2: 0.3665 	 Test R: 0.3661 	 Nonzero coef: 22
Parameter: 8.00e-04 Train R2: 0.3574 	 Test R: 0.3573 	 Nonzero coef: 18
Parameter: 1.00e-03 Train R2: 0.3468 	 Test R: 0.3471 	 Nonzero coef: 13
Parameter: 2.00e-03 Train R2: 0.3022 	 Test R: 0.2850 	 Nonzero coef: 10
Parameter: 5.00e-03 Train R2: 0.2089 	 Test R: 0.1831 	 Nonzero coef: 1


In [None]:
# Ridge
# Sweep the L2 penalty for PT share; results are printed only (no CSV row).
ridge_alphas = (1e-3)*np.array([0,0.1,1,2,3,4,5,6,7,8,10,20,50])
for a in ridge_alphas:
    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, pt_train)

    r2_train = ridge.score(x_train, pt_train)
    r2_test = ridge.score(x_test, pt_test)
    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f" % (a, r2_train, r2_test))

### 1.3 Active Share

In [9]:
# Lasso
# Sweep the L1 penalty for active share. Each setting is printed and appended
# as one row to SAE_A_LR.csv.
# NOTE: the grid starts at alpha = 0 (no penalty); scikit-learn advises against
# fitting Lasso with alpha = 0, and warnings appear in this cell's recorded output.
alphas = (1e-4)*np.array([0,0.1,0.2,0.4,0.6,0.8,1,2,3,4,5,6,7,8,10,20,50])
for a in alphas:
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, active_train)

    # Score once and reuse the values for both the console and the CSV row.
    r2_train = lasso.score(x_train, active_train)
    r2_test = lasso.score(x_test, active_test)
    n_nonzero = np.sum(lasso.coef_ != 0)

    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f \t Nonzero coef: %d"
          % (a, r2_train, r2_test, n_nonzero))

    with open(out_dir+"SAE_A_LR.csv", "a") as f:
        f.write("%.2E,%.6f,%s,%.4f,%.4f,%s,%d,%d\n"
                % (0, a, 'active', r2_train, r2_test, 'lasso', n_nonzero, len(lasso.coef_)))

  lasso.fit(x_train, active_train)
  model = cd_fast.enet_coordinate_descent(


Parameter: 0.00e+00 Train R2: 1.0000 	 Test R: -0.1100 	 Nonzero coef: 18432


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-05 Train R2: 0.9928 	 Test R: 0.1367 	 Nonzero coef: 3239


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-05 Train R2: 0.9759 	 Test R: 0.2152 	 Nonzero coef: 2209


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-05 Train R2: 0.9251 	 Test R: 0.2892 	 Nonzero coef: 1480


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-05 Train R2: 0.8663 	 Test R: 0.3498 	 Nonzero coef: 1033


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-05 Train R2: 0.8101 	 Test R: 0.3993 	 Nonzero coef: 810


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-04 Train R2: 0.7578 	 Test R: 0.4332 	 Nonzero coef: 625


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-04 Train R2: 0.6021 	 Test R: 0.4954 	 Nonzero coef: 248


  model = cd_fast.enet_coordinate_descent(


Parameter: 3.00e-04 Train R2: 0.5214 	 Test R: 0.4884 	 Nonzero coef: 135


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-04 Train R2: 0.4765 	 Test R: 0.4822 	 Nonzero coef: 85


  model = cd_fast.enet_coordinate_descent(


Parameter: 5.00e-04 Train R2: 0.4487 	 Test R: 0.4720 	 Nonzero coef: 60


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-04 Train R2: 0.4340 	 Test R: 0.4593 	 Nonzero coef: 48


  model = cd_fast.enet_coordinate_descent(


Parameter: 7.00e-04 Train R2: 0.4242 	 Test R: 0.4497 	 Nonzero coef: 43


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-04 Train R2: 0.4153 	 Test R: 0.4446 	 Nonzero coef: 37


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-03 Train R2: 0.3966 	 Test R: 0.4321 	 Nonzero coef: 30
Parameter: 2.00e-03 Train R2: 0.3294 	 Test R: 0.3565 	 Nonzero coef: 11
Parameter: 5.00e-03 Train R2: 0.2307 	 Test R: 0.2172 	 Nonzero coef: 5


In [None]:
# Ridge
# Sweep the L2 penalty for active share; results are printed only (no CSV row).
ridge_alphas = (1e-3)*np.array([0,0.1,1,2,3,4,5,6,7,8,10,20,50])
for a in ridge_alphas:
    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, active_train)

    r2_train = ridge.score(x_train, active_train)
    r2_test = ridge.score(x_test, active_test)
    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f" % (a, r2_train, r2_test))

### 1.4 Trip Generation

In [None]:
# Lasso
# Sweep the L1 penalty for trip generation; results are printed only (no CSV
# row). This grid differs from the one used for the mode-share targets.
alphas = (1e-4)*np.array([0,0.1,6,7,8,10,11,12,13,14,15,20,50])
for a in alphas:
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, trpgen_train)

    r2_train = lasso.score(x_train, trpgen_train)
    r2_test = lasso.score(x_test, trpgen_test)
    n_nonzero = np.sum(lasso.coef_ != 0)

    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f \t Nonzero coef: %d"
          % (a, r2_train, r2_test, n_nonzero))

In [None]:
# Ridge
# Sweep the L2 penalty for trip generation; results are printed only (no CSV
# row). The grid is scaled by 1e-2 here, versus 1e-3 for the mode-share targets.
ridge_alphas = (1e-2)*np.array([0,0.1,1,2,3,4,5,6,7,8,10,20,50])
for a in ridge_alphas:
    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, trpgen_train)

    r2_train = ridge.score(x_train, trpgen_train)
    r2_test = ridge.score(x_test, trpgen_test)
    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f" % (a, r2_train, r2_test))

# 2. MNL for Mode Share

In [13]:
# dataloader and model definition

def _full_batch_split(features, targets):
    """Wrap one split as a SurveyDataset plus a loader yielding it as one unshuffled batch."""
    dataset = SurveyDataset(torch.tensor(features, dtype=torch.float),
                            torch.tensor(targets, dtype=torch.float))
    return dataset, DataLoader(dataset, batch_size=len(dataset), shuffle=False)


trainset, trainloader = _full_batch_split(x_train, y_train)
testset, testloader = _full_batch_split(x_test, y_test)

# Losses used by the MNL training loop: KL divergence summed over all elements,
# and squared error kept per element so it can be split by column.
kldivloss = nn.KLDivLoss(reduction='sum')
mseloss = nn.MSELoss(reduction='none')

In [14]:
def _column_sst(targets):
    """Return, per column, the sum of squared deviations from that column's mean."""
    deviations = targets - np.mean(targets, axis=0)
    return np.sum(np.power(deviations, 2), axis=0)


# Denominators for the per-column R2 scores (R2 = 1 - SSE / SST).
sst_train = _column_sst(y_train)
sst_test = _column_sst(y_test)

In [22]:
def _mnl_run_epoch(model, loader, optimizer=None):
    """Make one pass over ``loader`` and accumulate loss statistics.

    When ``optimizer`` is given, an optimization step on the KL loss is taken
    after every batch (training pass). When it is ``None`` the pass is a pure
    evaluation and runs with gradient tracking disabled.

    Uses the module-level ``kldivloss`` and ``mseloss`` criteria.

    Returns:
        (kl_sum, sse_total, sse_per_alt, y_true, y_pred) where ``kl_sum`` and
        ``sse_total`` are floats summed over all batches, ``sse_per_alt`` is a
        NumPy array with one summed squared error per output column, and
        ``y_true`` / ``y_pred`` are NumPy arrays with the targets and predicted
        probabilities of every row seen, in loader order.
    """
    training = optimizer is not None
    kl_sum = 0.0
    sse_total = 0.0
    sse_per_alt = 0.0
    y_true, y_pred = [], []

    with torch.set_grad_enabled(training):
        for x_batch, y_batch in loader:
            util = model(x_batch)
            # log_softmax is the numerically stable form of log(softmax(util));
            # the naive form gives -inf (and NaN losses) once a probability
            # underflows to zero.
            log_probs = nn.functional.log_softmax(util, dim=1)
            kl = kldivloss(log_probs, y_batch)
            probs = torch.exp(log_probs)
            sq_err = mseloss(probs, y_batch)

            kl_sum += kl.item()
            sse_total += sq_err.sum().item()
            sse_per_alt = sse_per_alt + np.array(
                [sq_err[:, k].sum().item() for k in range(sq_err.shape[1])])
            y_true.append(y_batch.detach())
            y_pred.append(probs.detach())

            if training:
                # Backpropagation
                optimizer.zero_grad()
                kl.backward()
                optimizer.step()

    return (kl_sum, sse_total, sse_per_alt,
            torch.cat(y_true).numpy(), torch.cat(y_pred).numpy())


def mnl_torch(lr_list, wd_list):
    """Train one MNL model per (learning rate, weight decay) pair and log the results.

    For every pair in the Cartesian product of ``lr_list`` and ``wd_list`` a
    fresh ``mnl.MNL`` model is trained with Adam on the KL divergence between
    predicted and observed shares. Progress is printed, and one summary row per
    pair is appended to ``out_dir + "SAE_A_MNL.csv"``.

    The function relies on module-level state: ``x_train``, ``trainset``,
    ``trainloader``, ``testset``, ``testloader``, ``sst_train``, ``kldivloss``,
    ``mseloss`` and ``out_dir``.

    Args:
        lr_list: iterable of learning rates.
        wd_list: iterable of Adam ``weight_decay`` values.

    Returns:
        The model trained for the last (lr, wd) pair, with the weights it had
        when training stopped (not the weights of the best logged epoch), or
        ``None`` when the grid is empty.
    """
    n_alts = 4            # number of output alternatives given to mnl.MNL
    max_epochs = 5000
    eval_every = 5        # evaluate on the test set / check stopping every N epochs
    log_every = 300       # must stay a multiple of eval_every
    warmup_epochs = 40    # no stopping checks before this epoch
    rel_tol = 0.001       # relative loss change below which training has converged

    # Defined up front so an empty grid returns None instead of raising.
    model = None

    for (lr, wd) in itertools.product(lr_list, wd_list):

        print(f"[lr: {lr:.4f}, wd: {wd:3.2e}]")

        # model setup
        model = mnl.MNL(n_alts=n_alts, n_features=x_train.shape[-1])
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

        converged = 0
        # Training losses at the two most recent evaluation epochs.
        ref1 = 0
        ref2 = 0

        for epoch in range(max_epochs):

            # ---- training pass ----
            kl_sum, sse_total, sse_alt, _, _ = _mnl_run_epoch(model, trainloader, optimizer)
            n_train = len(trainset)

            train_kl = kl_sum/n_train
            train_mse = np.sqrt(sse_total/n_train)
            train_mse1, train_mse2, train_mse3, train_mse4 = np.sqrt(sse_alt/n_train)
            train_r1, train_r2, train_r3, train_r4 = 1 - sse_alt/sst_train[:n_alts]

            # Stopping and best-epoch selection are driven by the training loss.
            loss_ = train_kl

            if epoch % eval_every == 0:

                # ---- evaluation pass (no gradients) ----
                kl_sum, sse_total, sse_alt, y_true, y_pred = _mnl_run_epoch(model, testloader)
                n_test = len(testset)

                test_kl = kl_sum/n_test
                test_mse = np.sqrt(sse_total/n_test)
                test_mse1, test_mse2, test_mse3, test_mse4 = np.sqrt(sse_alt/n_test)

                # R2 per alternative over every test row, not just the last batch.
                r1, r2, r3, r4 = (r2_score(y_true[:, k], y_pred[:, k]) for k in range(n_alts))

                if epoch >= warmup_epochs:
                    # Converged: loss is within rel_tol of both previous checkpoints.
                    if (np.abs(loss_ - ref1)/ref1 < rel_tol) and (np.abs(loss_ - ref2)/ref2 < rel_tol):
                        converged = 1
                        print("Early stopping at epoch", epoch)
                        break
                    # Diverging: loss rose after having fallen at the previous checkpoint.
                    if (ref1 < loss_) and (ref1 < ref2):
                        print("Diverging. stop.")
                        break

                # During warm-up the latest checkpoint is always recorded;
                # afterwards only improvements in training loss are.
                if epoch < warmup_epochs or loss_ < best:
                    best = loss_
                    best_epoch = epoch
                    output = (best_epoch, train_kl, train_mse, train_mse1, train_mse2, train_mse3, train_mse4,
                              test_kl, test_mse, test_mse1, test_mse2, test_mse3, test_mse4,
                              train_r1, train_r2, train_r3, train_r4, r1, r2, r3, r4)

                ref2 = ref1
                ref1 = loss_

            if epoch % log_every == 0:

                print(f"[epoch: {epoch:>3d}] Train KL loss: {train_kl:.3f} RMSE {train_mse:.3f}")
                print(f"\t\t\t\t\t\t Train R2 score: {train_r1:.3f} {train_r2:.3f} {train_r3:.3f} {train_r4:.3f} ")
                print(f"[epoch: {epoch:>3d}] Test KL loss: {test_kl:.3f} RMSE {test_mse:.3f}")
                print(f"\t\t\t\t\t\t Test R2 score: {r1:.3f} {r2:.3f} {r3:.3f} {r4:.3f} ")

                # Same metrics again in compact one-line form.
                print(f"[epoch: {epoch:>3d}] Train KL loss: {train_kl:.3f} Train R2 score: {train_r1:.3f} {train_r2:.3f} {train_r3:.3f} {train_r4:.3f} ")
                print(f"[epoch: {epoch:>3d}] Test KL loss: {test_kl:.3f} Test R2 score: {r1:.3f} {r2:.3f} {r3:.3f} {r4:.3f} ")

        # One CSV row per (lr, wd): a constant 0 in the first column
        # (NOTE(review): meaning not visible here -- confirm), lr, wd, the
        # 21 values recorded at the best epoch, and the converged flag.
        with open(out_dir+"SAE_A_MNL.csv", "a") as f:
            f.write("%.1E,%.1E,%.1E,%d,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%d\n" % 
                    ((0,lr,wd)+output+(converged,)))

        print(f"[epoch: {best_epoch:>3d}] Train KL loss: {output[1]:.3f} Train R2 score: {output[13]:.3f} {output[14]:.3f} {output[15]:.3f} {output[16]:.3f} ")
        print(f"[epoch: {best_epoch:>3d}] Test KL loss: {output[7]:.3f} Test R2 score: {output[17]:.3f} {output[18]:.3f} {output[19]:.3f} {output[20]:.3f} ")
        print()

    return model

In [None]:
# Repeat the weight-decay sweep five times at a fixed learning rate. Each call
# trains one model per weight-decay value and appends its summary row to
# SAE_A_MNL.csv.
# NOTE(review): no random seed is set in this notebook, so repeats presumably
# differ through model initialization -- confirm.
wd_grid = [0.1,1,10,50,100,1000]
for i in range(5):
    mnl_torch(lr_list=[1e-4], wd_list=wd_grid)

[lr: 0.0001, wd: 1.00e-01]
[epoch:   0] Train KL loss: 0.943 RMSE 0.711
						 Train R2 score: -3.000 -5.447 -5.411 -1.381 
[epoch:   0] Test KL loss: 0.239 RMSE 0.310
						 Test R2 score: -0.071 -0.137 -0.319 -0.085 
[epoch:   0] Train KL loss: 0.943 Train R2 score: -3.000 -5.447 -5.411 -1.381 
[epoch:   0] Test KL loss: 0.239 Test R2 score: -0.071 -0.137 -0.319 -0.085 
[epoch: 300] Train KL loss: 0.164 RMSE 0.242
						 Train R2 score: 0.340 0.438 0.004 0.356 
[epoch: 300] Test KL loss: 0.143 RMSE 0.227
						 Test R2 score: 0.344 0.452 -0.066 0.341 
[epoch: 300] Train KL loss: 0.164 Train R2 score: 0.340 0.438 0.004 0.356 
[epoch: 300] Test KL loss: 0.143 Test R2 score: 0.344 0.452 -0.066 0.341 
[epoch: 600] Train KL loss: 0.152 RMSE 0.227
						 Train R2 score: 0.416 0.514 0.014 0.405 
[epoch: 600] Test KL loss: 0.130 RMSE 0.209
						 Test R2 score: 0.430 0.553 -0.076 0.406 
[epoch: 600] Train KL loss: 0.152 Train R2 score: 0.416 0.514 0.014 0.405 
[epoch: 600] Test KL loss: 0.13

[epoch: 900] Train KL loss: 0.142 RMSE 0.215
						 Train R2 score: 0.480 0.573 0.016 0.447 
[epoch: 900] Test KL loss: 0.118 RMSE 0.195
						 Test R2 score: 0.491 0.628 -0.073 0.462 
[epoch: 900] Train KL loss: 0.142 Train R2 score: 0.480 0.573 0.016 0.447 
[epoch: 900] Test KL loss: 0.118 Test R2 score: 0.491 0.628 -0.073 0.462 
Early stopping at epoch 975
[epoch: 970] Train KL loss: 0.141 Train R2 score: 0.486 0.578 0.018 0.453 
[epoch: 970] Test KL loss: 0.118 Test R2 score: 0.495 0.632 -0.073 0.467 

[lr: 0.0001, wd: 1.00e+00]
[epoch:   0] Train KL loss: 0.582 RMSE 0.556
						 Train R2 score: -0.017 -2.704 -5.607 -4.607 
[epoch:   0] Test KL loss: 0.283 RMSE 0.317
						 Test R2 score: -0.274 -0.140 -0.063 -0.017 
[epoch:   0] Train KL loss: 0.582 Train R2 score: -0.017 -2.704 -5.607 -4.607 
[epoch:   0] Test KL loss: 0.283 Test R2 score: -0.274 -0.140 -0.063 -0.017 
[epoch: 300] Train KL loss: 0.160 RMSE 0.237
						 Train R2 score: 0.367 0.465 0.005 0.365 
[epoch: 300] Test KL

In [32]:
# Single run at lr = 1e-4, weight decay = 0.1 (the first setting of the sweep above).
mnl_torch(lr_list=[1e-4], wd_list=[0.1])

[lr: 0.000, wd: 1.00e-01]
[epoch:   0] Train KL loss: 0.920 RMSE 0.709
						 Train R2 score: -1.233 -5.224 -22.002 -0.707 
[epoch:   0] Test KL loss: 0.478 RMSE 0.364
						 Test R2 score: -0.391 -0.619 -0.961 -0.398 
[epoch:   0] Train KL loss: 0.920 Train R2 score: -1.233 -5.224 -22.002 -0.707 
[epoch:   0] Test KL loss: 0.478 Test R2 score: -0.391 -0.619 -0.961 -0.398 
[epoch: 100] Train KL loss: 0.154 RMSE 0.228
						 Train R2 score: 0.413 0.508 0.001 0.398 
[epoch: 100] Test KL loss: 0.146 RMSE 0.228
						 Test R2 score: 0.358 0.442 -0.078 0.346 
[epoch: 100] Train KL loss: 0.154 Train R2 score: 0.413 0.508 0.001 0.398 
[epoch: 100] Test KL loss: 0.146 Test R2 score: 0.358 0.442 -0.078 0.346 
[epoch: 200] Train KL loss: 0.144 RMSE 0.218
						 Train R2 score: 0.465 0.557 0.020 0.449 
[epoch: 200] Test KL loss: 0.136 RMSE 0.218
						 Test R2 score: 0.410 0.493 -0.074 0.369 
[epoch: 200] Train KL loss: 0.144 Train R2 score: 0.465 0.557 0.020 0.449 
[epoch: 200] Test KL loss: 0.1