This notebook documents part 2 of the study on the complementarity of image and demographic information: how well the latent space extracted from autoencoders predicts mode choice and trip generation.

In [1]:
import sys
sys.path.append("models/")

%load_ext autoreload
%autoreload 2
from collections import OrderedDict
import os
import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd
import pickle as pkl
import numpy as np

import itertools
import glob

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn import linear_model
from sklearn.metrics import r2_score, mean_squared_error
import statsmodels.api as sm
from exp_version import get_hp_from_version_code


from dataloader import SurveyDataset, load_aggregate_travel_behavior, load_demo
from M1_util_train_test import load_model, test
import linear_reg
import mnl
from setup import out_dir, data_dir, image_dir, model_dir, proj_dir

# Base font size (12 pt) for every matplotlib figure drawn in this notebook.
plt.rcParams["font.size"] = 12

In [2]:
# --- experiment identifiers ------------------------------------------------
data_version = '1571'

model_type, sampling = 'SAE', 's'
zoomlevel, output_dim = 'zoom15', 3
model_run_date = '2208'

# --- hyperparameter version code -------------------------------------------
# The (v1, v2) pair is resolved by get_hp_from_version_code into three values,
# unpacked here as weight, lr and wd.
v1, v2 = 'D', 1
weight, lr, wd = get_hp_from_version_code(v1, v2)

# --- target columns (order matters: later cells index `y` by position) ------
variable_names = ['active', 'auto', 'mas', 'pt', 'trpgen']

# --- demographic columns ----------------------------------------------------
demo_variables = [
    'tot_population',
    'pct25_34yrs',
    'pct35_50yrs',
    'pctover65yrs',
    'pctwhite_alone',
    'pct_nonwhite',
    'pctblack_alone',
    'pct_col_grad',
    'avg_tt_to_work',
    'inc_per_capita',
]


# Load Model Embeddings

In [3]:
# The pickle file name is assembled from the model type, zoom level,
# output_dim**2 * 2048, the version code and the run date.
latent_dim = output_dim ** 2 * 2048
embedding_file = "_".join(
    [model_type, zoomlevel, str(latent_dim), v1, str(v2), model_run_date]
) + ".pkl"

# NOTE(review): pickle can execute arbitrary code on load; only read files
# produced by this project.
with open(proj_dir + "latent_space/" + embedding_file, "rb") as f:
    # Three objects are read back-to-back from the same file, in this order.
    encoder_output, im, ct = (pkl.load(f) for _ in range(3))

In [4]:
# Aggregate embeddings: average all rows of `encoder_output` that share the
# same label in `ct`, with labels taken in sorted order.
unique_ct = sorted(set(ct))
ct = np.array(ct)
aggregate_embeddings = np.array(
    [np.mean(encoder_output[ct == label], axis=0) for label in unique_ct]
)

In [5]:
# Sanity check: one row per entry of `unique_ct`; the columns are whatever
# remains after averaging `encoder_output` over axis 0.
aggregate_embeddings.shape

(1571, 18432)

# Load Trip Behavior

In [6]:
file = "origin_trip_behavior.csv"
df_pivot = load_aggregate_travel_behavior(file, data_version)

# Boolean mask from the `train_test` column: True rows form the test split,
# the remaining rows form the training split.
# (A random split was tried instead: np.random.rand(len(df_pivot)) < 0.2)
train_test_index = df_pivot['train_test'].astype(bool).to_numpy()

# All targets as one array whose columns follow `variable_names`.
y = df_pivot[variable_names].to_numpy()

# Only the first four columns are kept here, i.e. every variable except the
# last one in `variable_names`.
y_train, y_test = (y[rows, :4] for rows in (~train_test_index, train_test_index))

In [7]:
# Features are split with the same mask as the targets.
# NOTE(review): this assumes the rows of `df_pivot` are ordered like the sorted
# `unique_ct` labels used to build `aggregate_embeddings` - confirm.
x_train = aggregate_embeddings[np.logical_not(train_test_index), :]
x_test = aggregate_embeddings[train_test_index, :]

In [8]:
# Column position of each target inside `y` (same order as `variable_names`).
_col = {name: pos for pos, name in enumerate(variable_names)}

auto_train, auto_test = (y[rows, _col['auto']] for rows in (~train_test_index, train_test_index))
pt_train, pt_test = (y[rows, _col['pt']] for rows in (~train_test_index, train_test_index))
active_train, active_test = (y[rows, _col['active']] for rows in (~train_test_index, train_test_index))
trpgen_train, trpgen_test = (y[rows, _col['trpgen']] for rows in (~train_test_index, train_test_index))


# 1. Linear Regression

### 1.1 Auto Share

In [30]:
# Linear Regression without Regularization
# The estimator is named `ols` rather than `lr`, so the `lr` value unpacked from
# get_hp_from_version_code(...) in the configuration cell is not overwritten
# when this cell runs.
ols = linear_model.LinearRegression()
ols.fit(x_train, auto_train)

# R2 on the training rows versus the held-out rows.
print("Train R2: %.4f \t Test R2: %.4f" % (ols.score(x_train, auto_train), ols.score(x_test, auto_test)))

Train R2: 1.0000 	 Test R2: 0.5557


In [31]:
# Lasso: sweep the L1 penalty for auto share; print the fit and append one CSV
# row per penalty value.
# NOTE(review): the grid starts at alpha=0, where the captured output shows
# coordinate-descent warnings.
lasso_alphas = (1e-4)*np.array([0,0.1,0.2,0.4,0.6,0.8,1,2,3,4,5,6,7,8,10,20,50])
for a in lasso_alphas:
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, auto_train)

    # Score once, reuse for both the console line and the CSV row.
    r2_train = lasso.score(x_train, auto_train)
    r2_test = lasso.score(x_test, auto_test)
    n_nonzero = np.sum(lasso.coef_ != 0)

    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f \t Nonzero coef: %d" % (a, r2_train, r2_test, n_nonzero))

    with open(out_dir+"SAE_A_LR.csv", "a") as f:
        f.write("%.2E,%.6f,%s,%.4f,%.4f,%s,%d,%d\n" % (weight, a, 'auto',
                                                      r2_train, r2_test, 'lasso',
                                                      n_nonzero, len(lasso.coef_)))

  lasso.fit(x_train, auto_train)
  model = cd_fast.enet_coordinate_descent(


Parameter: 0.00e+00 Train R2: 1.0000 	 Test R: 0.5290 	 Nonzero coef: 18432


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-05 Train R2: 0.9992 	 Test R: 0.4670 	 Nonzero coef: 3684


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-05 Train R2: 0.9971 	 Test R: 0.4543 	 Nonzero coef: 2642


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-05 Train R2: 0.9899 	 Test R: 0.4857 	 Nonzero coef: 1880


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-05 Train R2: 0.9797 	 Test R: 0.4969 	 Nonzero coef: 1464


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-05 Train R2: 0.9670 	 Test R: 0.5350 	 Nonzero coef: 1254


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-04 Train R2: 0.9528 	 Test R: 0.5843 	 Nonzero coef: 1109


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-04 Train R2: 0.8765 	 Test R: 0.6576 	 Nonzero coef: 650


  model = cd_fast.enet_coordinate_descent(


Parameter: 3.00e-04 Train R2: 0.8090 	 Test R: 0.6910 	 Nonzero coef: 399


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-04 Train R2: 0.7610 	 Test R: 0.7149 	 Nonzero coef: 280


  model = cd_fast.enet_coordinate_descent(


Parameter: 5.00e-04 Train R2: 0.7269 	 Test R: 0.7250 	 Nonzero coef: 196


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-04 Train R2: 0.7049 	 Test R: 0.7223 	 Nonzero coef: 164


  model = cd_fast.enet_coordinate_descent(


Parameter: 7.00e-04 Train R2: 0.6893 	 Test R: 0.7149 	 Nonzero coef: 133


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-04 Train R2: 0.6783 	 Test R: 0.7129 	 Nonzero coef: 107


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-03 Train R2: 0.6606 	 Test R: 0.7083 	 Nonzero coef: 76
Parameter: 2.00e-03 Train R2: 0.6321 	 Test R: 0.6895 	 Nonzero coef: 29
Parameter: 5.00e-03 Train R2: 0.6056 	 Test R: 0.6710 	 Nonzero coef: 16


In [None]:
# Ridge: sweep the L2 penalty for auto share. Results are only printed.
ridge_alphas = (5e+0)*np.array([0,0.1,1,2,3,4,5,6,7,8,10,20,50])
for a in ridge_alphas:
    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, auto_train)

    r2_train = ridge.score(x_train, auto_train)
    r2_test = ridge.score(x_test, auto_test)
    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f" % (a, r2_train, r2_test))

### 1.2 PT

In [32]:
# Lasso: same penalty sweep as for auto share, fitted on the `pt` target.
for a in (1e-4)*np.array([0,0.1,0.2,0.4,0.6,0.8,1,2,3,4,5,6,7,8,10,20,50]):
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, pt_train)

    # (train R2, test R2, number of non-zero coefficients)
    fit_stats = (
        lasso.score(x_train, pt_train),
        lasso.score(x_test, pt_test),
        np.sum(lasso.coef_ != 0),
    )

    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f \t Nonzero coef: %d" % ((a,) + fit_stats))

    # One appended row per penalty value.
    csv_row = (weight, a, 'pt') + fit_stats[:2] + ('lasso', fit_stats[2], len(lasso.coef_))
    with open(out_dir+"SAE_A_LR.csv", "a") as f:
        f.write("%.2E,%.6f,%s,%.4f,%.4f,%s,%d,%d\n" % csv_row)

  lasso.fit(x_train, pt_train)
  model = cd_fast.enet_coordinate_descent(


Parameter: 0.00e+00 Train R2: 1.0000 	 Test R: 0.1199 	 Nonzero coef: 18432


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-05 Train R2: 0.9964 	 Test R: 0.2339 	 Nonzero coef: 2533


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-05 Train R2: 0.9875 	 Test R: 0.2894 	 Nonzero coef: 1817


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-05 Train R2: 0.9607 	 Test R: 0.3285 	 Nonzero coef: 1212


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-05 Train R2: 0.9268 	 Test R: 0.3745 	 Nonzero coef: 942


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-05 Train R2: 0.8915 	 Test R: 0.3915 	 Nonzero coef: 749


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-04 Train R2: 0.8557 	 Test R: 0.3909 	 Nonzero coef: 607


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-04 Train R2: 0.7164 	 Test R: 0.4166 	 Nonzero coef: 261


  model = cd_fast.enet_coordinate_descent(


Parameter: 3.00e-04 Train R2: 0.6389 	 Test R: 0.4724 	 Nonzero coef: 143


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-04 Train R2: 0.6015 	 Test R: 0.4922 	 Nonzero coef: 91


  model = cd_fast.enet_coordinate_descent(


Parameter: 5.00e-04 Train R2: 0.5786 	 Test R: 0.5000 	 Nonzero coef: 64


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-04 Train R2: 0.5606 	 Test R: 0.5017 	 Nonzero coef: 54


  model = cd_fast.enet_coordinate_descent(


Parameter: 7.00e-04 Train R2: 0.5471 	 Test R: 0.5013 	 Nonzero coef: 50


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-04 Train R2: 0.5361 	 Test R: 0.5007 	 Nonzero coef: 42
Parameter: 1.00e-03 Train R2: 0.5210 	 Test R: 0.4939 	 Nonzero coef: 33
Parameter: 2.00e-03 Train R2: 0.4984 	 Test R: 0.4933 	 Nonzero coef: 13
Parameter: 5.00e-03 Train R2: 0.4749 	 Test R: 0.4920 	 Nonzero coef: 11


In [None]:
# Ridge: sweep the L2 penalty for the `pt` target. Results are only printed.
# A finer grid was also tried:
#   (5e0) * np.array([0, 4.5,4.6,4.7,4.8,4.9, 5, 5.1,5.2,5.3,5.4,5.5])
ridge_alphas = (1e+1)*np.array([0,0.5,0.6,0.7,0.8,0.9,1,2,3,4,5,10,20,50])
for a in ridge_alphas:
    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, pt_train)

    r2_train, r2_test = ridge.score(x_train, pt_train), ridge.score(x_test, pt_test)
    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f" % (a, r2_train, r2_test))

### 1.3 Active

In [33]:
# Lasso: penalty sweep for the `active` target, logged like the other modes.
active_grid = (1e-4)*np.array([0,0.1,0.2,0.4,0.6,0.8,1,2,3,4,5,6,7,8,10,20,50])
for a in active_grid:
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, active_train)

    train_score = lasso.score(x_train, active_train)
    test_score = lasso.score(x_test, active_test)
    kept = np.sum(lasso.coef_ != 0)      # coefficients not shrunk to exactly zero
    total = len(lasso.coef_)

    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f \t Nonzero coef: %d" % (a, train_score, test_score, kept))

    with open(out_dir+"SAE_A_LR.csv", "a") as f:
        f.write("%.2E,%.6f,%s,%.4f,%.4f,%s,%d,%d\n" % (weight, a, 'active',
                                                      train_score, test_score, 'lasso',
                                                      kept, total))

  lasso.fit(x_train, active_train)
  model = cd_fast.enet_coordinate_descent(


Parameter: 0.00e+00 Train R2: 1.0000 	 Test R: 0.2905 	 Nonzero coef: 18432


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-05 Train R2: 0.9984 	 Test R: 0.3287 	 Nonzero coef: 3336


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-05 Train R2: 0.9944 	 Test R: 0.2892 	 Nonzero coef: 2323


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-05 Train R2: 0.9808 	 Test R: 0.3251 	 Nonzero coef: 1653


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-05 Train R2: 0.9620 	 Test R: 0.3575 	 Nonzero coef: 1309


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-05 Train R2: 0.9404 	 Test R: 0.3838 	 Nonzero coef: 1078


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-04 Train R2: 0.9172 	 Test R: 0.4022 	 Nonzero coef: 910


  model = cd_fast.enet_coordinate_descent(


Parameter: 2.00e-04 Train R2: 0.7988 	 Test R: 0.4680 	 Nonzero coef: 474


  model = cd_fast.enet_coordinate_descent(


Parameter: 3.00e-04 Train R2: 0.7149 	 Test R: 0.5102 	 Nonzero coef: 287


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-04 Train R2: 0.6619 	 Test R: 0.5489 	 Nonzero coef: 181


  model = cd_fast.enet_coordinate_descent(


Parameter: 5.00e-04 Train R2: 0.6275 	 Test R: 0.5565 	 Nonzero coef: 141


  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-04 Train R2: 0.6017 	 Test R: 0.5475 	 Nonzero coef: 109


  model = cd_fast.enet_coordinate_descent(


Parameter: 7.00e-04 Train R2: 0.5849 	 Test R: 0.5431 	 Nonzero coef: 90


  model = cd_fast.enet_coordinate_descent(


Parameter: 8.00e-04 Train R2: 0.5729 	 Test R: 0.5362 	 Nonzero coef: 73


  model = cd_fast.enet_coordinate_descent(


Parameter: 1.00e-03 Train R2: 0.5557 	 Test R: 0.5399 	 Nonzero coef: 55
Parameter: 2.00e-03 Train R2: 0.5157 	 Test R: 0.5346 	 Nonzero coef: 34
Parameter: 5.00e-03 Train R2: 0.4840 	 Test R: 0.5344 	 Nonzero coef: 16


In [None]:
# Ridge: sweep the L2 penalty for the `active` target. Results are only printed.
# A wider grid was also tried:
#   (1e+1)*np.array([0,0.1,0.5,0.6,0.7,0.8,0.9,1,2,3,4,5,10,20,50])
#
# The grid previously read [0, 1, 1.5, 2, 1.5, 3]: 1.5 was fitted twice and 2.5
# was skipped. It is now the evenly spaced sequence the ordering suggests.
for a in (1e+1)*np.array([0, 1, 1.5, 2, 2.5, 3]):

    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, active_train)
    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f" % (a, ridge.score(x_train, active_train),
                                                              ridge.score(x_test, active_test)))

### 1.4 Trip Generation

In [None]:
# Lasso: penalty sweep for trip generation. This grid is scaled by 1e-2 rather
# than the 1e-4 used for the mode shares. Results are only printed.
trpgen_grid = (1e-2)*np.array([0,0.1,6,7,8,10,11,12,13,14,15,20,50])
for a in trpgen_grid:
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, trpgen_train)

    r2_train = lasso.score(x_train, trpgen_train)
    r2_test = lasso.score(x_test, trpgen_test)
    n_nonzero = np.sum(lasso.coef_ != 0)
    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f \t Nonzero coef: %d" % (a, r2_train, r2_test, n_nonzero))

In [None]:
# Ridge: sweep the L2 penalty for trip generation. Results are only printed.
ridge_alphas = (1e+1)*np.array([0,0.1,1,2,3,4,5,6,7,8,10,20,50])
for a in ridge_alphas:
    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, trpgen_train)

    scores = (ridge.score(x_train, trpgen_train), ridge.score(x_test, trpgen_test))
    print("Parameter: %.2e Train R2: %.4f \t Test R: %.4f" % ((a,) + scores))

# 2. MNL for Mode Share

In [9]:
# Datasets, full-batch loaders and loss functions used by the MNL training loop.

def _float_tensor(values):
    """Copy an array into a float32 torch tensor."""
    return torch.tensor(values, dtype=torch.float)


trainset = SurveyDataset(_float_tensor(x_train), _float_tensor(y_train))
testset = SurveyDataset(_float_tensor(x_test), _float_tensor(y_test))

# batch_size equals the dataset length and shuffling is off, so each loader
# yields the whole split as a single batch in a fixed order.
trainloader = DataLoader(trainset, batch_size=len(trainset), shuffle=False)
testloader = DataLoader(testset, batch_size=len(testset), shuffle=False)

# KL divergence is summed over all elements; squared error is kept per element
# so it can later be summed per column.
kldivloss = nn.KLDivLoss(reduction='sum')
mseloss = nn.MSELoss(reduction='none')

In [10]:
def _total_sum_of_squares(targets):
    """Per-column sum of squared deviations from the column mean."""
    centered = targets - np.mean(targets, axis=0)
    return np.sum(np.power(centered, 2), axis=0)


# Denominators for the R2 = 1 - SSE/SST computation of each split.
sst_train = _total_sum_of_squares(y_train)
sst_test = _total_sum_of_squares(y_test)

In [11]:
def _mnl_pass(model, loader, optimizer=None):
    """Run one pass over ``loader`` and return the summed losses.

    Parameters
    ----------
    model : callable mapping a feature batch to utilities (one column per alternative)
    loader : iterable of ``(x_batch, y_batch)`` pairs
    optimizer : torch optimizer or None
        When given, each batch is followed by a gradient step on the KL loss.
        When None, the pass is evaluation only and runs with autograd disabled.

    Returns
    -------
    (kl_sum, sse_total, sse_alt)
        Summed KL divergence, summed squared error over all elements, and a
        list with the summed squared error of each of the four alternatives.
        Losses are measured before the gradient step of the same batch.
    """
    kl_sum = 0.0
    sse_total = 0.0
    sse_alt = [0.0] * 4

    with torch.set_grad_enabled(optimizer is not None):
        for x_batch, y_batch in loader:
            # log_softmax is the numerically stable form of log(softmax(.)).
            log_probs = F.log_softmax(model(x_batch), dim=1)
            kl = kldivloss(log_probs, y_batch)
            kl_sum += kl.item()

            sq_err = mseloss(torch.exp(log_probs), y_batch)
            sse_total += sq_err.sum().item()
            for k in range(len(sse_alt)):
                sse_alt[k] += sq_err[:, k].sum().item()

            if optimizer is not None:
                # Backpropagation
                optimizer.zero_grad()
                kl.backward()
                optimizer.step()

    return kl_sum, sse_total, sse_alt


def mnl_torch(lr_list, wd_list):
    """Train one MNL model per (learning rate, weight decay) pair and log its best epoch.

    For every pair in ``itertools.product(lr_list, wd_list)`` a fresh
    ``mnl.MNL`` with four alternatives is trained with Adam on the KL loss for
    at most 5000 epochs, using the notebook-level ``trainloader``/``testloader``.

    Every 5 epochs the model is evaluated on the test loader. From epoch 40 on:

    * training stops as converged when the train KL differs by less than 0.1%
      from each of the two previous checkpoints;
    * training stops with "Diverging. stop." when the previous checkpoint's
      train KL is lower than both the current one and the one before it;
    * otherwise the checkpoint is recorded as best if its train KL is the
      lowest so far. Before epoch 40 every checkpoint overwrites the record.

    The recorded metrics are appended as one row to ``out_dir + "SAE_A_MNL.csv"``
    and printed. The ``*_mse*`` values are root-mean-square errors.

    Test R2 is computed as ``1 - SSE / sst_test`` from the accumulated squared
    error, the same way as train R2. This does not rely on the test loader
    yielding a single batch.

    Parameters
    ----------
    lr_list : iterable of learning rates
    wd_list : iterable of weight-decay values

    Returns
    -------
    The model trained for the last (lr, wd) pair, in its final state. Weights
    are not restored to the best epoch. Returns None if either list is empty.
    """
    model = None

    for (lr, wd) in itertools.product(lr_list, wd_list):

        print(f"[lr: {lr:.4f}, wd: {wd:3.2e}]")

        # model setup
        model = mnl.MNL(n_alts=4, n_features=x_train.shape[-1])
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

        converged = 0
        # train KL at the previous checkpoint (ref1) and the one before (ref2)
        ref1 = 0
        ref2 = 0

        n_train = len(trainset)
        n_test = len(testset)

        for epoch in range(5000):

            # ---- training pass (with gradient steps) ----
            kl_sum, sse_total, sse_alt = _mnl_pass(model, trainloader, optimizer)

            train_kl = kl_sum/n_train
            train_mse = np.sqrt(sse_total/n_train)
            train_mse1, train_mse2, train_mse3, train_mse4 = [np.sqrt(s/n_train) for s in sse_alt]
            train_r1, train_r2, train_r3, train_r4 = [1-s/t for s, t in zip(sse_alt, sst_train)]

            loss_ = train_kl

            if epoch % 5 == 0:

                # ---- evaluation pass (no gradient tracking) ----
                kl_sum, sse_total, sse_alt = _mnl_pass(model, testloader)

                test_kl = kl_sum/n_test
                test_mse = np.sqrt(sse_total/n_test)
                test_mse1, test_mse2, test_mse3, test_mse4 = [np.sqrt(s/n_test) for s in sse_alt]
                r1, r2, r3, r4 = [1-s/t for s, t in zip(sse_alt, sst_test)]

                if epoch >= 40:
                    if (np.abs(loss_ - ref1)/ref1 < 0.001) and (np.abs(loss_ - ref2)/ref2 < 0.001):
                        converged = 1
                        print("Early stopping at epoch", epoch)
                        break
                    if (ref1 < loss_) and (ref1 < ref2):
                        print("Diverging. stop.")
                        break

                # Warm-up checkpoints always overwrite; afterwards only improvements do.
                if epoch < 40 or loss_ < best:
                    best = loss_
                    best_epoch = epoch
                    output = (best_epoch, train_kl, train_mse, train_mse1, train_mse2, train_mse3, train_mse4,
                              test_kl, test_mse, test_mse1, test_mse2, test_mse3, test_mse4,
                              train_r1, train_r2, train_r3, train_r4, r1, r2, r3, r4)
                ref2 = ref1
                ref1 = loss_

            if epoch % 300 == 0:
                # 300 is a multiple of 5, so the test metrics are fresh here.
                print(f"[epoch: {epoch:>3d}] Train KL loss: {train_kl:.3f} RMSE {train_mse:.3f}")
                print(f"\t\t\t\t\t\t Train R2 score: {train_r1:.3f} {train_r2:.3f} {train_r3:.3f} {train_r4:.3f} ")
                print(f"[epoch: {epoch:>3d}] Test KL loss: {test_kl:.3f} RMSE {test_mse:.3f}")
                print(f"\t\t\t\t\t\t Test R2 score: {r1:.3f} {r2:.3f} {r3:.3f} {r4:.3f} ")

                print(f"[epoch: {epoch:>3d}] Train KL loss: {train_kl:.3f} Train R2 score: {train_r1:.3f} {train_r2:.3f} {train_r3:.3f} {train_r4:.3f} ")
                print(f"[epoch: {epoch:>3d}] Test KL loss: {test_kl:.3f} Test R2 score: {r1:.3f} {r2:.3f} {r3:.3f} {r4:.3f} ")

        # One row per (lr, wd): weight, lr, wd, the 21 recorded metrics, converged flag.
        with open(out_dir+"SAE_A_MNL.csv", "a") as f:
            f.write("%.1E,%.1E,%.1E,%d,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%d\n" %
                    ((weight,lr,wd)+output+(converged,)))

        print(f"[epoch: {best_epoch:>3d}] Train KL loss: {output[1]:.3f} Train R2 score: {output[13]:.3f} {output[14]:.3f} {output[15]:.3f} {output[16]:.3f} ")
        print(f"[epoch: {best_epoch:>3d}] Test KL loss: {output[7]:.3f} Test R2 score: {output[17]:.3f} {output[18]:.3f} {output[19]:.3f} {output[20]:.3f} ")
        print()

    return model

In [15]:
# Single MNL training run with lr=1e-4 and weight decay 1e-1.
# Settings previously tried here and since commented out: lr in {1e-5, 5e-5, 1e-4}
# combined with weight decay values between 1e-3 and 1e+3.
N_RUNS = 1
for i in range(N_RUNS):
    model = mnl_torch(lr_list=[1e-4], wd_list=[1e-1])

[lr: 0.0001, wd: 1.00e-01]
[epoch:   0] Train KL loss: 0.782 RMSE 0.657
						 Train R2 score: -1.691 -4.592 -6.467 -1.708 
[epoch:   0] Test KL loss: 0.279 RMSE 0.346
						 Test R2 score: -0.140 -0.483 -2.953 -0.106 
[epoch:   0] Train KL loss: 0.782 Train R2 score: -1.691 -4.592 -6.467 -1.708 
[epoch:   0] Test KL loss: 0.279 Test R2 score: -0.140 -0.483 -2.953 -0.106 
[epoch: 300] Train KL loss: 0.150 RMSE 0.225
						 Train R2 score: 0.420 0.528 0.004 0.437 
[epoch: 300] Test KL loss: 0.125 RMSE 0.203
						 Test R2 score: 0.479 0.578 -0.073 0.412 
[epoch: 300] Train KL loss: 0.150 Train R2 score: 0.420 0.528 0.004 0.437 
[epoch: 300] Test KL loss: 0.125 Test R2 score: 0.479 0.578 -0.073 0.412 
[epoch: 600] Train KL loss: 0.141 RMSE 0.214
						 Train R2 score: 0.466 0.582 0.010 0.487 
[epoch: 600] Test KL loss: 0.116 RMSE 0.192
						 Test R2 score: 0.532 0.633 -0.103 0.448 
[epoch: 600] Train KL loss: 0.141 Train R2 score: 0.466 0.582 0.010 0.487 
[epoch: 600] Test KL loss: 0.11

In [16]:
# Persist the weights of the model returned by the last mnl_torch call.
checkpoint_path = out_dir + "sae_a_D_1_220829.pt"
torch.save(model.state_dict(), checkpoint_path)
