This notebook documents part 3 of the analysis of the **complementarity of image and demographic information**: it combines the image latent spaces (AE, SAE and SSD embeddings) with demographic variables to predict mode choice and trip generation.

In [3]:
import sys
sys.path.append("models/")

%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd
import pickle as pkl
import numpy as np
from time import time

import glob
import itertools

from sklearn import linear_model
from sklearn.metrics import r2_score, mean_squared_error
import statsmodels.api as sm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from M1_util_train_test import load_model, test
import mnl
import linear_reg
from dataloader import SurveyDataset, load_aggregate_travel_behavior, load_demo, train_test_split_data
from setup import out_dir, data_dir, image_dir, model_dir, proj_dir



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load Model Embeddings

In [4]:
# Run configuration for the autoencoder (AE) latent space.
data_version = '1571'

model_type = 'AE'
sampling = 's'

zoomlevel = 'zoom15'
output_dim = 3
model_run_date = '2208'
v2 = 1

# Target columns, in the order later used to build the `y` matrix.
variable_names = ['active','auto','mas','pt', 'trpgen']

demo_variables = ['tot_population','pct25_34yrs','pct35_50yrs','pctover65yrs',
         'pctwhite_alone','pct_nonwhite','pctblack_alone',
         'pct_col_grad','avg_tt_to_work','inc_per_capita']

# File name layout: <model>_<zoom>_<latent size>_<v2>_<run date>.pkl
latent_file = proj_dir+"latent_space/"+"_".join(
    [model_type, zoomlevel, str(output_dim**2*2048), str(v2), model_run_date])+".pkl"

# The file holds three objects pickled back to back; they are read in order.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open(latent_file, "rb") as f:
    encoder_output = pkl.load(f)
    im = pkl.load(f)
    ct = pkl.load(f)

# Aggregate Embeddings: one mean latent vector per distinct `ct` value,
# ordered by sorted `ct` (presumably the census-tract id of each image -- TODO confirm).
unique_ct = sorted(set(ct))
ct = np.array(ct)
embed_ae = np.array([np.mean(encoder_output[ct == tract], axis=0) for tract in unique_ct])

In [5]:
# Run configuration for the SAE latent space.
model_type = 'SAE'
sampling = 's'

zoomlevel = 'zoom15'
output_dim = 3
model_run_date = '2208'

v1 = 'F'
v2 = 1

# File name layout: <model>_<zoom>_<latent size>_<v1>_<v2>_<run date>.pkl
latent_file = proj_dir+"latent_space/"+"_".join(
    [model_type, zoomlevel, str(output_dim**2*2048), v1, str(v2), model_run_date])+".pkl"

# The file holds three objects pickled back to back; they are read in order.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open(latent_file, "rb") as f:
    encoder_output = pkl.load(f)
    im = pkl.load(f)
    ct = pkl.load(f)

# Aggregate Embeddings: one mean latent vector per distinct `ct` value,
# ordered by sorted `ct` (presumably the census-tract id of each image -- TODO confirm).
unique_ct = sorted(set(ct))
ct = np.array(ct)
embed_sae = np.array([np.mean(encoder_output[ct == tract], axis=0) for tract in unique_ct])

In [6]:
# Run configuration for the SSD latent space.
model_type = 'SSD'
zoomlevel = 'zoom15'
output_dim = 3
model_run_date = '2208'
# `v2` is part of the file name below. It used to be inherited from whichever
# earlier cell ran last; it is set here (same value) so this cell is self-contained.
v2 = 1

# File name layout: SSD_<zoom>_<latent size>_<v2>_<run date>.pkl
# The file holds three objects pickled back to back; they are read in order.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open(proj_dir+"latent_space/SSD_"+zoomlevel+"_"+str(output_dim**2*2048)+"_"+str(v2)+"_"+
                       str(model_run_date)+".pkl", "rb") as f:
    encoder_output = pkl.load(f)
    im = pkl.load(f)
    ct = pkl.load(f)

# Aggregate Embeddings: one mean latent vector per distinct `ct` value,
# ordered by sorted `ct` (presumably the census-tract id of each image -- TODO confirm).
unique_ct = sorted(set(ct))
ct = np.array(ct)
embed_ssd = np.array([np.mean(encoder_output[ct == tract], axis=0) for tract in unique_ct])

# Load Demographics

In [7]:
# Demographic predictors; the list doubles as the column names of `demo` below.
demo_variables = ['tot_population','pct25_34yrs','pct35_50yrs','pctover65yrs',
         'pctwhite_alone','pct_nonwhite','pctblack_alone',
         'pct_col_grad','avg_tt_to_work','inc_per_capita']

demo_cs, demo_np = load_demo(data_dir)

# Put the identifiers in front of the demographic matrix as a 'geoid' column.
# Stacking both into one array gives them a common dtype, which is why the
# values are cast back to float when the train/test matrices are extracted.
geoid_column = np.array(demo_cs).reshape(-1,1)
demo = pd.DataFrame(np.hstack((geoid_column, demo_np)), columns = ['geoid'] + demo_variables)
demo_split = train_test_split_data(demo, data_version='1571')

# Truthy 'train_test' marks a test row; everything else is training data.
# NOTE(review): row order is assumed to match the aggregated embeddings and
# the trip-behaviour table -- verify against the loaders.
demo_is_test = demo_split['train_test'].astype(bool)
demo_train = demo_split.loc[~demo_is_test, demo_variables].to_numpy(dtype=float)
demo_test = demo_split.loc[demo_is_test, demo_variables].to_numpy(dtype=float)


# Load Trip Behavior

In [8]:
# Aggregated travel behaviour; the second argument is the number of distinct
# `ct` values, passed as a string.
file = "origin_trip_behavior.csv"
df_pivot = load_aggregate_travel_behavior(file, str(len(unique_ct)))

# Boolean mask over rows: True selects the test split, False the training split.
train_test_index = df_pivot['train_test'].astype(bool).to_numpy()

# Full target matrix (columns follow `variable_names`); the MNL targets are
# the first four columns only.
y = df_pivot[variable_names].to_numpy()
y_train, y_test = (y[rows][:, :4] for rows in (~train_test_index, train_test_index))

In [9]:
# Split each aggregated embedding matrix with the shared mask
# (True = test row, False = training row).
embed_ae_train, embed_ae_test = embed_ae[~train_test_index, :], embed_ae[train_test_index, :]
embed_sae_train, embed_sae_test = embed_sae[~train_test_index, :], embed_sae[train_test_index, :]
embed_ssd_train, embed_ssd_test = embed_ssd[~train_test_index, :], embed_ssd[train_test_index, :]

In [10]:
# Feature matrices, column blocks in this order: [demographics | SSD | AE | SAE].
# The coefficient counts in the regression cells rely on this ordering.
x_train = np.hstack((demo_train, embed_ssd_train, embed_ae_train, embed_sae_train))
x_test = np.hstack((demo_test, embed_ssd_test, embed_ae_test, embed_sae_test))

In [11]:
# Width (number of columns) of each feature block.
dim_demo = demo_train.shape[-1]
dim_embed_ae, dim_embed_sae, dim_embed_ssd = (
    block.shape[-1] for block in (embed_ae, embed_sae, embed_ssd))

In [12]:
# One train/test vector pair per regression target. Columns of `y` follow
# `variable_names`: 0 = active, 1 = auto, 3 = pt, last = trpgen.
auto_train, auto_test = y[~train_test_index,1], y[train_test_index,1]
pt_train, pt_test = y[~train_test_index,3], y[train_test_index,3]
active_train, active_test = y[~train_test_index,0], y[train_test_index,0]
trpgen_train, trpgen_test = y[~train_test_index,-1], y[train_test_index,-1]


# 1. Linear Regression

## 1.1 Auto

In [27]:
# Lasso on the auto share, sweeping the regularisation strength `alpha`.
# Each run prints train/test R2 and the number of non-zero coefficients per
# feature block, then appends a summary row to AllModels_A_LR.csv.
# NOTE(review): the saved output shows scikit-learn coordinate-descent warnings
# (presumably ConvergenceWarning); raising max_iter or standardising the
# features may be needed -- TODO confirm.

# Column boundaries inside x_train / x_test: [demographics | SSD | AE | SAE].
ssd_end = dim_demo + dim_embed_ssd
ae_end = ssd_end + dim_embed_ae

for a in (1e-4)*np.array([6,7,8]):  # wider grid tried earlier: [0,0.1,0.2,0.4,0.6,0.8,1,2,3,4,5,10,20,50]
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, auto_train)

    # Score once and reuse for both the printout and the CSV row.
    r2_train = lasso.score(x_train, auto_train)
    r2_test = lasso.score(x_test, auto_test)
    nonzero = lasso.coef_ != 0

    # The label now reads "Test R2": .score() returns R^2, not R.
    print("Parameter: %.2e Train R2: %.4f \t Test R2: %.4f \t Nonzero coef: %d, %d, %d, %d" % (
        a, r2_train, r2_test,
        np.sum(nonzero[:dim_demo]),
        np.sum(nonzero[dim_demo:ssd_end]),
        np.sum(nonzero[ssd_end:ae_end]),
        np.sum(nonzero[ae_end:])))

    with open(out_dir+"AllModels_A_LR.csv", "a") as f:
        f.write("%s,%.6f,%s,%.4f,%.4f,%s,%d,%d\n" % ('All',a,'auto',
            r2_train, r2_test, 'LR',
            np.sum(nonzero), len(lasso.coef_)))

  model = cd_fast.enet_coordinate_descent(


Parameter: 6.00e-04 Train R2: 0.7080 	 Test R: 0.7059 	 Nonzero coef: 1, 0, 2, 134


  model = cd_fast.enet_coordinate_descent(


Parameter: 7.00e-04 Train R2: 0.6934 	 Test R: 0.7060 	 Nonzero coef: 1, 0, 1, 118
Parameter: 8.00e-04 Train R2: 0.6821 	 Test R: 0.7059 	 Nonzero coef: 1, 0, 1, 96


  model = cd_fast.enet_coordinate_descent(


In [None]:
# Ridge on the auto share, sweeping `alpha` (the first grid value is 0, i.e.
# no penalty). Results are only printed; nothing is written to disk.
# The commented-out CSV writer that used to sit here was removed: it scored
# the trip-generation targets and used an undefined `model_code`.
for a in (3e0)*np.array([0,0.1,1,2,3,4,5,6,7,8,10,20,50]):

    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, auto_train)

    r2_train = ridge.score(x_train, auto_train)
    r2_test = ridge.score(x_test, auto_test)
    # The label now reads "Test R2": .score() returns R^2, not R.
    print("Parameter: %.2e Train R2: %.4f \t Test R2: %.4f" % (a, r2_train, r2_test))

## 1.2 PT 

In [24]:
# Lasso on the public-transit (pt) share, sweeping `alpha`.
# Each run prints train/test R2 and the number of non-zero coefficients per
# feature block, then appends a summary row to AllModels_A_LR.csv.
# NOTE(review): the saved output shows scikit-learn coordinate-descent warnings
# (presumably ConvergenceWarning); raising max_iter or standardising the
# features may be needed -- TODO confirm.

# Column boundaries inside x_train / x_test: [demographics | SSD | AE | SAE].
ssd_end = dim_demo + dim_embed_ssd
ae_end = ssd_end + dim_embed_ae

for a in (1e-4)*np.array([3,4,5]):  # wider grid tried earlier: [0,0.1,0.2,0.4,0.6,0.8,1,2,3,4,5,10,20,50]
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, pt_train)

    # Score once and reuse for both the printout and the CSV row.
    r2_train = lasso.score(x_train, pt_train)
    r2_test = lasso.score(x_test, pt_test)
    nonzero = lasso.coef_ != 0

    # The label now reads "Test R2": .score() returns R^2, not R.
    print("Parameter: %.2e Train R2: %.4f \t Test R2: %.4f \t Nonzero coef: %d, %d, %d, %d" % (
        a, r2_train, r2_test,
        np.sum(nonzero[:dim_demo]),
        np.sum(nonzero[dim_demo:ssd_end]),
        np.sum(nonzero[ssd_end:ae_end]),
        np.sum(nonzero[ae_end:])))

    with open(out_dir+"AllModels_A_LR.csv", "a") as f:
        f.write("%s,%.6f,%s,%.4f,%.4f,%s,%d,%d\n" % ('All',a,'pt',
            r2_train, r2_test, 'LR',
            np.sum(nonzero), len(lasso.coef_)))

  model = cd_fast.enet_coordinate_descent(


Parameter: 3.00e-04 Train R2: 0.6368 	 Test R: 0.5294 	 Nonzero coef: 1, 0, 2, 138


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-04 Train R2: 0.5991 	 Test R: 0.5385 	 Nonzero coef: 0, 0, 2, 105
Parameter: 5.00e-04 Train R2: 0.5749 	 Test R: 0.5366 	 Nonzero coef: 0, 0, 0, 79


  model = cd_fast.enet_coordinate_descent(


In [None]:
# Ridge on the public-transit (pt) share, sweeping `alpha` (the first grid
# value is 0, i.e. no penalty). Results are only printed; nothing is written to disk.
# The commented-out CSV writer that used to sit here was removed: it scored
# the trip-generation targets and used an undefined `model_code`.
for a in (5e0)*np.array([0,0.1,1,2,2.2,2.5,3,3.5,4,5,6,20,50]):

    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, pt_train)

    r2_train = ridge.score(x_train, pt_train)
    r2_test = ridge.score(x_test, pt_test)
    # The label now reads "Test R2": .score() returns R^2, not R.
    print("Parameter: %.2e Train R2: %.4f \t Test R2: %.4f" % (a, r2_train, r2_test))

## 1.3 Active

In [26]:
# Lasso on the active-mode share, sweeping `alpha`.
# Each run prints train/test R2 and the number of non-zero coefficients per
# feature block, then appends a summary row to AllModels_A_LR.csv.
# NOTE(review): the saved output shows scikit-learn coordinate-descent warnings
# (presumably ConvergenceWarning); raising max_iter or standardising the
# features may be needed -- TODO confirm.

# Column boundaries inside x_train / x_test: [demographics | SSD | AE | SAE].
ssd_end = dim_demo + dim_embed_ssd
ae_end = ssd_end + dim_embed_ae

for a in (1e-4)*np.array([3,4,5]):  # wider grid tried earlier: [0,0.1,0.2,0.4,0.6,0.8,1,2,3,4,5,10,20,50]
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, active_train)

    # Score once and reuse for both the printout and the CSV row.
    r2_train = lasso.score(x_train, active_train)
    r2_test = lasso.score(x_test, active_test)
    nonzero = lasso.coef_ != 0

    # The label now reads "Test R2": .score() returns R^2, not R.
    print("Parameter: %.2e Train R2: %.4f \t Test R2: %.4f \t Nonzero coef: %d, %d, %d, %d" % (
        a, r2_train, r2_test,
        np.sum(nonzero[:dim_demo]),
        np.sum(nonzero[dim_demo:ssd_end]),
        np.sum(nonzero[ssd_end:ae_end]),
        np.sum(nonzero[ae_end:])))

    with open(out_dir+"AllModels_A_LR.csv", "a") as f:
        f.write("%s,%.6f,%s,%.4f,%.4f,%s,%d,%d\n" % ('All',a,'active',
            r2_train, r2_test, 'LR',
            np.sum(nonzero), len(lasso.coef_)))

  model = cd_fast.enet_coordinate_descent(


Parameter: 3.00e-04 Train R2: 0.7150 	 Test R: 0.5331 	 Nonzero coef: 3, 0, 12, 272


  model = cd_fast.enet_coordinate_descent(


Parameter: 4.00e-04 Train R2: 0.6591 	 Test R: 0.5372 	 Nonzero coef: 3, 0, 4, 196
Parameter: 5.00e-04 Train R2: 0.6200 	 Test R: 0.5452 	 Nonzero coef: 2, 0, 1, 152


  model = cd_fast.enet_coordinate_descent(


In [None]:
# Ridge on the active-mode share, sweeping `alpha` (the first grid value is 0,
# i.e. no penalty). Results are only printed; nothing is written to disk.
# The commented-out CSV writer that used to sit here was removed: it scored
# the trip-generation targets and used an undefined `model_code`.
for a in (5e0)*np.array([0,0.1,1,1.5,2,2.5,3,4,10,20,50]):

    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(x_train, active_train)

    r2_train = ridge.score(x_train, active_train)
    r2_test = ridge.score(x_test, active_test)
    # The label now reads "Test R2": .score() returns R^2, not R.
    print("Parameter: %.2e Train R2: %.4f \t Test R2: %.4f" % (a, r2_train, r2_test))

## 1.4 Trip Generation

In [None]:
# Lasso on trip generation, sweeping `alpha`. Non-zero coefficients are
# reported as two counts: demographics, then all image embeddings together.
# NOTE(review): the grid includes alpha = 0 (no penalty); scikit-learn advises
# against fitting Lasso with alpha = 0 -- consider LinearRegression for that point.
for a in (1e-3)*np.array([0,0.1,6,7,8,10,11,12,13,14,15,20,50]):
    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(x_train, trpgen_train)

    # Score once per fit instead of inside the format arguments.
    r2_train = lasso.score(x_train, trpgen_train)
    r2_test = lasso.score(x_test, trpgen_test)
    nonzero = lasso.coef_ != 0

    # The label now reads "Test R2": .score() returns R^2, not R.
    print("Parameter: %.2e Train R2: %.4f \t Test R2: %.4f \t Nonzero coef: %d, %d" % (
        a, r2_train, r2_test,
        np.sum(nonzero[:dim_demo]),
        np.sum(nonzero[dim_demo:])))

#     with open(out_dir+"BA_"+variable_names[-1]+".csv", "a") as f:
#         f.write("%.6f,%.4f,%.4f,%s,%d,%d\n" % (a, 
#             lasso.score(x_train, trpgen_train), lasso.score(x_test, trpgen_test), 'lasso', 
#             np.sum(lasso.coef_ != 0), len(lasso.coef_)))

# 2. MNL

In [18]:
# Loss functions shared by the MNL training loop:
# KL divergence summed over every element, and an element-wise squared error.
kldivloss = nn.KLDivLoss(reduction='sum')
mseloss = nn.MSELoss(reduction='none')

# Datasets hold float tensors of features and mode shares; each loader
# serves its whole split as a single, unshuffled batch.
trainset = SurveyDataset(*(torch.tensor(arr, dtype=torch.float) for arr in (x_train, y_train)))
testset = SurveyDataset(*(torch.tensor(arr, dtype=torch.float) for arr in (x_test, y_test)))

trainloader = DataLoader(trainset, batch_size=len(trainset), shuffle=False)
testloader = DataLoader(testset, batch_size=len(testset), shuffle=False)

In [19]:
# Total sum of squares per target column (the denominator of R2),
# computed separately for the training and test shares.
sst_train = ((y_train - y_train.mean(axis=0)) ** 2).sum(axis=0)
sst_test = ((y_test - y_test.mean(axis=0)) ** 2).sum(axis=0)

In [20]:
def _mnl_pass(model, loader, optimizer=None, n_alts=4):
    """Run ``model`` once over ``loader`` and accumulate its metrics.

    Uses the module-level ``kldivloss`` and ``mseloss``. When ``optimizer`` is
    given, a gradient step on the batch KL loss is taken after each batch, so
    the metrics of a batch are measured before that batch's update.

    Returns
    -------
    kl_total : float
        KL loss summed over all batches.
    sse : np.ndarray of shape (n_alts,)
        Squared error between predicted and observed shares, summed per column.
    targets, shares : np.ndarray
        Observed and predicted shares of every row, in loader order.
    """
    kl_total = 0.0
    sse = np.zeros(n_alts)
    targets, shares = [], []

    for x_batch, y_batch in loader:
        # log_softmax is the numerically stable form of log(softmax(.)):
        # it cannot produce log(0) = -inf when a probability underflows.
        log_probs = nn.functional.log_softmax(model(x_batch), dim=1)
        kl = kldivloss(log_probs, y_batch)
        probs = torch.exp(log_probs)

        kl_total += kl.item()
        sse += mseloss(probs, y_batch).sum(dim=0).detach().numpy()
        targets.append(y_batch.detach().numpy())
        shares.append(probs.detach().numpy())

        if optimizer is not None:
            # Backpropagation
            optimizer.zero_grad()
            kl.backward()
            optimizer.step()

    return kl_total, sse, np.concatenate(targets), np.concatenate(shares)


def mnl_torch(lr1_list, wd1_list, lr2_list, wd2_list, dim_demo, dim_embed):
    """Grid-search an ``mnl.MNL2`` model over learning rates and weight decays.

    For every combination in the Cartesian product of the four lists, a fresh
    4-alternative model is trained with Adam on the KL divergence between
    predicted and observed mode shares. One summary row per combination is
    appended to ``out_dir + "AllModels_A_MNL.csv"``; only ``lr1`` and ``wd1``
    are recorded there, ``lr2`` and ``wd2`` are not.

    Relies on module-level state: ``trainset``/``trainloader``,
    ``testset``/``testloader``, ``kldivloss``, ``mseloss``, ``sst_train``
    and ``out_dir``.

    Parameters
    ----------
    lr1_list, wd1_list : iterable of float
        Learning rates / weight decays for parameters whose name contains 'embed'.
    lr2_list, wd2_list : iterable of float
        Learning rates / weight decays for parameters whose name contains 'demo'
        (checked only if 'embed' is not in the name).
    dim_demo, dim_embed : int
        Passed straight to ``mnl.MNL2``.

    Returns
    -------
    The model trained for the last combination, or ``None`` if the grid is empty.

    Notes
    -----
    Every 5th epoch the test set is evaluated and the stopping rules are
    applied to the training KL loss: from epoch 40 onwards training stops as
    converged when the loss is within 0.1% of the two previous checkpoints,
    and stops as diverging when the previous checkpoint was lower than both
    its neighbours. The reported row is the checkpoint with the lowest
    training KL loss (before epoch 40 each checkpoint simply overwrites it).
    """
    import warnings

    n_alts = 4
    n_train, n_test = len(trainset), len(testset)
    model = None

    for (lr1, wd1, lr2, wd2) in itertools.product(lr1_list, wd1_list, lr2_list, wd2_list):

        # Scientific notation: a fixed ".3f" rendered lr = 1e-4 as "0.000".
        print(f"[lr1: {lr1:.1e}, wd1: {wd1:3.2e}, lr2: {lr2:.1e}, wd2: {wd2:3.2e}]")

        # model setup
        model = mnl.MNL2(n_alts=n_alts, dim_embed=dim_embed, dim_demo=dim_demo)

        embed_params, demo_params, untrained_names = [], [], []
        for name, param in model.named_parameters():
            if 'embed' in name:
                embed_params.append(param)
            elif 'demo' in name:
                demo_params.append(param)
            else:
                untrained_names.append(name)

        # Parameters in neither group are not handed to the optimiser, so they
        # keep their initial values. Say so instead of skipping them silently.
        if untrained_names:
            warnings.warn("Parameters matching neither 'embed' nor 'demo' are not optimised: "
                          + ", ".join(untrained_names))

        optimizer = torch.optim.Adam([{'params':embed_params,'weight_decay':wd1,'lr':lr1},
                                      {'params':demo_params,'weight_decay':wd2, 'lr':lr2}])

        # model training
        converged = 0
        ref1 = 0   # training loss at the previous checkpoint
        ref2 = 0   # training loss two checkpoints ago

        for epoch in range(5000):

            kl_sum, sse, _, _ = _mnl_pass(model, trainloader, optimizer, n_alts)

            train_kl = kl_sum/n_train
            train_mse = np.sqrt(sse.sum()/n_train)
            train_rmse_alt = tuple(np.sqrt(sse/n_train))
            train_r = tuple(1 - sse/sst_train[:n_alts])

            loss_ = train_kl

            if epoch % 5 == 0:

                # Evaluation only: no autograd graph is needed.
                with torch.no_grad():
                    kl_sum, sse, y_true, y_pred = _mnl_pass(model, testloader, n_alts=n_alts)

                test_kl = kl_sum/n_test
                test_mse = np.sqrt(sse.sum()/n_test)
                test_rmse_alt = tuple(np.sqrt(sse/n_test))
                # R2 over every test row, not just the last batch.
                test_r = tuple(r2_score(y_true[:,k], y_pred[:,k]) for k in range(n_alts))

                snapshot = ((epoch, train_kl, train_mse) + train_rmse_alt
                            + (test_kl, test_mse) + test_rmse_alt
                            + train_r + test_r)

                if epoch >= 40:
                    if (np.abs(loss_ - ref1)/ref1<0.001) & (np.abs(loss_ - ref2)/ref2<0.001):
                        print("Early stopping at epoch", epoch)
                        converged = 1
                        break
                    if (ref1 < loss_) & (ref1 < ref2):
                        print("Diverging. stop.")
                        break
                    if loss_ < best:
                        best, best_epoch, output = loss_, epoch, snapshot
                else:
                    best, best_epoch, output = loss_, epoch, snapshot
                ref2 = ref1
                ref1 = loss_

            # Every multiple of 300 is also a checkpoint epoch, so the test
            # metrics printed here belong to the current epoch.
            if epoch % 300 == 0:

                print(f"[epoch: {epoch:>3d}] Train KL loss: {train_kl:.3f} RMSE {train_mse:.3f}")
                print(f"\t\t\t\t\t\t Train R2 score: {train_r[0]:.3f} {train_r[1]:.3f} {train_r[2]:.3f} {train_r[3]:.3f} ")
                print(f"[epoch: {epoch:>3d}] Test KL loss: {test_kl:.3f} RMSE {test_mse:.3f}")
                print(f"\t\t\t\t\t\t Test R2 score: {test_r[0]:.3f} {test_r[1]:.3f} {test_r[2]:.3f} {test_r[3]:.3f} ")

                print(f"[epoch: {epoch:>3d}] Train KL loss: {train_kl:.3f} Train R2 score: {train_r[0]:.3f} {train_r[1]:.3f} {train_r[2]:.3f} {train_r[3]:.3f} ")
                print(f"[epoch: {epoch:>3d}] Test KL loss: {test_kl:.3f} Test R2 score: {test_r[0]:.3f} {test_r[1]:.3f} {test_r[2]:.3f} {test_r[3]:.3f} ")

        # Row layout: label, lr1, wd1, best epoch, train KL/RMSE/per-alt RMSE,
        # test KL/RMSE/per-alt RMSE, train R2 x4, test R2 x4, converged flag.
        with open(out_dir+"AllModels_A_MNL.csv", "a") as f:
            f.write("%s,%.1E,%.1E,%d,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%d\n" % 
                    (('All',lr1,wd1)+output+(converged,)))

        print(f"[epoch: {best_epoch:>3d}] Train KL loss: {output[1]:.3f} Train R2 score: {output[13]:.3f} {output[14]:.3f} {output[15]:.3f} {output[16]:.3f} ")
        print(f"[epoch: {best_epoch:>3d}] Test KL loss: {output[7]:.3f} Test R2 score: {output[17]:.3f} {output[18]:.3f} {output[19]:.3f} {output[20]:.3f} ")
        print()
        
    return model

In [22]:
# Repeat the embedding weight-decay sweep five times. No random seed is set in
# the visible code, so each repeat presumably starts from a different
# initialisation -- TODO confirm.
dim_embed_all = dim_embed_ae+dim_embed_sae+dim_embed_ssd
for repeat in range(5):
    mnl_torch(lr1_list=[1e-4], wd1_list=[0.1,1,10,50,100,1000], lr2_list=[1e-2], wd2_list=[0],
              dim_demo=dim_demo, dim_embed=dim_embed_all)

[lr1: 0.000, wd1: 1.00e-01, lr2: 0.010, wd2: 0.00e+00]
Early stopping at epoch 1550
[epoch: 1545] Train KL loss: 0.093 Train R2 score: 0.707 0.766 0.278 0.728 
[epoch: 1545] Test KL loss: 0.104 Test R2 score: 0.578 0.712 -0.151 0.465 

[lr1: 0.000, wd1: 1.00e+00, lr2: 0.010, wd2: 0.00e+00]
Diverging. stop.
[epoch: 1425] Train KL loss: 0.095 Train R2 score: 0.696 0.759 0.252 0.708 
[epoch: 1425] Test KL loss: 0.104 Test R2 score: 0.573 0.707 -0.169 0.492 

[lr1: 0.000, wd1: 1.00e+01, lr2: 0.010, wd2: 0.00e+00]
Diverging. stop.
[epoch: 1500] Train KL loss: 0.097 Train R2 score: 0.690 0.757 0.196 0.708 
[epoch: 1500] Test KL loss: 0.104 Test R2 score: 0.577 0.710 -0.165 0.480 

[lr1: 0.000, wd1: 5.00e+01, lr2: 0.010, wd2: 0.00e+00]
Early stopping at epoch 1325
[epoch: 1320] Train KL loss: 0.103 Train R2 score: 0.667 0.740 0.157 0.662 
[epoch: 1320] Test KL loss: 0.102 Test R2 score: 0.581 0.719 -0.112 0.506 

[lr1: 0.000, wd1: 1.00e+02, lr2: 0.010, wd2: 0.00e+00]
Early stopping at epoch 1