In [1]:
import numpy as np
import os
from pathlib import Path
import pandas as pd
import numpy as np
import orjson
import zipfile
import zstandard as zstd
import shutil
from pymatgen.core import Structure
import shutil as sh
import pathlib as pl
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, train_test_split
import time
import sys
from aim import Run

sys.path.append("../../")
from utils.save_and_load import load_from_json, save_to_json
from utils.experiment_tracking import track_metrics, log_mean_std_based_on_test_metrics

In [2]:
import numpy as np
import pandas as pd

import random
import argparse
import time
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch.optim as optim
from sklearn.model_selection import KFold
import os
import torch
from gnn_utils import EarlyStopping
from sklearn.model_selection import train_test_split
from torch_geometric.loader import DataLoader
from model_gnn import GNN_Graph
from gnn.data import CIFData, CIF_Lister
import sys

In [3]:
class Logger(object):
    """Tee-style writer that duplicates every message to a stream and a log file.

    Intended as a drop-in replacement for ``sys.stdout`` (see the commented-out
    assignment further down in this cell).

    Args:
        file_name: Path of the log file; it is opened in append mode.
        stream: Stream that receives a copy of every message
            (defaults to the ``sys.stdout`` object bound at definition time).
    """

    def __init__(self, file_name="Default.log", stream=sys.stdout):
        self.terminal = stream
        self.log = open(file_name, "a")

    def write(self, message):
        """Write ``message`` to both the wrapped stream and the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both sinks so buffered text actually reaches the stream and disk."""
        # The wrapped stream may be any object with ``write``; only flush it when it can.
        terminal_flush = getattr(self.terminal, "flush", None)
        if callable(terminal_flush):
            terminal_flush()
        if not self.log.closed:
            self.log.flush()

    def close(self):
        """Flush pending output and close the log file (the wrapped stream stays open)."""
        if not self.log.closed:
            self.flush()
            self.log.close()


if __name__ == "__main__":
    # Make sure the log directory exists and build a timestamped log-file path.
    log_path = "./Logs/"
    if os.path.exists(log_path) is False:
        os.makedirs(log_path)

    log_file_name = "".join(
        [log_path, "log-", time.strftime("%Y%m%d-%H%M%S", time.localtime()), ".log"]
    )
    # sys.stdout = Logger(log_file_name)

# Wall-clock reference; read later to print the total elapsed time.
start_time = time.time()


def setup_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs and pin the cuDNN flags.

    Args:
        seed: Value passed to every seeding call; also exported (as a string)
            through the ``PYTHONHASHSEED`` environment variable.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    # CPU generator, current CUDA device, then all CUDA devices (multi-GPU).
    for seed_fn in (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True
    cudnn.enabled = False


# Seed all RNGs once when this cell runs (fixed seed 42).
setup_seed(42)


def flatten(a):
    """Concatenate the items of every sub-iterable of ``a`` into one flat list."""
    flat = []
    for sublist in a:
        flat.extend(sublist)
    return flat


class Normalizer(object):
    """Standardise tensors with the mean/std of a reference tensor.

    The statistics are taken once, from the tensor given to the constructor,
    using ``torch.mean`` and ``torch.std`` with their default arguments.
    """

    def __init__(self, tensor):
        reference = tensor
        self.mean = torch.mean(reference)
        # NOTE(review): a constant or single-element reference would give a
        # zero/NaN std and make ``norm`` produce inf/NaN - confirm callers avoid it.
        self.std = torch.std(reference)

    def norm(self, tensor):
        """Return ``tensor`` shifted by the stored mean and scaled by the stored std."""
        centered = tensor - self.mean
        return centered / self.std

    def denorm(self, normed_tensor):
        """Invert ``norm``: scale by the stored std, then add the stored mean."""
        rescaled = normed_tensor * self.std
        return rescaled + self.mean

    def state_dict(self):
        """Return the stored statistics as ``{"mean": ..., "std": ...}``."""
        return dict(mean=self.mean, std=self.std)

    def load_state_dict(self, state_dict):
        """Replace the stored statistics with the ``"mean"``/``"std"`` entries."""
        for field in ("mean", "std"):
            setattr(self, field, state_dict[field])


In [4]:
def train(
    model,
    data_loader,
    criterion,
    optimizer,
    device,
    run: Run,
    crystal_predictions_log: dict,
    epoch: int,
    fold: int,
):
    """Run one optimisation pass over ``data_loader`` and log the outcome.

    For every batch the model output is compared (via ``criterion``) with the
    targets normalised by the module-level ``normalizer``; de-normalised
    predictions and raw targets are collected and forwarded to ``track_metrics``
    under the ``"train"`` subset.

    Args:
        model: Network to optimise; switched to training mode.
        data_loader: Iterable of batches exposing ``.to``, ``.y`` and ``.id``.
        criterion: Loss callable applied to ``(prediction, normalised_target)``.
        optimizer: Optimiser stepped once per batch.
        device: Device each batch is moved to.
        run: Tracking run handed to ``track_metrics``.
        crystal_predictions_log: Dict mapping sample id to a list of predictions;
            the first component of each prediction of this pass is appended.
        epoch: Epoch number forwarded to ``track_metrics``.
        fold: Fold number forwarded to ``track_metrics``.

    Returns:
        The batch-averaged loss of this pass.
    """
    global normalizer
    model.train()

    running_loss = 0
    sample_ids, pred_chunks, target_chunks = [], [], []
    for step, batch in enumerate(data_loader):
        optimizer.zero_grad()
        batch = batch.to(device)
        output = model(batch)
        labels = batch.y.view(output.shape)
        batch_loss = criterion(output, normalizer.norm(labels))
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

        # Keep CPU copies for logging: raw targets and de-normalised predictions.
        sample_ids.extend(batch.id)
        target_chunks.append(labels.detach().cpu())
        pred_chunks.append(normalizer.denorm(output.detach().cpu()))

    targets = torch.cat(target_chunks, dim=0).numpy()
    predictions = torch.cat(pred_chunks, dim=0).numpy()
    mean_loss = running_loss / (step + 1)

    subset = "train"
    track_metrics(
        run,
        subset=subset,
        fold=fold,
        epoch=epoch,
        loss=mean_loss,
        keys=sample_ids,
        predict=predictions,
        target=targets,
        to_track=subset != "test",
    )

    # Append this pass' prediction to the per-sample history.
    for row, prediction in enumerate(predictions):
        crystal_predictions_log.setdefault(sample_ids[row], []).append(prediction[0])

    return mean_loss


def eval(
    model,
    data_loader,
    criterion,
    device,
    run: Run,
    crystal_predictions_log: dict,
    epoch: int,
    fold: int,
):
    """Score ``model`` on ``data_loader`` without gradient tracking and log it.

    Loss is computed against targets normalised by the module-level
    ``normalizer``; de-normalised predictions and raw targets are forwarded to
    ``track_metrics`` under the ``"val"`` subset.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        data_loader: Iterable of batches exposing ``.to``, ``.y`` and ``.id``.
        criterion: Loss callable applied to ``(prediction, normalised_target)``.
        device: Device each batch is moved to.
        run: Tracking run handed to ``track_metrics``.
        crystal_predictions_log: Dict mapping sample id to a list of predictions;
            the first component of each prediction of this pass is appended.
        epoch: Epoch number forwarded to ``track_metrics``.
        fold: Fold number forwarded to ``track_metrics``.

    Returns:
        The batch-averaged loss over ``data_loader``.
    """
    global normalizer
    model.eval()

    running_loss = 0
    sample_ids, pred_chunks, target_chunks = [], [], []
    with torch.no_grad():
        for step, batch in enumerate(data_loader):
            batch = batch.to(device)
            output = model(batch)
            labels = batch.y.view(output.shape)
            running_loss += criterion(output, normalizer.norm(labels)).item()

            # Keep CPU copies for logging: raw targets and de-normalised predictions.
            sample_ids.extend(batch.id)
            target_chunks.append(labels.detach().cpu())
            pred_chunks.append(normalizer.denorm(output.detach().cpu()))

        targets = torch.cat(target_chunks, dim=0).numpy()
        predictions = torch.cat(pred_chunks, dim=0).numpy()
        mean_loss = running_loss / (step + 1)

        subset = "val"
        track_metrics(
            run,
            subset=subset,
            fold=fold,
            epoch=epoch,
            loss=mean_loss,
            keys=sample_ids,
            predict=predictions,
            target=targets,
            to_track=subset != "test",
        )

        # Append this pass' prediction to the per-sample history.
        for row, prediction in enumerate(predictions):
            crystal_predictions_log.setdefault(sample_ids[row], []).append(
                prediction[0]
            )

    return mean_loss


def test(
    model,
    data_loader,
    device,
    run: Run,
    crystal_predictions_log: dict,
    epoch: int,
    fold: int,
):
    """Predict on ``data_loader`` without gradient tracking and log the results.

    Predictions are de-normalised with the module-level ``normalizer``. The
    concatenated arrays are passed to ``track_metrics`` under the ``"test"``
    subset with a loss of 0 and ``to_track`` set to False.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        data_loader: Iterable of batches exposing ``.to``, ``.y`` and ``.id``.
        device: Device each batch is moved to.
        run: Tracking run handed to ``track_metrics``.
        crystal_predictions_log: Dict mapping sample id to a list of predictions;
            the first component of each prediction of this pass is appended.
        epoch: Epoch number forwarded to ``track_metrics``.
        fold: Fold number forwarded to ``track_metrics``.

    Returns:
        ``(y_true, y_pred)``: two lists holding one CPU tensor per batch with
        the raw targets and the de-normalised predictions respectively.
    """
    global normalizer
    model.eval()

    y_true, y_pred, sample_ids = [], [], []
    with torch.no_grad():
        for batch in data_loader:
            batch = batch.to(device)
            output = model(batch)
            labels = batch.y.view(output.shape)

            sample_ids.extend(batch.id)
            y_true.append(labels.detach().cpu())
            y_pred.append(normalizer.denorm(output.detach().cpu()))

        # Flat arrays for logging; the per-batch lists are what gets returned.
        targets = torch.cat(y_true, dim=0).numpy()
        predictions = torch.cat(y_pred, dim=0).numpy()

        subset = "test"
        track_metrics(
            run,
            subset=subset,
            fold=fold,
            epoch=epoch,
            loss=0,
            keys=sample_ids,
            predict=predictions,
            target=targets,
            to_track=subset != "test",
        )

        # Append this pass' prediction to the per-sample history.
        for row, prediction in enumerate(predictions):
            crystal_predictions_log.setdefault(sample_ids[row], []).append(
                prediction[0]
            )

    return y_true, y_pred


In [5]:
# Module-level target normalizer read by train/eval/test through `global`.
# train_for_folder rebinds it to a Normalizer built from each fold's training targets.
normalizer = None


def train_for_folder(
    run: Run,
    root_dir="./data/regression/",
    output_dir="./trained/",
    model=None,
    use_gap: bool = False,
    dataset_name: str | None = None,
):
    """Fine-tune and evaluate ``GNN_Graph`` with 10-fold cross-validation.

    For every fold the non-test indices are split 8/9 : 1/9 into train and
    validation parts. A fresh model is created and, when a pretrained file is
    configured, initialised from its matching weights. It is then trained with
    early stopping on the validation loss. The best checkpoint is evaluated on
    the test fold. Per-fold predictions are written to ``predictions/``, the
    per-fold test metrics to ``./stat_res/`` and the fold partition plus the
    per-crystal prediction history to ``train_info.json`` (logged as a run
    artifact).

    Args:
        run: Tracking run that receives hyper-parameters, timings, metrics and
            artifacts. It is closed before this function returns.
        root_dir: Only used as a local name; it is overwritten with the
            preprocessed data directory when ``use_gap`` is False.
        output_dir: Accepted for interface compatibility; not read here.
        model: Accepted for interface compatibility; a new ``GNN_Graph`` is
            built for every fold regardless of this value.
        use_gap: If True, read the ready-made dataset in ``./data/nlo``;
            otherwise build CIF files and ``id_prop.csv`` from
            ``../../data/final_data/<dataset_name>.json``.
        dataset_name: Name of the JSON dataset; required when ``use_gap`` is
            False.

    Raises:
        ValueError: If ``use_gap`` is False and ``dataset_name`` is None.
    """
    global normalizer
    if not use_gap and dataset_name is None:
        raise ValueError("dataset_name must be provided when use_gap is False")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    parser = argparse.ArgumentParser(description="CrystalGNN")
    parser.add_argument(
        "--property",
        default="nlo",
        choices=[
            "nlo",  # keep the default inside the accepted choices
            "matbench_dielectric",
            "matbench_log_gvrh",
            "matbench_log_kvrh",
            "matbench_mp_e_form",
            "matbench_mp_gap",
            "matbench_jdft2d",
        ],
        help="crystal property to train ",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=64,
        help="input batch size for training (default: 64)",
    )

    parser.add_argument(
        "--epochs", type=int, default=500, help="number of epochs to train"
    )
    parser.add_argument(
        "--patience", type=float, default=100, help="patience (default: 100)"
    )
    parser.add_argument(
        "--pretrain_model_file",
        type=str,
        default="./pre-trained/mp_gap.pth",
        help="filename to read the model (if there is any)",
    )
    # Parse an empty argument list so only the defaults above are used and the
    # process-wide sys.argv (kernel arguments in a notebook) is left untouched.
    args = parser.parse_args([])

    loss_func = torch.nn.L1Loss()

    # Column layout of id_prop.csv: id, target, then eleven "gap" columns.
    gap_columns = ["gap_"] + [f"gap{i}" for i in range(10)]
    csv_columns = ["cif_id", "label"] + gap_columns

    if use_gap:
        data_path = "./data/nlo"
        CRYSTAL_DATA = CIFData(data_path)
        id_prop_file = os.path.join(data_path, "id_prop.csv")
    else:
        data_path = "../../data/final_data/"
        dataset_path = data_path + dataset_name + ".json"
        preprocessed_data_path = f"./data/{dataset_name}/"
        # creating .csv and .cifs
        id_prop_file = os.path.join(preprocessed_data_path, "id_prop.csv")
        data = load_from_json(dataset_path)
        k_v_dict = {}
        os.makedirs(preprocessed_data_path, exist_ok=True)
        for k, v in data.items():
            Structure.from_dict(v["structure"]).to_file(
                preprocessed_data_path + k + ".cif", "cif"
            )
            k_v_dict[k] = v["shg"]
        # The gap columns are filled with zeros in this mode.
        k_v_table = dict(
            id=list(k_v_dict.keys()), label=list(k_v_dict.values())
        ) | {s: 0 for s in gap_columns}
        pd.DataFrame(k_v_table, index=list(range(len(k_v_dict)))).to_csv(
            id_prop_file, index=False, header=False
        )
        shutil.copy(
            "data/nlo/cgcnn-embedding.json",
            preprocessed_data_path + "cgcnn-embedding.json",
        )
        # using .csv and .cifs
        root_dir = preprocessed_data_path

        CRYSTAL_DATA = CIFData(root_dir=root_dir)

    dataset = pd.read_csv(id_prop_file, names=csv_columns)

    # enable deterministic learning
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)
    random.seed(42)
    np.random.seed(42)
    # Plain-Python equivalent of the IPython-only `%env` magic.
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    k_folds = 10
    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
    data_range = np.arange(0, len(dataset))

    run["hparams"] = vars(args)
    fold_partition = {}
    fold_partition_names = {}
    # Fold-independent data is built once; per-fold entries are filled in below.
    jsons_to_log = dict(
        dataset_key_target=dict(zip(dataset["cif_id"], dataset["label"])),
        fold_partition=fold_partition,
        fold_partition_names=fold_partition_names,
        crystal_predictions_log_for_all_folds={},
    )
    target = dataset["label"].tolist()

    run.set_artifacts_uri(f"file://{os.getcwd()}/artifacts/")
    # Every directory written to below must exist before the first save.
    os.makedirs("artifacts", exist_ok=True)
    os.makedirs("./saved_model", exist_ok=True)
    os.makedirs("./stat_res", exist_ok=True)
    save_path = "predictions"
    os.makedirs(save_path, exist_ok=True)

    val_res = []
    test_res = []
    for fold, (train_val_idx, test_idx) in enumerate(kfold.split(data_range)):
        crystal_predictions_log = {}
        print(f"FOLD {fold}")
        print("--------------------------------")

        train_idx, val_idx = train_test_split(
            train_val_idx, train_size=8 / 9, random_state=42
        )

        # log fold partition
        fold_partition = dict(
            fold=fold_partition.get("fold", {})
            | {
                fold: dict(
                    train_idx=train_idx.tolist(),
                    val_idx=val_idx.tolist(),
                    test_idx=test_idx.tolist(),
                )
            }
        )
        fold_partition_names = dict(
            fold=fold_partition_names.get("fold", {})
            | {
                fold: dict(
                    train_names=[dataset.iloc[x]["cif_id"] for x in train_idx.tolist()],
                    val_names=[dataset.iloc[x]["cif_id"] for x in val_idx.tolist()],
                    test_names=[dataset.iloc[x]["cif_id"] for x in test_idx.tolist()],
                )
            }
        )
        jsons_to_log["fold_partition"] = fold_partition
        jsons_to_log["fold_partition_names"] = fold_partition_names

        # The normalizer is fitted on this fold's training targets only.
        target_train = torch.tensor([target[i] for i in train_idx])
        normalizer = Normalizer(target_train)

        train_dataset = CIF_Lister(train_idx, CRYSTAL_DATA, df=dataset)
        val_dataset = CIF_Lister(val_idx, CRYSTAL_DATA, df=dataset)
        test_dataset = CIF_Lister(test_idx, CRYSTAL_DATA, df=dataset)
        g = torch.manual_seed(42)

        train_loader = DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=True, generator=g
        )
        val_loader = DataLoader(val_dataset, batch_size=args.batch_size)
        test_loader = DataLoader(test_dataset, batch_size=args.batch_size)

        model_params = dict(
            num_layer=4, num_classes=1, emb_dim=64, drop_ratio=0.2, use_gap=use_gap
        )
        model = GNN_Graph(**model_params)
        if not args.pretrain_model_file == "":
            # Load onto the CPU so a checkpoint saved on a GPU also works on a
            # CPU-only host; the model is moved to `device` further below.
            pretrained_model = torch.load(
                args.pretrain_model_file, map_location="cpu"
            )
            model_dict = model.state_dict()
            pretrained_dict = pretrained_model["model_state_dict"]
            # Only weights whose names exist in the new model are transferred.
            pretrained_dict = {
                k: v for k, v in pretrained_dict.items() if k in model_dict
            }
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
        model_file = "./saved_model/%s_bst_%s.pth" % (args.property, fold)

        run["model_parameters_count"] = sum(p.numel() for p in model.parameters())
        run["model_trainable_parameters_count"] = sum(
            p.numel() for p in model.parameters() if p.requires_grad
        )
        run["hparams"] = vars(args) | dict(model_params=model_params)

        stopper = EarlyStopping(
            mode="lower", patience=args.patience, filename=model_file
        )

        model.to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=10**-3)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
        t = time.time()

        for epoch in range(1, args.epochs + 1):
            train_loss = train(
                model,
                train_loader,
                loss_func,
                optimizer,
                device,
                run,
                crystal_predictions_log,
                epoch,
                fold,
            )
            val_loss = eval(
                model,
                val_loader,
                loss_func,
                device,
                run,
                crystal_predictions_log,
                epoch,
                fold,
            )
            # NOTE(review): val_res collects every epoch's validation loss AND
            # the post-checkpoint value below, so the "val mean" printed at the
            # end is not a mean of per-fold best scores - confirm this is intended.
            val_res.append(val_loss)
            if epoch % 20 == 0:
                print(epoch)
                print(train_loss)
                print(val_loss)
            scheduler.step()
            early_stop = stopper.step(val_loss, model)
            if early_stop:
                break
        run.track(time.time() - t, name="train_time", context={"fold": fold})

        # Restore the checkpoint kept by the early stopper before final scoring.
        stopper.load_checkpoint(model)
        val_mae = eval(
            model,
            val_loader,
            loss_func,
            device,
            run,
            crystal_predictions_log,
            epoch,
            fold,
        )
        val_res.append(val_mae)
        print("best_val_mae", val_mae)
        print("---------Evaluate Model on Test Set---------------")
        t = time.time()
        y_true, y_pred = test(
            model, test_loader, device, run, crystal_predictions_log, 0, fold
        )
        run.track(time.time() - t, name="test_time", context={"fold": fold})
        y_true = torch.cat(y_true, dim=0).numpy()
        y_pred = torch.cat(y_pred, dim=0).numpy()

        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        mae = mean_absolute_error(y_true, y_pred)
        r2 = r2_score(y_true, y_pred)
        test_scores = {"rmse": rmse, "mae": mae, "r2": r2}
        print("test score", test_scores)
        test_res.append(test_scores)
        test_true = flatten(y_true)
        test_pred = flatten(y_pred)
        df_test = pd.DataFrame(test_true, columns=["true"])
        df_test = pd.concat(
            [df_test, pd.DataFrame(test_pred, columns=["pred"])], axis=1
        )
        save_name = f"output_cv{fold}.csv"
        df_test.to_csv(f"{save_path}/{save_name}", index_label="Index")

        # Key the prediction history by the same 0-based fold index used for
        # fold_partition and the tracked metrics (the loop variable must not be
        # incremented before this point).
        crystal_predictions_log_for_all_folds = dict(
            fold=jsons_to_log["crystal_predictions_log_for_all_folds"].get("fold", {})
            | {fold: crystal_predictions_log}
        )
        jsons_to_log["crystal_predictions_log_for_all_folds"] = (
            crystal_predictions_log_for_all_folds
        )
        # The JSON is rewritten after every fold so partial results survive a crash.
        json_name = "train_info.json"
        save_to_json(jsons_to_log, json_name)
        run.log_artifact(json_name)
    cols = ["rmse", "mae", "r2"]

    te = [list(item.values()) for item in test_res]
    te_pd = pd.DataFrame(te, columns=cols)
    te_pd.to_csv(
        "./stat_res/{}_statistical_results.csv".format(args.property), index=False
    )

    print("val mean:", np.mean(val_res), "val std:", np.std(val_res))
    print("testing mean:", np.mean(te, axis=0), "test std:", np.std(te, axis=0))
    end_time = time.time()
    print("the total elapsed time is", end_time - start_time, "S")

    log_mean_std_based_on_test_metrics(run)
    run.close()


def train_for_ds(ds_name: str):
    """Create a tracking run and cross-validate the model on dataset ``ds_name``.

    Args:
        ds_name: Dataset name; used in the experiment title, in the output
            directory path and passed to ``train_for_folder`` as ``dataset_name``.
    """
    # Initialize a new run
    run = Run(
        experiment=f"GINE FT from mp_gap.pth on {ds_name}",
        log_system_params=True,
    )

    abs_path = os.path.abspath(f"trained/{ds_name}/")

    try:
        train_for_folder(run, output_dir=abs_path, dataset_name=ds_name)
    finally:
        # Close the run even when training raises. On success train_for_folder
        # has already closed it, so this repeats the close exactly as before.
        run.close()


In [6]:
# # Initialize a new run
# run = Run(
#     experiment="GINE FT from mp_gap.pth on nlo",
#     log_system_params=True,
# )

# # Log run parameters
# # run["hparams"] = vars(args)
# abs_path = os.path.abspath("trained/nlo/")

# train_for_folder(run, output_dir="abs_path")
# run.close()


In [7]:
# Run the full 10-fold fine-tuning experiment on the "base_dataset_of_eff_shg" dataset.
train_for_ds("base_dataset_of_eff_shg")

  with zopen(filename, mode=mode) as file:


env: CUBLAS_WORKSPACE_CONFIG=:4096:8
FOLD 0
--------------------------------


  pretrained_model = torch.load(args.pretrain_model_file)
  with zopen(filename, mode="rt", errors="replace") as file:
  with zopen(filename, mode="rt", errors="replace") as file:


20
0.17757041580401933
0.20419304072856903
40
0.15987700338547045
0.20481422543525696
60
0.1220419596020992
0.18157900869846344
80
0.10697418766526076
0.20342113822698593
100
0.10012176002447422
0.20694156736135483
120
0.08176674521886386
0.2058733105659485
140
0.09027270800792254
0.20967137068510056
160
0.08234829541582328
0.20323322713375092


  model.load_state_dict(torch.load(self.filename)['model_state_dict'])
  with zopen(filename, mode="rt", errors="replace") as file:


best_val_mae 0.17474929988384247
---------Evaluate Model on Test Set---------------


  pretrained_model = torch.load(args.pretrain_model_file)


test score {'rmse': 9.48091014436522, 'mae': 3.42946457862854, 'r2': 0.5984973907470703}
FOLD 1
--------------------------------
20
0.1584707498550415
0.3864123821258545
40
0.14518261299683496
0.3518219143152237
60
0.11859089136123657
0.39483919739723206
80
0.10658067694077125
0.3851796090602875
100
0.09730574374015515
0.4031425341963768
120
0.09890738473488735
0.3788216561079025


  model.load_state_dict(torch.load(self.filename)['model_state_dict'])
  pretrained_model = torch.load(args.pretrain_model_file)


best_val_mae 0.3360329121351242
---------Evaluate Model on Test Set---------------
test score {'rmse': 12.448938678407488, 'mae': 4.112592697143555, 'r2': 0.5571961402893066}
FOLD 2
--------------------------------
20
0.21323265135288239
0.17287050187587738
40
0.1462420975932708
0.23771964013576508
60
0.13580341293261602
0.22131862491369247
80
0.1105073203261082
0.22073635458946228
100
0.09769431071785781
0.20318248122930527


  model.load_state_dict(torch.load(self.filename)['model_state_dict'])
  pretrained_model = torch.load(args.pretrain_model_file)


best_val_mae 0.16534889489412308
---------Evaluate Model on Test Set---------------
test score {'rmse': 13.967493192726717, 'mae': 4.506410121917725, 'r2': 0.5223679542541504}
FOLD 3
--------------------------------
20
0.18846111171520674
0.19512592256069183
40
0.1449823843745085
0.1834743693470955
60
0.12119448672120388
0.19167093932628632
80
0.1383777389732691
0.19317779690027237
100
0.10132982925726818
0.1902484968304634
120
0.08986719497121297
0.19450999796390533
140
0.0888863644347741
0.1920366808772087
160
0.08671480474563745
0.19740375131368637


  model.load_state_dict(torch.load(self.filename)['model_state_dict'])
  pretrained_model = torch.load(args.pretrain_model_file)


best_val_mae 0.16366494446992874
---------Evaluate Model on Test Set---------------
test score {'rmse': 16.85564935085749, 'mae': 5.418757438659668, 'r2': -0.12208783626556396}
FOLD 4
--------------------------------
20
0.1488733125420717
0.2365468442440033
40
0.14926729064721328
0.22580035775899887
60
0.1224101331944649
0.22008922696113586
80
0.09494029358029366
0.22891028225421906
100
0.09595462221365708
0.25566472113132477
120
0.09679756915340057
0.23169714212417603


  model.load_state_dict(torch.load(self.filename)['model_state_dict'])
  pretrained_model = torch.load(args.pretrain_model_file)


best_val_mae 0.19085510075092316
---------Evaluate Model on Test Set---------------
test score {'rmse': 15.579111816004978, 'mae': 4.354256629943848, 'r2': 0.5169278383255005}
FOLD 5
--------------------------------
20
0.15160077151197654
0.21919316053390503
40
0.15109225190602815
0.22572169452905655
60
0.12612922203082305
0.23285410925745964
80
0.09453806166465466
0.22659259289503098
100
0.08880154186716446
0.22126521915197372
120
0.07770496807419337
0.21639898419380188
140
0.08218110152162038
0.22710203751921654
160
0.073314889978904
0.22895336896181107


  model.load_state_dict(torch.load(self.filename)['model_state_dict'])
  pretrained_model = torch.load(args.pretrain_model_file)


best_val_mae 0.19282546266913414
---------Evaluate Model on Test Set---------------
test score {'rmse': 11.702810774736848, 'mae': 3.9329822063446045, 'r2': 0.5829688310623169}
FOLD 6
--------------------------------
20
0.2073014286848215
0.2500482201576233
40
0.15162582351611212
0.19206630438566208
60
0.13086913869931147
0.1914190575480461
80
0.1014907589325538
0.16391026228666306
100
0.09122880032429329
0.16360993683338165
120
0.08914336533500598
0.16292289644479752
140
0.09755235308637986
0.15247376263141632
160
0.09453421114729
0.1574239358305931


  model.load_state_dict(torch.load(self.filename)['model_state_dict'])
  pretrained_model = torch.load(args.pretrain_model_file)


best_val_mae 0.14007066935300827
---------Evaluate Model on Test Set---------------
test score {'rmse': 11.242642858003503, 'mae': 4.667992115020752, 'r2': 0.6577613949775696}
FOLD 7
--------------------------------
20
0.17767487466335297
0.25430530309677124
40
0.13601877712286436
0.22485124319791794
60
0.11151168495416641
0.20166345685720444
80
0.09408519932856926
0.20746389031410217
100
0.12624507454725412
0.2366207093000412
120
0.08437176048755646
0.229329913854599
140
0.09420509235217021
0.2416338473558426


  model.load_state_dict(torch.load(self.filename)['model_state_dict'])
  pretrained_model = torch.load(args.pretrain_model_file)


best_val_mae 0.19597142934799194
---------Evaluate Model on Test Set---------------
test score {'rmse': 16.528344055789816, 'mae': 6.045881271362305, 'r2': 0.5808724761009216}
FOLD 8
--------------------------------
20
0.17487166076898575
0.18937494605779648
40
0.14220787756718123
0.16444505751132965
60
0.10727128787682606
0.179189994931221
80
0.09229507497870006
0.1794845312833786
100
0.11855654561748871
0.1708677038550377
120
0.08511802525474475
0.17325282096862793
140
0.07789389531199749
0.17253504693508148
160
0.08734435874682206
0.1733991503715515
180
0.07411256604469739
0.17429275810718536
200
0.07397038031082886
0.16803214699029922


  model.load_state_dict(torch.load(self.filename)['model_state_dict'])
  pretrained_model = torch.load(args.pretrain_model_file)


best_val_mae 0.15903615951538086
---------Evaluate Model on Test Set---------------
test score {'rmse': 6.5820309602188365, 'mae': 2.784467935562134, 'r2': 0.6410475969314575}
FOLD 9
--------------------------------
20
0.1666630503649895
0.23986809700727463
40
0.14603643405895966
0.23320100456476212
60
0.11967540016541114
0.251056544482708
80
0.11258599047477429
0.2687074691057205
100
0.09207113087177277
0.248882208019495


  model.load_state_dict(torch.load(self.filename)['model_state_dict'])


best_val_mae 0.23186000436544418
---------Evaluate Model on Test Set---------------
test score {'rmse': 21.509279599436116, 'mae': 6.593064785003662, 'r2': 0.19342541694641113}
val mean: 0.22550945763061658 val std: 0.05776695214257748
testing mean: [13.58972114  4.58458698  0.47289772] test std: [4.03090841 1.10242588 0.2335815 ]
the total elapsed time is 291.07199716567993 S
