In [1]:
%load_ext autoreload
import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import sklearn.metrics
import tensorflow as tf

from src import preparedata
from src import traintransformer
from src import transformermodel


In [6]:
# Load the experiment configuration. The context manager closes the file
# handle deterministically (the previous bare `open(...)` left it to the
# garbage collector), and the explicit encoding keeps parsing independent of
# the platform's default locale.
with open("params/params.json", "r", encoding="utf-8") as params_file:
    params = json.load(params_file)

# Build the dataset wrapper from the "dataprepinargs" config section and run
# its preparation step. Later cells read the Xt_*/Xc_*/Y_* train and test
# attributes from this object.
dataset = preparedata.readTransformerData(params["dataprepinargs"])
dataset.preparedata()

Mean per column (shape): (31,)
[3.37377567e-01 3.56235749e-06 9.91227657e-01 2.24726433e+00
 6.88662964e-01 2.83613458e-02 3.84363547e-01 4.77288135e+00
 2.28409226e+00 2.24211400e-02 2.58218508e-03 4.93846793e-03
 6.32293834e-02 6.24560288e-03 2.89504466e+01 5.53486080e+01
 6.13873430e+00 1.77681776e-02 3.66009223e-03 2.18470647e-03
 7.85163740e-02 2.70172285e-01 4.76005854e+00 2.81624643e+01
 3.46659737e+00 5.49580121e+00 1.00167371e+01 1.35190149e+01
 2.41176557e-01 5.84255289e+01 2.96733512e+01]
Overall mean: 5.817267754268974
Relevant columns (shape): (31,)
[False False False False False False False False False False False False
 False False  True  True  True False False False False False False  True
 False False  True  True False  True  True]


In [7]:
# Sanity check: show the shapes of the two training input arrays that are fed
# to the model together (Xt first, Xc second).
xt_train_shape = dataset.Xt_train.shape
xc_train_shape = dataset.Xc_train.shape
print(xt_train_shape, xc_train_shape)

(62408, 31, 1) (62408, 32)


In [None]:
# Instantiate the model wrapper from the "modelparam" section of the config.
model_config = params["modelparam"]
landslidehazard = transformermodel.lsmodel(model_config)

# preparemodel() must run before `.model` is used below
# (presumably it builds the underlying network -- confirm in src/transformermodel).
landslidehazard.preparemodel()

# Print the architecture summary as a quick visual check of the built model.
landslidehazard.model.summary()

In [None]:
# Train the model on the training split using the "trainparam" config section.
#
# NaNs in Xc_train are replaced with 0.0 before being fed to the model. The
# fill value is passed by keyword on purpose: the second *positional*
# parameter of np.nan_to_num is `copy`, so the previous
# `np.nan_to_num(x, 0)` meant `copy=False` and could overwrite
# dataset.Xc_train in place instead of selecting the fill value.
xc_train_filled = np.nan_to_num(dataset.Xc_train, nan=0.0)

traintransformer.trainmodel(
    landslidehazard.model,
    [dataset.Xt_train, xc_train_filled],
    dataset.Y_train,
    params["trainparam"],
)

In [None]:
# Evaluate the trained model on the training and testing splits: ROC/AUC on the
# raw scores, thresholded classification metrics, confusion matrices, and one
# ROC figure per split (only the test figure is written to disk).

# Scores strictly above this value are classified as the positive class.
DECISION_THRESHOLD = 0.50


def _report_binary_metrics(split_name, y_true, y_pred):
    """Compute, print, and return the thresholded metrics for one split.

    Args:
        split_name: Prefix for the printed lines, e.g. "Train" or "Test".
        y_true: Ground-truth binary labels.
        y_pred: Hard 0/1 predictions aligned with ``y_true``.

    Returns:
        Tuple ``(recall, accuracy, f1, f2, mcc, kappa)``.
    """
    rec = sklearn.metrics.recall_score(y_true, y_pred)
    acc = sklearn.metrics.accuracy_score(y_true, y_pred)
    f1 = sklearn.metrics.f1_score(y_true, y_pred)
    f2 = sklearn.metrics.fbeta_score(y_true, y_pred, beta=2)
    mcc = sklearn.metrics.matthews_corrcoef(y_true, y_pred)
    kappa = sklearn.metrics.cohen_kappa_score(y_true, y_pred)

    print(f"{split_name} Recall = {rec}")
    print(f"{split_name} ACC = {acc}")
    print(f"{split_name} F1 = {f1}")
    print(f"{split_name} F2 = {f2}")
    print(f"{split_name} MCC = {mcc}")
    print(f"{split_name} K = {kappa}")
    return rec, acc, f1, f2, mcc, kappa


def _plot_roc(fpr, tpr, auc_value, split_name, title, color, lw=2, save_path=None):
    """Draw one ROC curve with the chance diagonal and display it.

    Args:
        fpr: False-positive rates as returned by ``sklearn.metrics.roc_curve``.
        tpr: True-positive rates as returned by ``sklearn.metrics.roc_curve``.
        auc_value: Area under the curve, shown in the legend.
        split_name: Legend prefix, e.g. "Train" or "Test".
        title: Figure title.
        color: Line colour of the ROC curve.
        lw: Line width used for both the curve and the diagonal.
        save_path: If given, the figure is saved there before being shown.
    """
    fig, ax = plt.subplots()
    ax.plot(
        fpr,
        tpr,
        color=color,
        lw=lw,
        label="%s ROC curve (area = %0.3f)" % (split_name, auc_value),
    )
    # Diagonal = performance of a random classifier.
    ax.plot([0, 1], [0, 1], color="navy", lw=lw, linestyle="--")
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title(title)
    ax.legend(loc="lower right")
    if save_path is not None:
        fig.savefig(save_path)
    plt.show()


# --- Training split ---------------------------------------------------------
# NaNs in Xc are replaced with 0.0. The fill value is passed by keyword because
# the second positional parameter of np.nan_to_num is `copy`; the previous
# `np.nan_to_num(x, 0)` meant `copy=False` and could modify the dataset
# arrays in place.
train_inputs = [dataset.Xt_train, np.nan_to_num(dataset.Xc_train, nan=0.0)]
train_preds = landslidehazard.model.predict(train_inputs)
train_fpr, train_tpr, train_thresholds = sklearn.metrics.roc_curve(dataset.Y_train, train_preds)
train_auc = sklearn.metrics.auc(train_fpr, train_tpr)
print(f"Train AUC = {train_auc}")

train_preds2 = np.where(train_preds > DECISION_THRESHOLD, 1, 0)
train_rec, train_acc, train_f1, train_f2, train_mcc, train_k = _report_binary_metrics(
    "Train", dataset.Y_train, train_preds2
)

print("Training Confusion Matrix:")
print(sklearn.metrics.confusion_matrix(dataset.Y_train, train_preds2))

# --- Testing split ----------------------------------------------------------
test_inputs = [dataset.Xt_test, np.nan_to_num(dataset.Xc_test, nan=0.0)]
test_preds = landslidehazard.model.predict(test_inputs)
test_fpr, test_tpr, test_thresholds = sklearn.metrics.roc_curve(dataset.Y_test, test_preds)
test_auc = sklearn.metrics.auc(test_fpr, test_tpr)
print(f"Test AUC = {test_auc}")

test_preds2 = np.where(test_preds > DECISION_THRESHOLD, 1, 0)
test_rec, test_acc, test_f1, test_f2, test_mcc, test_k = _report_binary_metrics(
    "Test", dataset.Y_test, test_preds2
)

print("Testing Confusion Matrix:")
print(sklearn.metrics.confusion_matrix(dataset.Y_test, test_preds2))

# --- ROC figures ------------------------------------------------------------
lw = 2  # line width shared by both ROC figures

_plot_roc(
    train_fpr,
    train_tpr,
    train_auc,
    "Train",
    "ROC Curve for Training Set",
    color="blue",
    lw=lw,
)

# Only the test-set curve is persisted to disk.
_plot_roc(
    test_fpr,
    test_tpr,
    test_auc,
    "Test",
    "ROC Curve for Testing Set",
    color="darkorange",
    lw=lw,
    save_path="roc_test.pdf",
)

In [None]:
# Save the current model
model_save_path = "checkpoints/DS_daily_75.keras"

# Create the target directory up front so that a missing "checkpoints/" folder
# (e.g. on a fresh clone) cannot make the save fail after training finished.
Path(model_save_path).parent.mkdir(parents=True, exist_ok=True)

landslidehazard.model.save(model_save_path)
print(f"Model saved to {model_save_path}")