In [1]:
import os
import json
import yaml
import joblib
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

# ============================================================
# PATHS
# ============================================================

# Working folder: the input workbook is read from here and every artifact
# produced by this script (scaler, model, metrics, config, dataset) is
# written back into it.
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Urban Water Tank Temperature Control"
DATA_PATH = os.path.join(BASE_DIR, "results_all_improvements_city_to_tourism.xlsx")

# No-op when the folder already exists; otherwise it is created together
# with any missing parent folders.
os.makedirs(BASE_DIR, exist_ok=True)

# ============================================================
# LOAD DATA
# ============================================================

# Read the results workbook with pandas' default read_excel settings.
df = pd.read_excel(DATA_PATH)

# Normalise headers to lower-case strings. Each label goes through str()
# first so that numeric or otherwise non-text headers stay usable: the
# Index ``.str`` accessor raises on an all-numeric header row and turns
# non-string labels into NaN when the header row is mixed.
df.columns = [str(col).lower() for col in df.columns]

# Keep only numeric columns, then drop every row with a missing value.
df = df.select_dtypes(include=[np.number]).dropna()

# Fail early with a clear message instead of letting the scaler or the
# train/test split raise an obscure error further down.
if df.shape[1] < 2:
    raise ValueError(
        "Need at least two numeric columns (one or more features plus the "
        f"target) in {DATA_PATH!r}; found {df.shape[1]}."
    )
if df.empty:
    raise ValueError(
        f"No complete numeric rows left in {DATA_PATH!r} after dropping "
        "rows with missing values."
    )

# The last numeric column is the regression target; every other numeric
# column is a feature.
# NOTE(review): presumably the last column is the temperature / improvement
# metric -- confirm against the workbook layout.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# ============================================================
# TRAIN-TEST SPLIT
# ============================================================

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# ============================================================
# SCALING
# ============================================================

# Fit the standardiser on the training rows only, then apply the same
# transformation to both partitions so no test-set statistics leak into
# training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler next to the other artifacts so the identical
# transformation can be re-applied to new inputs later.
scaler_path = os.path.join(BASE_DIR, "scaler.pkl")
joblib.dump(scaler, scaler_path)

# ============================================================
# ANN MODEL
# ============================================================

# The train/test split above is seeded, but weight initialisation and batch
# shuffling were not, so metrics changed on every run. Seeding the Python,
# NumPy and TensorFlow RNGs makes training repeatable on the same setup
# (some GPU ops may still be non-deterministic).
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)

# Small fully connected regressor: two ReLU hidden layers (64 and 32 units)
# followed by a single linear output unit.
model = Sequential([
    Dense(64, activation="relu", input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation="relu"),
    Dense(1)
])

model.compile(
    optimizer="adam",
    loss="mse"
)

# Stop once validation loss has not improved for 10 consecutive epochs and
# roll the model back to the weights of the best epoch.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

# validation_split carves the validation set out of the training rows
# passed here; the held-out test partition is never seen during fitting.
history = model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=16,
    callbacks=[early_stop],
    verbose=1
)

# ============================================================
# EVALUATION
# ============================================================

# Predict on both partitions and collapse the outputs to 1-D so they line
# up element-wise with the target values.
train_pred = model.predict(X_train_scaled).flatten()
test_pred = model.predict(X_test_scaled).flatten()

train_mse = mean_squared_error(y_train, train_pred)
test_mse = mean_squared_error(y_test, test_pred)

# Everything is cast to plain Python types so the dict is JSON-serialisable.
# "epochs_trained" counts the epochs that were actually run (the length of
# the recorded loss history).
metrics = dict(
    train_rmse=float(np.sqrt(train_mse)),
    test_rmse=float(np.sqrt(test_mse)),
    train_r2=float(r2_score(y_train, train_pred)),
    test_r2=float(r2_score(y_test, test_pred)),
    epochs_trained=len(history.history["loss"]),
    features_used=list(X.columns),
)

# ============================================================
# SAVE MODEL (.h5)
# ============================================================

# Write the trained network into the working folder. The ".h5" suffix
# selects the HDF5 format, which recent Keras releases flag as legacy with
# a warning at save time.
model_path = os.path.join(BASE_DIR, "ann_model.h5")
model.save(model_path)

# ============================================================
# SAVE METRICS (.json)
# ============================================================

# Dump the evaluation metrics as indented, human-readable JSON.
metrics_path = os.path.join(BASE_DIR, "training_metrics.json")
with open(metrics_path, "w") as f:
    json.dump(metrics, f, indent=4)

# ============================================================
# SAVE CONFIG (.yaml)
# ============================================================

# Static description of the training setup, written out for reference.
# These values are repeated by hand from the model-building code above and
# must be kept in sync with it manually.
layer_specs = [(64, "relu"), (32, "relu"), (1, "linear")]

config = {
    "model_type": "ANN",
    "optimizer": "adam",
    "loss": "mse",
    "layers": [
        {"units": units, "activation": activation}
        for units, activation in layer_specs
    ],
    "batch_size": 16,
    "max_epochs": 100,
    "early_stopping": True,
    "scaling": "StandardScaler",
    "train_test_split": 0.8,
}

config_path = os.path.join(BASE_DIR, "model_config.yaml")
with open(config_path, "w") as f:
    yaml.dump(config, f)

# ============================================================
# SAVE PROCESSED DATASET (.pkl)
# ============================================================

# Bundle the scaled feature matrices and the raw target arrays into one
# pickle so later steps can reuse exactly the same partitions.
processed_dataset = {
    "X_train": X_train_scaled,
    "X_test": X_test_scaled,
    "y_train": y_train.values,
    "y_test": y_test.values,
}
dataset_path = os.path.join(BASE_DIR, "processed_dataset.pkl")
joblib.dump(processed_dataset, dataset_path)

# Final status report. The original literals contained mojibake (UTF-8
# emoji bytes decoded with the wrong codec); the intended characters are
# restored here.
print("\n✅ ALL FILES GENERATED SUCCESSFULLY")
print("📁 Location:", BASE_DIR)





Epoch 1/100

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoc

  saving_api.save_model(
