In [1]:
import os
import json
import yaml
import joblib
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# ============================================================
# PATHS
# ============================================================

# Root folder that holds the input archive and receives every artifact
# (scaler, models, JSON results, YAML config) written by this script.
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Urban Heat Island (UHI) Prediction"
DATA_PATH = os.path.join(BASE_DIR, "archive", "city_temperature.csv")

os.makedirs(BASE_DIR, exist_ok=True)

# ============================================================
# LOAD DATA
# ============================================================

# low_memory=False lets pandas infer each column's dtype from the whole file
# instead of chunk by chunk. The recorded run emitted a warning pointing at
# this read_csv call -- presumably the mixed-dtype DtypeWarning; the numeric
# columns used below are not affected by this flag.
df = pd.read_csv(DATA_PATH, low_memory=False)

# Normalise header casing so the target can be referenced in lower case.
df.columns = df.columns.str.lower()

target_col = "avgtemperature"

# Fail early with a readable message instead of an opaque pandas KeyError.
if target_col not in df.columns:
    raise KeyError(
        f"Target column {target_col!r} not found in {DATA_PATH}; "
        f"available columns: {list(df.columns)}"
    )

# Rows without a target value cannot be used for training or evaluation.
# NOTE(review): only NaN targets are removed here; if the file encodes missing
# readings with a sentinel value instead, those rows remain -- confirm.
df = df.dropna(subset=[target_col])

# Keep only numeric columns; every non-numeric column is discarded.
df = df.select_dtypes(include=[np.number])

# Features are all remaining numeric columns except the target.
X = df.drop(columns=[target_col])
y = df[target_col]

# ============================================================
# TRAIN TEST SPLIT
# ============================================================

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ============================================================
# SCALING
# ============================================================

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler next to the other artifacts.
scaler_path = os.path.join(BASE_DIR, "uhi_scaler.pkl")
joblib.dump(scaler, scaler_path)

# ============================================================
# BASELINE ML MODEL (Random Forest)
# ============================================================

# Baseline regressor: 200 trees capped at depth 10, seeded for repeatability.
rf = RandomForestRegressor(
    n_estimators=200,
    max_depth=10,
    random_state=42,
    # Build and query the trees on all available cores; the seed above still
    # governs the randomness, so this only affects wall-clock time.
    n_jobs=-1,
)

# The forest is trained and evaluated on the unscaled feature frames.
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)

# RMSE is the square root of the mean squared error on the held-out rows.
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_preds))
rf_r2 = r2_score(y_test, rf_preds)

# Persist the fitted baseline alongside the other artifacts.
joblib.dump(
    rf,
    os.path.join(BASE_DIR, "uhi_baseline_model.pkl")
)

# ============================================================
# DEEP LEARNING MODEL (Saved as .h5)
# ============================================================

# Two hidden ReLU layers (128 then 64 units), each followed by dropout, and a
# single linear output unit for the regression target.
network_layers = [
    Dense(128, activation="relu", input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.3),
    Dense(64, activation="relu"),
    Dropout(0.2),
    Dense(1),
]
model = Sequential(network_layers)

# Mean squared error loss optimised with Adam.
model.compile(loss="mse", optimizer="adam")

# Stop after 10 epochs without a validation-loss improvement and restore the
# best weights seen so far.
early_stop = EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor="val_loss",
)

# Train on the scaled features; 20% of the training rows are set aside by
# Keras as the validation set that drives early stopping.
history = model.fit(
    X_train_scaled,
    y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

# Write the trained network to an .h5 file next to the other artifacts.
model_path = os.path.join(BASE_DIR, "uhi_model.h5")
model.save(model_path)

# ============================================================
# EVALUATION
# ============================================================

# Network predictions come back with a trailing axis; flatten to 1-D so they
# line up with y_test.
raw_test_output = model.predict(X_test_scaled)
dl_preds = raw_test_output.flatten()

dl_mse = mean_squared_error(y_test, dl_preds)
dl_rmse = np.sqrt(dl_mse)
dl_r2 = r2_score(y_test, dl_preds)

# ============================================================
# SAVE RESULTS (JSON)
# ============================================================

# Metrics are cast to built-in floats so the json module can serialise them.
baseline_metrics = {
    "rmse": float(rf_rmse),
    "r2_score": float(rf_r2),
}
network_metrics = {
    "rmse": float(dl_rmse),
    "r2_score": float(dl_r2),
}

# First ten held-out targets next to the network's predictions for them.
prediction_preview = {
    "actual": y_test.head(10).tolist(),
    "predicted": dl_preds[:10].tolist(),
}

results = {
    "baseline_random_forest": baseline_metrics,
    "deep_learning_model": network_metrics,
    "sample_predictions": prediction_preview,
}

results_path = os.path.join(BASE_DIR, "uhi_results.json")
with open(results_path, "w") as f:
    json.dump(results, f, indent=4)

# ============================================================
# SAVE CONFIG (YAML)
# ============================================================

# Summary of the run's settings, written out for reference alongside the
# trained artifacts.
config = dict(
    project="Urban Heat Island Prediction",
    data_path=DATA_PATH,
    target_variable=target_col,
    models=["RandomForestRegressor", "Deep Neural Network"],
    train_test_split=0.8,
    scaling="StandardScaler",
)

config_path = os.path.join(BASE_DIR, "uhi_config.yaml")
with open(config_path, "w") as f:
    yaml.dump(config, f)

# ============================================================
# DONE
# ============================================================

# Completion banner. The emoji were stored as mojibake (UTF-8 bytes decoded
# with a legacy 8-bit codepage); they are restored to the intended characters.
print("\n✅ ALL FILES GENERATED SUCCESSFULLY")
print("📁 Saved in:", BASE_DIR)
print("📦 Files: h5 | pkl | yaml | json")





  df = pd.read_csv(DATA_PATH)




Epoch 1/100

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100


  saving_api.save_model(



✅ ALL FILES GENERATED SUCCESSFULLY
📁 Saved in: C:\Users\NXTWAVE\Downloads\Urban Heat Island (UHI) Prediction
📦 Files: h5 | pkl | yaml | json
