In [1]:
import json
import os
import pickle

import numpy as np
import pandas as pd
import yaml

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# ============================================================
# PATHS
# ============================================================

# Project root: the input CSVs are read from its "archive" sub-folder and
# every generated artifact is written directly into it. Set the
# AQUIFER_BASE_DIR environment variable to run the notebook on another
# machine; when it is unset or empty the original location is used.
BASE_DIR = os.environ.get("AQUIFER_BASE_DIR") or (
    r"C:\Users\NXTWAVE\Downloads\Aquifer Recharge Potential Mapping"
)

ARCHIVE_DIR = os.path.join(BASE_DIR, "archive")

DISTRICT_RAINFALL_PATH = os.path.join(
    ARCHIVE_DIR, "district wise rainfall normal.csv"
)

HISTORICAL_RAINFALL_PATH = os.path.join(
    ARCHIVE_DIR, "rainfall in india 1901-2015.csv"
)

# Make sure the output directory exists before anything is written to it.
os.makedirs(BASE_DIR, exist_ok=True)

# ============================================================
# LOAD DATA
# ============================================================

# Read both rainfall tables, district normals first and the historical
# series second.
district_df, historical_df = (
    pd.read_csv(csv_path)
    for csv_path in (DISTRICT_RAINFALL_PATH, HISTORICAL_RAINFALL_PATH)
)

# Put the headers into one canonical form (lower-case, no surrounding
# whitespace) so later column look-ups do not depend on the files' casing.
for _frame in (district_df, historical_df):
    _frame.columns = _frame.columns.str.lower().str.strip()

# ============================================================
# CLEAN & PREPARE
# ============================================================

# Every numeric column is treated as a rainfall measurement. A calendar
# "year" column would also be numeric but is not rainfall, so it is left out
# when present; otherwise it would be averaged into the row statistics.
# NOTE(review): this assumes the remaining numeric columns are all rainfall
# values - confirm against the CSV schemas.
NON_RAINFALL_COLS = ["year"]

district_rain_cols = (
    district_df.select_dtypes(include=np.number)
    .columns.drop(NON_RAINFALL_COLS, errors="ignore")
)
historical_rain_cols = (
    historical_df.select_dtypes(include=np.number)
    .columns.drop(NON_RAINFALL_COLS, errors="ignore")
)

# Long-term statistics: per-row mean/std across the rainfall columns, then
# averaged over all rows into a two-entry summary dict for the metrics file.
historical_df["long_term_mean"] = historical_df[historical_rain_cols].mean(axis=1)
historical_df["long_term_std"] = historical_df[historical_rain_cols].std(axis=1)

historical_summary = historical_df[["long_term_mean", "long_term_std"]].mean().to_dict()

# District rainfall feature: row-wise mean of the district rainfall columns.
district_df["district_mean_rainfall"] = district_df[district_rain_cols].mean(axis=1)

# A row with no rainfall reading at all gets a NaN mean, which would put NaN
# into the training target, so such rows are dropped. reset_index also hands
# back a fresh frame that is safe to add columns to.
district_df = (
    district_df.dropna(subset=["district_mean_rainfall"])
    .reset_index(drop=True)
)

# Recharge proxy: mean rainfall normalised by the largest district mean.
# Guard the divisor so an empty or all-zero table fails loudly instead of
# silently producing NaN/inf targets (the comparison is False for NaN too).
max_mean_rainfall = district_df["district_mean_rainfall"].max()
if not max_mean_rainfall > 0:
    raise ValueError(
        "Cannot compute recharge_index: the maximum district mean rainfall "
        f"is {max_mean_rainfall!r}; expected a positive value."
    )

district_df["recharge_index"] = (
    district_df["district_mean_rainfall"] / max_mean_rainfall
)

# ============================================================
# FEATURES & TARGET
# ============================================================

X = district_df[["district_mean_rainfall"]]
y = district_df["recharge_index"]

# Split the raw features first, then fit the scaler on the training rows
# only. Fitting it on the full matrix before splitting lets the held-out rows
# influence the preprocessing statistics (data leakage). The split depends
# only on the row count and the seed, so the same rows land in each partition
# as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled view of the complete feature matrix, kept under its original name
# for any later code that expects it.
X_scaled = scaler.transform(X)

# ============================================================
# RANDOM FOREST MODEL
# ============================================================

# Hyper-parameters gathered in one mapping and unpacked into the estimator.
rf_params = {"n_estimators": 300, "max_depth": 10, "random_state": 42}
rf = RandomForestRegressor(**rf_params).fit(X_train, y_train)

# Score the fitted forest on the held-out rows.
y_pred = rf.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# ============================================================
# SIMPLE NN MODEL (H5 OUTPUT PURPOSE)
# ============================================================

# Seed the random generators Keras draws from so that weight initialisation
# and batch shuffling repeat across runs, in line with the fixed
# random_state used for the split and the forest. Note that this also
# reseeds the global Python and NumPy generators.
set_random_seed(42)

# Small fully connected regressor: two ReLU hidden layers feeding a single
# linear output unit. The input width follows the training matrix.
nn = Sequential([
    Dense(16, activation="relu", input_shape=(X_train.shape[1],)),
    Dense(8, activation="relu"),
    Dense(1, activation="linear")
])

nn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="mse"
)

# 10% of the training rows are held back by Keras for validation; progress
# output is silenced.
nn.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=30,
    verbose=0
)

# ============================================================
# SAVE FILES
# ============================================================

# 1. Random forest model
with open(os.path.join(BASE_DIR, "recharge_model.pkl"), "wb") as f:
    pickle.dump(rf, f)

# 2. Fitted scaler (needed to transform inputs the same way at inference)
with open(os.path.join(BASE_DIR, "recharge_scaler.pkl"), "wb") as f:
    pickle.dump(scaler, f)

# 3. Neural network. Despite the "weights" file name, nn.save() writes the
#    full model, not only the weights; the name is kept so existing
#    consumers of the file keep working.
nn.save(os.path.join(BASE_DIR, "recharge_nn_weights.h5"))

# 4. Metrics JSON: hold-out scores of the random forest plus the historical
#    rainfall summary. float() converts NumPy scalars to plain Python floats
#    for the JSON encoder.
metrics = {
    "mse": float(mse),
    "r2_score": float(r2),
    "historical_rainfall_summary": historical_summary
}

with open(os.path.join(BASE_DIR, "recharge_metrics.json"), "w") as f:
    json.dump(metrics, f, indent=4)

# 5. YAML config. Names and hyper-parameters are read from the objects that
#    were actually used, so the file cannot drift from the trained model when
#    a setting is changed further up.
config = {
    "project": "AI-Based Aquifer Recharge Potential Mapping",
    "features": [str(column) for column in X.columns],
    "target": str(y.name),
    "model": type(rf).__name__,
    "n_estimators": int(rf.n_estimators),
    "max_depth": rf.max_depth,
    "scaler": type(scaler).__name__
}

with open(os.path.join(BASE_DIR, "recharge_config.yaml"), "w") as f:
    yaml.dump(config, f)

# ============================================================
# DONE
# ============================================================

# Final status report. The emoji in these messages had been mis-encoded
# (UTF-8 bytes decoded as Mac Roman); they are restored here.
print("✅ All files generated successfully!")
print("📁 Location:", BASE_DIR)
print("📦 Files: .pkl | .h5 | .yaml | .json")





✅ All files generated successfully!
📁 Location: C:\Users\NXTWAVE\Downloads\Aquifer Recharge Potential Mapping
📦 Files: .pkl | .h5 | .yaml | .json


  saving_api.save_model(
