In [1]:
import os
import pandas as pd
import numpy as np
import pickle
import json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# ==========================================================
# PATHS
# ==========================================================
# Project root; the input CSVs are read from its "archive" subfolder and
# every output artifact is later written back to this folder.
BASE = r"C:\Users\NXTWAVE\Downloads\Industrial Chemical Leak Prediction"

# FILE4 and FILE5 deliberately point at the same CSV (duplicate as requested).
FILE1, FILE2, FILE3, FILE4, FILE5 = (
    os.path.join(BASE, relative_csv)
    for relative_csv in (
        r"archive\station_hour.csv",
        r"archive\stations.csv",
        r"archive\city_hour.csv",
        r"archive\station_day.csv",
        r"archive\station_day.csv",
    )
)

# ==========================================================
# LOAD ALL DATA
# ==========================================================
# Read the five CSVs in order; df2 holds the contents of FILE2
# (stations.csv) and is only used later for the optional merge.
df1, df2, df3, df4, df5 = map(
    pd.read_csv, (FILE1, FILE2, FILE3, FILE4, FILE5)
)

print("Loaded datasets successfully!")

# ==========================================================
# MERGE INTO ONE DATAFRAME
# ==========================================================
# Stack the measurement tables row-wise with a fresh 0..n-1 index.
# df2 is intentionally left out of the stack; it is joined below instead.
frames = [df1, df3, df4, df5]
df = pd.concat(frames, ignore_index=True)

# Attach the df2 columns only when both sides expose a "station" column;
# otherwise the stacked frame is used unchanged.
join_key = "station"
if all(join_key in table.columns for table in (df, df2)):
    df = pd.merge(df, df2, on=join_key, how="left")

print("Merged dataset shape:", df.shape)

# ==========================================================
# CLEANING
# ==========================================================
# Drop rows that are exact duplicates of an earlier row.
df = df.drop_duplicates()

# Fill gaps by carrying the previous row's value forward, then back-fill
# anything still missing at the top of the frame. This applies to every
# column, not only the numeric ones. DataFrame.ffill()/bfill() give the same
# result as fillna(method="ffill"/"bfill") without the deprecation warning
# that newer pandas versions emit for the `method=` keyword.
df = df.ffill().bfill()

# Only numeric columns are passed on to the model.
df_numeric = df.select_dtypes(include=[np.number])

# One target column plus at least one feature column is the bare minimum.
if df_numeric.shape[1] < 2:
    raise ValueError("Not enough numeric columns to train the model!")

# ==========================================================
# FEATURE-TARGET SPLIT
# ==========================================================
# The last numeric column is used as the target (labelled "Industry Waste
# Index" in the original notes); all preceding numeric columns are features.
features = df_numeric.iloc[:, :-1].values
target = df_numeric.iloc[:, -1].values.reshape(-1, 1)  # 2-D for MinMaxScaler

# Split BEFORE scaling so the scalers never see the held-out rows. Fitting
# them on the full dataset leaks the test set's min/max into training.
# The split depends only on the row count and random_state, so the same rows
# land in train/test as when the split was done after scaling.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Fit each scaler on the training rows only, then reuse it for the test rows.
scaler_X = MinMaxScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

scaler_y = MinMaxScaler()
y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)

# Keep X / y defined as the scaled full arrays (same names and shapes as
# before) for any later cell that still refers to them.
X = scaler_X.transform(features)
y = scaler_y.transform(target)

# ==========================================================
# BUILD MODEL
# ==========================================================
# Fully connected regressor: 128 -> 64 -> 32 ReLU units with dropout after
# the first two hidden layers, ending in a single linear output unit.
n_features = X_train.shape[1]

model = Sequential()
model.add(Dense(128, activation="relu", input_shape=(n_features,)))
model.add(Dropout(0.2))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(32, activation="relu"))
model.add(Dense(1, activation="linear"))

# Mean squared error is optimised; mean absolute error is reported as well.
model.compile(loss="mse", optimizer="adam", metrics=["mae"])

# Stop once val_loss has not improved for 5 epochs and roll the model back
# to the weights of the best epoch.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)

# ==========================================================
# TRAIN MODEL
# ==========================================================
# Training options gathered in one place: 20% of the training rows are held
# out for validation, with at most 40 epochs in mini-batches of 32.
fit_options = {
    "validation_split": 0.2,
    "epochs": 40,
    "batch_size": 32,
    "callbacks": [early_stop],
    "verbose": 1,
}

history = model.fit(X_train, y_train, **fit_options)

# ==========================================================
# SAVE OUTPUT FILES
# ==========================================================

# All artifacts are written next to the input data.
SAVE_PATH = BASE

# 1) Full model (architecture + weights) in HDF5 format.
model.save(os.path.join(SAVE_PATH, "industry_waste_model.h5"))

# 2) Pickle bundle: both fitted scalers plus the raw weight arrays.
# NOTE: only unpickle this file from a trusted location; pickle can execute
# arbitrary code on load.
with open(os.path.join(SAVE_PATH, "industry_waste_model.pkl"), "wb") as f:
    pickle.dump({"scaler_X": scaler_X, "scaler_y": scaler_y,
                 "weights": model.get_weights()}, f)

# 3) Architecture only, as JSON.
with open(os.path.join(SAVE_PATH, "industry_waste_model.json"), "w",
          encoding="utf-8") as f:
    f.write(model.to_json())

# 4) Architecture only, as YAML (best effort). Depending on the installed
# TensorFlow/Keras version, to_yaml() is missing (AttributeError), removed
# (RuntimeError) or unusable without PyYAML (ImportError). Only those cases
# are tolerated; anything else, including a failed file write, now surfaces
# instead of being hidden by a bare `except:`.
try:
    yaml_string = model.to_yaml()
except (AttributeError, RuntimeError, ImportError):
    print("‚ö† YAML export deprecated in new TensorFlow, JSON saved instead.")
else:
    with open(os.path.join(SAVE_PATH, "industry_waste_model.yaml"), "w",
              encoding="utf-8") as f:
        f.write(yaml_string)

print("\n============================================")
print("üéâ ALL FILES SAVED SUCCESSFULLY!")
print("üìÅ Location:", SAVE_PATH)
print("============================================")





  df1 = pd.read_csv(FILE1)


Loaded datasets successfully!
Merged dataset shape: (3513028, 18)


  df = df.fillna(method="ffill").fillna(method="bfill")




Epoch 1/40


Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
‚ö† YAML export deprecated in new TensorFlow, JSON saved instead.

üéâ ALL FILES SAVED SUCCESSFULLY!
üìÅ Location: C:\Users\NXTWAVE\Downloads\Industrial Chemical Leak Prediction


  saving_api.save_model(
