In [2]:
# ============================================================
# üåû SolarSense | Smart Solar Panel Efficiency Optimizer (Fixed)
# ============================================================

import os
import pandas as pd
import numpy as np
import json
import yaml
import pickle
import random
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, Flatten
from tensorflow.keras.callbacks import EarlyStopping

# ------------------------------------------------------------
# 1Ô∏è‚É£ CONFIGURATION
# ------------------------------------------------------------

DATA_DIR = r"C:\Users\NXTWAVE\Downloads\Smart Solar Panel Efficiency Optimizer\archive"
OUTPUT_DIR = r"C:\Users\NXTWAVE\Downloads\Smart Solar Panel Efficiency Optimizer\results"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# File paths
files = {
    "plant1_gen": os.path.join(DATA_DIR, "Plant_1_Generation_Data.csv"),
    "plant1_weather": os.path.join(DATA_DIR, "Plant_1_Weather_Sensor_Data.csv"),
    "plant2_gen": os.path.join(DATA_DIR, "Plant_2_Generation_Data.csv"),
    "plant2_weather": os.path.join(DATA_DIR, "Plant_2_Weather_Sensor_Data.csv"),
}

# ------------------------------------------------------------
# 2Ô∏è‚É£ LOAD DATASETS
# ------------------------------------------------------------

def load_and_merge(gen_path, weather_path):
    gen = pd.read_csv(gen_path, parse_dates=['DATE_TIME'], dayfirst=True)
    weather = pd.read_csv(weather_path, parse_dates=['DATE_TIME'], dayfirst=True)

    # Merge on DATE_TIME
    df = pd.merge(gen, weather, on="DATE_TIME", how="inner")

    # Identify plant ID if missing
    if 'PLANT_ID_x' in df.columns:
        df['PLANT_ID'] = df['PLANT_ID_x']
    elif 'PLANT_ID' not in df.columns:
        df['PLANT_ID'] = os.path.basename(gen_path).split('_')[1]

    return df

df1 = load_and_merge(files["plant1_gen"], files["plant1_weather"])
df2 = load_and_merge(files["plant2_gen"], files["plant2_weather"])
df = pd.concat([df1, df2], ignore_index=True)

print("[INFO] Dataset shape:", df.shape)
print("[INFO] Columns available:", list(df.columns)[:15])

# ------------------------------------------------------------
# 3Ô∏è‚É£ FEATURE ENGINEERING
# ------------------------------------------------------------

# Ensure key columns exist
for col in ['DC_POWER', 'AC_POWER', 'AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION']:
    if col not in df.columns:
        raise KeyError(f"Missing expected column: {col}")

# Compute efficiency safely
df["EFFICIENCY"] = np.where(df["DC_POWER"] > 0, df["AC_POWER"] / df["DC_POWER"], 0)

# Extract time-based features
df["HOUR"] = df["DATE_TIME"].dt.hour
df["DAY"] = df["DATE_TIME"].dt.day
df["MONTH"] = df["DATE_TIME"].dt.month

# Select feature and target columns
feature_cols = ["AMBIENT_TEMPERATURE", "MODULE_TEMPERATURE", "IRRADIATION", "HOUR", "DAY", "MONTH"]
target_col = "EFFICIENCY"

df = df.dropna(subset=feature_cols + [target_col])
X = df[feature_cols].values
y = df[target_col].values.reshape(-1, 1)

# ------------------------------------------------------------
# 4Ô∏è‚É£ SCALE FEATURES
# ------------------------------------------------------------

scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# ------------------------------------------------------------
# 5Ô∏è‚É£ TRAIN / TEST SPLIT
# ------------------------------------------------------------

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, shuffle=False)

# Reshape for LSTM (samples, timesteps, features)
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# ------------------------------------------------------------
# 6Ô∏è‚É£ PSO-Like RANDOM SEARCH (light version)
# ------------------------------------------------------------

def random_search_optimize(n_iter=5):
    best_rmse = np.inf
    best_params = None

    for i in range(n_iter):
        lr = random.uniform(0.0005, 0.01)
        hidden = random.randint(32, 128)
        dropout = random.uniform(0.1, 0.4)

        model = Sequential([
            Conv1D(32, kernel_size=1, activation='relu', input_shape=(1, X_train.shape[2])),
            Flatten(),
            Dense(hidden, activation='relu'),
            Dropout(dropout),
            Dense(1)
        ])

        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss='mse')
        model.fit(X_train, y_train, epochs=3, batch_size=64, verbose=0)
        y_pred = model.predict(X_test, verbose=0)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))

        print(f"[OPT] Iter {i+1:02d} | RMSE={rmse:.4f} | hidden={hidden}, lr={lr:.5f}, drop={dropout:.2f}")
        if rmse < best_rmse:
            best_rmse = rmse
            best_params = {"hidden": hidden, "lr": lr, "dropout": dropout}
    
    return best_params

best_params = random_search_optimize()
print("[INFO] Best Parameters:", best_params)

# ------------------------------------------------------------
# 7Ô∏è‚É£ BUILD FINAL HYBRID MODEL (CNN + LSTM)
# ------------------------------------------------------------

model = Sequential([
    Conv1D(64, kernel_size=1, activation='relu', input_shape=(1, X_train.shape[2])),
    LSTM(best_params["hidden"], return_sequences=False),
    Dropout(best_params["dropout"]),
    Dense(1)
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=best_params["lr"]), loss='mse')

es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(X_train, y_train, validation_split=0.2, epochs=40, batch_size=64, callbacks=[es], verbose=1)

# ------------------------------------------------------------
# 8Ô∏è‚É£ EVALUATION
# ------------------------------------------------------------

y_pred = model.predict(X_test)
y_pred_inv = scaler_y.inverse_transform(y_pred)
y_test_inv = scaler_y.inverse_transform(y_test)

rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred_inv))
r2 = r2_score(y_test_inv, y_pred_inv)

print(f"[RESULT] RMSE: {rmse:.4f}, R2: {r2:.4f}")

# ------------------------------------------------------------
# 9Ô∏è‚É£ SAVE ALL OUTPUTS (.h5, .pkl, .yaml, .json)
# ------------------------------------------------------------

# Model
model.save(os.path.join(OUTPUT_DIR, "SolarSense_model.h5"))

# Scalers
with open(os.path.join(OUTPUT_DIR, "scaler_x.pkl"), "wb") as f:
    pickle.dump(scaler_x, f)
with open(os.path.join(OUTPUT_DIR, "scaler_y.pkl"), "wb") as f:
    pickle.dump(scaler_y, f)

# Config (YAML)
config = {
    "features": feature_cols,
    "target": target_col,
    "best_params": best_params,
    "metrics": {"RMSE": float(rmse), "R2": float(r2)}
}
with open(os.path.join(OUTPUT_DIR, "SolarSense_config.yaml"), "w") as f:
    yaml.dump(config, f)

# Results (JSON)
summary_report = {
    "Dataset Shape": df.shape,
    "RMSE": rmse,
    "R2": r2,
    "Best Hyperparameters": best_params,
    "Output Files": os.listdir(OUTPUT_DIR)
}
with open(os.path.join(OUTPUT_DIR, "SolarSense_results.json"), "w") as f:
    json.dump(summary_report, f, indent=4)

print("[INFO] ‚úÖ All models & configs saved in:", OUTPUT_DIR)
print("[INFO] Files generated:", os.listdir(OUTPUT_DIR))


[INFO] Dataset shape: (136472, 13)
[INFO] Columns available: ['DATE_TIME', 'PLANT_ID_x', 'SOURCE_KEY_x', 'DC_POWER', 'AC_POWER', 'DAILY_YIELD', 'TOTAL_YIELD', 'PLANT_ID_y', 'SOURCE_KEY_y', 'AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION', 'PLANT_ID']



  weather = pd.read_csv(weather_path, parse_dates=['DATE_TIME'], dayfirst=True)
  gen = pd.read_csv(gen_path, parse_dates=['DATE_TIME'], dayfirst=True)
  weather = pd.read_csv(weather_path, parse_dates=['DATE_TIME'], dayfirst=True)



[OPT] Iter 01 | RMSE=0.3944 | hidden=69, lr=0.00794, drop=0.33
[OPT] Iter 02 | RMSE=0.4221 | hidden=88, lr=0.00417, drop=0.28
[OPT] Iter 03 | RMSE=0.3800 | hidden=65, lr=0.00729, drop=0.25
[OPT] Iter 04 | RMSE=0.4850 | hidden=33, lr=0.00279, drop=0.21
[OPT] Iter 05 | RMSE=0.4391 | hidden=103, lr=0.00812, drop=0.36
[INFO] Best Parameters: {'hidden': 65, 'lr': 0.007291037550166211, 'dropout': 0.24946410997597523}
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
[RESULT] RMSE: 0.6028, R2: -0.5242
[INFO] ‚úÖ All models & configs saved in: C:\Users\NXTWAVE\Downloads\Smart Solar Panel Efficiency Optimizer\results
[INFO] Files generated: ['scaler_x.pkl', 'scaler_y.pkl', 'SolarSense_config.yaml', 'SolarSense_model.h5', 'SolarSense_results.json']


  saving_api.save_model(
