# Renewable Energy GCPBBB Grid Connected Photo Sensor Project

This notebook implements a machine learning pipeline for optimizing renewable energy integration.
It covers synthetic data generation, feature engineering, model training, evaluation, and visualization.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBClassifier
from sklearn.metrics import mean_absolute_error, accuracy_score, classification_report
import joblib
import os

# Create the artifact folders up front; exist_ok makes re-running this cell harmless.
for _artifact_dir in ('models', 'output'):
    os.makedirs(_artifact_dir, exist_ok=True)

## 1. Data Generation
We generate synthetic data to simulate Weather (Irradiance, Temperature), Grid Consumption, and Battery SoC.

In [None]:
def generate_synthetic_data(n_samples: int, start_date: str = '2023-01-01',
                            seed: "int | None" = None) -> pd.DataFrame:
    """Simulate minute-resolution weather, grid load and battery state of charge.

    Every series is a deterministic daily curve (a function of the hour of
    day) plus Gaussian noise. The battery state of charge is then integrated
    step by step from the irradiance and consumption series.

    Args:
        n_samples: Number of one-minute rows to generate. ``0`` yields an
            empty frame with the usual columns.
        start_date: Timestamp of the first row, in any form accepted by
            ``pd.date_range``.
        seed: Optional seed for a private random generator, giving
            reproducible output. When ``None`` the noise is drawn from
            NumPy's global generator, so ``np.random.seed`` still applies.

    Returns:
        DataFrame with columns ``Timestamp``, ``Irradiance``, ``Temperature``,
        ``Grid_Consumption`` and ``Battery_SoC`` (percent, kept in [0, 100]).
    """
    # Both objects expose ``normal``; the draw order below (irradiance,
    # temperature, consumption) is kept fixed so seeded streams stay stable.
    rng = np.random if seed is None else np.random.RandomState(seed)

    dates = pd.date_range(start=start_date, periods=n_samples, freq='min')
    hour_of_day = np.asarray(dates.hour + dates.minute / 60.0, dtype=float)

    # Solar irradiance: half-sine that is positive between 06:00 and 18:00 and
    # peaks at 1000 at noon; clipped at zero before and after adding noise.
    clear_sky = np.maximum(0, 1000 * np.sin(np.pi * (hour_of_day - 6) / 12))
    irradiance = np.maximum(0, clear_sky + rng.normal(0, 50, n_samples))

    # Temperature: 20 +/- 10 sinusoid with its maximum at 15:00, plus noise.
    temperature = (
        20
        + 10 * np.sin(np.pi * (hour_of_day - 9) / 12)
        + rng.normal(0, 2, n_samples)
    )

    # Grid consumption: base load plus two squared-sine terms, floored at 200.
    # NOTE(review): the two terms are 12 h apart and sin^2 has a 12 h period
    # here, so they are in phase (one combined peak pattern, not two distinct
    # peaks) - confirm this is the intended load shape.
    consumption = (
        500
        + 300 * np.sin(np.pi * (hour_of_day - 7) / 12) ** 2
        + 400 * np.sin(np.pi * (hour_of_day - 19) / 12) ** 2
        + rng.normal(0, 50, n_samples)
    )
    consumption = np.maximum(200, consumption)

    # Battery state of charge. The per-step change does not depend on the
    # running state, so it is computed for all rows at once; only the clamped
    # accumulation needs a sequential loop.
    battery_capacity = 10000  # Wh
    delta_soc = (irradiance * 0.2 - consumption * 0.1) / battery_capacity * 100

    soc = np.zeros(n_samples)
    if n_samples > 0:  # guard: an empty request has no first row to initialise
        soc[0] = 50.0
    for i in range(1, n_samples):
        soc[i] = min(100.0, max(0.0, soc[i - 1] + delta_soc[i]))

    return pd.DataFrame({
        'Timestamp': dates,
        'Irradiance': irradiance,
        'Temperature': temperature,
        'Grid_Consumption': consumption,
        'Battery_SoC': soc,
    })

# Generate 1 month of data: 1440 one-minute samples per day x 30 days.
n_samples = 1440 * 30 
# `data` is the raw frame that the feature-engineering step below consumes.
data = generate_synthetic_data(n_samples)
print(f"Generated {len(data)} samples.")
# Last expression of the cell: shows the first rows as a quick sanity check.
data.head()

## 2. Feature Engineering
We add time-based features and lag features to capture temporal dependencies.

In [None]:
def add_time_features(df: pd.DataFrame) -> pd.DataFrame:
    """Attach ``Hour`` and ``Minute`` columns derived from ``Timestamp``.

    The frame is modified in place and the same object is returned.

    Args:
        df: Frame with a datetime-typed ``Timestamp`` column.

    Returns:
        The input frame, now carrying the two extra columns.
    """
    clock = df['Timestamp'].dt
    for column, component in (('Hour', 'hour'), ('Minute', 'minute')):
        df[column] = getattr(clock, component)
    return df

def add_lag_features(df: pd.DataFrame, columns: list, lags: list) -> pd.DataFrame:
    """Add shifted copies of the given columns as ``<column>_lag_<k>``.

    The frame is modified in place and the same object is returned. The first
    ``k`` rows of each ``_lag_<k>`` column are NaN because no earlier value
    exists to shift in.

    Args:
        df: Frame holding the source columns.
        columns: Names of the columns to lag.
        lags: Row offsets passed to ``Series.shift``.

    Returns:
        The input frame, extended with one column per (column, lag) pair.
    """
    # Column-major order: every lag of the first column, then the next column.
    lag_plan = [(name, step) for name in columns for step in lags]
    for name, step in lag_plan:
        df[f'{name}_lag_{step}'] = df[name].shift(step)
    return df

def preprocess_features(df: pd.DataFrame) -> pd.DataFrame:
    """Run the full feature-engineering chain on a raw frame.

    Adds the time-of-day columns, then 1-, 5- and 15-row lags of
    ``Irradiance`` and ``Grid_Consumption``, and finally drops every row that
    contains a NaN (which includes the leading rows left incomplete by the
    lags).

    Args:
        df: Raw frame; the helper steps add columns to it in place.

    Returns:
        A frame without NaN rows, as produced by ``dropna``.
    """
    lagged_columns = ['Irradiance', 'Grid_Consumption']
    lag_steps = [1, 5, 15]
    enriched = add_lag_features(add_time_features(df), lagged_columns, lag_steps)
    return enriched.dropna()

# Work on a copy so the raw `data` frame keeps its original columns;
# `data_processed` is the shared input of both model sections below.
data_processed = preprocess_features(data.copy())
print(f"Data shape after preprocessing: {data_processed.shape}")

## 3. Model Development

### 3.1 Energy Predictor (Random Forest)
Predicts the next step's Grid Consumption.

In [None]:
# Prepare Data for Regression
# The target is the *next* row's Grid_Consumption: shift(-1) pulls it up by one
# row, which leaves the final row without a target, so that row is dropped.
data_reg = data_processed.copy()
data_reg['Target_Consumption'] = data_reg['Grid_Consumption'].shift(-1)
data_reg = data_reg.dropna()

# Every remaining column is a feature, including the current Grid_Consumption,
# Battery_SoC, the time columns and the lag columns.
feature_cols_reg = [c for c in data_reg.columns if c not in ['Timestamp', 'Target_Consumption']]
X_reg = data_reg[feature_cols_reg]
y_reg = data_reg['Target_Consumption']

# shuffle=False keeps row order: the first 80% of rows train, the last 20% test.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, shuffle=False)

# The scaler is fitted on the training rows only and then re-used for the test
# rows, so no test-set statistics reach the model.
scaler_reg = StandardScaler()
X_train_reg_scaled = scaler_reg.fit_transform(X_train_reg)
X_test_reg_scaled = scaler_reg.transform(X_test_reg)

# Train Model
model_reg = RandomForestRegressor(n_estimators=100, random_state=42)
model_reg.fit(X_train_reg_scaled, y_train_reg)

# Evaluate
# y_pred_reg and y_test_reg are also read by the visualization section.
y_pred_reg = model_reg.predict(X_test_reg_scaled)
mae = mean_absolute_error(y_test_reg, y_pred_reg)
print(f"Grid Consumption MAE: {mae:.4f}")

# Save
# The model and its scaler are stored side by side in 'models/'; the model was
# trained on scaled inputs, so both are needed to score new data.
joblib.dump(model_reg, 'models/energy_predictor.pkl')
joblib.dump(scaler_reg, 'models/scaler_reg.pkl')

### 3.2 Balancing Classifier (XGBoost)
Predicts the optimal battery action: 0 (Discharge), 1 (Hold), 2 (Charge).

In [None]:
# Define Balancing Logic (Labels)
def get_balancing_signal(row):
    """Label one row with the rule-based battery action.

    Rules are checked in priority order: a state of charge below 20 always
    charges, above 80 always discharges; otherwise the action follows the
    balance between scaled generation and scaled consumption, with a dead
    band of 20 around zero in which the battery holds.

    Args:
        row: Mapping-like record providing ``Irradiance``,
            ``Grid_Consumption`` and ``Battery_SoC``.

    Returns:
        0 (discharge), 1 (hold) or 2 (charge).
    """
    DISCHARGE, HOLD, CHARGE = 0, 1, 2

    produced = row['Irradiance'] * 0.2
    drawn = row['Grid_Consumption'] * 0.1
    charge_level = row['Battery_SoC']

    # State-of-charge limits take precedence over the energy balance.
    if charge_level < 20:
        return CHARGE
    if charge_level > 80:
        return DISCHARGE

    # Inside the dead band the imbalance is too small to act on.
    if abs(produced - drawn) < 20:
        return HOLD

    # Surplus charges the battery, deficit discharges it.
    return CHARGE if produced > drawn else DISCHARGE

# Label every row with the rule-based action (0/1/2) from get_balancing_signal.
data_clf = data_processed.copy()
data_clf['Signal'] = data_clf.apply(get_balancing_signal, axis=1)

# Prepare Data for Classification
# Features are all columns except Timestamp and the label. They include
# Irradiance, Grid_Consumption and Battery_SoC, i.e. the exact inputs of the
# labelling rule, so the classifier is learning to reproduce that rule.
feature_cols_clf = [c for c in data_clf.columns if c not in ['Timestamp', 'Signal']]
X_clf = data_clf[feature_cols_clf]
y_clf = data_clf['Signal']

# shuffle=False keeps row order: the first 80% of rows train, the last 20% test.
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(X_clf, y_clf, test_size=0.2, shuffle=False)

# The scaler is fitted on the training rows only and re-used for the test rows.
scaler_clf = StandardScaler()
X_train_clf_scaled = scaler_clf.fit_transform(X_train_clf)
X_test_clf_scaled = scaler_clf.transform(X_test_clf)

# Train Model
# NOTE(review): `use_label_encoder` appears to be deprecated in recent XGBoost
# releases - confirm against the installed version before relying on it.
model_clf = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
model_clf.fit(X_train_clf_scaled, y_train_clf)

# Evaluate
y_pred_clf = model_clf.predict(X_test_clf_scaled)
acc = accuracy_score(y_test_clf, y_pred_clf)
print(f"Balancing Classifier Accuracy: {acc:.4f}")
print(classification_report(y_test_clf, y_pred_clf))

# Save
# The classifier and its scaler are stored side by side in 'models/'; the model
# was trained on scaled inputs, so both are needed to score new data.
joblib.dump(model_clf, 'models/balancing_classifier.pkl')
joblib.dump(scaler_clf, 'models/scaler_clf.pkl')

## 4. Visualization
Visualize the predictions and system behavior.

In [None]:
# Overlay actual and predicted consumption for the first 200 test rows,
# drawn through an explicit Axes object instead of the pyplot state machine.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(y_test_reg.values[:200], label='Actual Consumption', alpha=0.7)
ax.plot(y_pred_reg[:200], label='Predicted Consumption', alpha=0.7)
ax.set_title('Grid Consumption Prediction (First 200 Test Samples)')
ax.legend()
plt.show()