<a href="https://colab.research.google.com/github/ninay03/Ai-Powered-Predictive-Maintenance-System-for-Renewable-Energy-Plants/blob/main/Wind_Power_Plant_Trained_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the merged sensor dataset from the Colab filesystem.
# NOTE(review): the path is absolute and Colab-specific; the file must be uploaded to /content first.
data = pd.read_csv('/content/merged_dataset.csv')

In [None]:
# Show the raw column names to confirm the schema before any preprocessing.
data.columns

Index(['dia_mes_ano', 'hora_minuto', 'irr', 'massaPM1', 'massaPM2', 'massaPM4',
       'massaPM10', 'numPM1', 'numPM2', 'numPM4', 'numPM10', 'tamanho_medio',
       'temp', 'vento_dir', 'vento_vel', 'rainfall'],
      dtype='object')

In [None]:
# Work on a copy so the originally loaded `data` frame stays untouched.
# NOTE(review): the preprocessing cell further down reloads the CSV and rebinds `df`,
# so this copy is not the frame the model ends up being trained on.
df = data.copy()

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# ----------------------------
# Step 1: Load and Preprocess Data
# ----------------------------
# Reload the merged dataset so this cell can be re-run on its own.
df = pd.read_csv('/content/merged_dataset.csv')

# Parse the date column (YYYYMMDD) and remove it from the frame in one step;
# only the calendar features derived from it are kept.
calendar_day = pd.to_datetime(df.pop('dia_mes_ano'), format='%Y%m%d')

# Zero-pad hora_minuto to six digits so it parses as HHMMSS (e.g. 500 -> '000500'),
# then parse it once. This is vectorized and avoids a time -> str -> datetime round trip.
hhmmss = df.pop('hora_minuto').astype(int).astype(str).str.zfill(6)
clock = pd.to_datetime(hhmmss, format='%H%M%S')

# Append the time features in a fixed order (hour, day_of_week, month); the resulting
# column order is the feature order the model is trained on.
df = df.assign(
    hour=clock.dt.hour,
    day_of_week=calendar_day.dt.dayofweek,
    month=calendar_day.dt.month,
)

# ----------------------------
# Step 2: Calculate Health Score
# ----------------------------
# Baseline: mean `irr` of every (month, hour) bucket, broadcast back to each row.
# Buckets whose mean is NaN fall back to the global mean of `irr`.
# NOTE(review): the baseline is computed on the full dataset, before the train/test split,
# so test rows contribute to it.
expected_irr = (
    df.groupby(['month', 'hour'])['irr']
    .transform('mean')
    .fillna(df['irr'].mean())
)

# Define PM10 threshold: a massaPM10 value at or above it drives the score to 0.
PM10_THRESHOLD = 25

# Ratio of observed to baseline irr; a zero baseline is swapped for a tiny value
# to avoid division by zero.
irr_ratio = df['irr'] / expected_irr.replace(0, 1e-6)
# Linear penalty that reaches 0 when massaPM10 equals the threshold (negative beyond it).
pm10_factor = 1 - df['massaPM10'] / PM10_THRESHOLD

# Health score is the product of both factors, bounded to [0, 1].
df['health_score'] = (irr_ratio * pm10_factor).clip(lower=0, upper=1)

# Drop rows with NaN in health_score (clip keeps NaN, so incomplete rows are removed here).
df = df.dropna(subset=['health_score'])

# ----------------------------
# Step 3: Train Model
# ----------------------------
# NOTE(review): health_score is computed from irr, massaPM10, month and hour, all of which
# remain in X, so the model is fitting a known formula rather than an independent label.
X = df.drop(columns='health_score')
y = df['health_score']

# Split data: random 80/20 hold-out with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [None]:
# Random-forest hyperparameters, collected in one place so they are easy to tune.
RF_PARAMS = {
    'n_estimators': 100,      # number of trees in the forest
    'max_depth': 10,          # cap on the depth of each tree
    'min_samples_leaf': 4,    # minimum number of samples required at a leaf
    'max_features': 'sqrt',   # Consider sqrt(features) at each split
    'random_state': 42,       # fixed seed for reproducible fits
}

# Unfitted regressor; training happens in the next cell.
model = RandomForestRegressor(**RF_PARAMS)


In [None]:

# Fit the forest on the training split (fits `model` in place).
model.fit(X_train, y_train)

# ----------------------------
# Step 4: Evaluate Model
# ----------------------------
# Predict on the held-out split and score it with both metrics in one pass.
y_pred = model.predict(X_test)
r2, mae = (metric(y_test, y_pred) for metric in (r2_score, mean_absolute_error))

# Report both metrics to four decimal places.
print(f"R² Score: {r2:.4f}")
print(f"Mean Absolute Error: {mae:.4f}")

# ----------------------------

R² Score: 0.9296
Mean Absolute Error: 0.0657


In [None]:


def predict_health_score(json_input=None):
    """Predict a health score for one observation supplied as a JSON object.

    Relies on the module-level fitted ``model``. The result is printed, not returned.

    Args:
        json_input: Optional JSON string holding one object with a value for
            every model feature. When ``None`` (the default) the user is
            prompted for it on stdin, which keeps the interactive behavior.

    Returns:
        None. Prints the predicted score, or a message describing why the
        input was rejected (invalid JSON, not an object, or missing keys).
    """
    if json_input is None:
        json_input = input("Enter the input data as a JSON object: ")

    # Only the parse itself can raise JSONDecodeError, so keep the try narrow.
    try:
        data = json.loads(json_input)  # Parse JSON input
    except json.JSONDecodeError:
        print("Invalid JSON format. Please enter a valid JSON object.")
        return

    # Valid JSON that is not an object (a number, list, string, ...) cannot
    # provide named features, so reject it explicitly.
    if not isinstance(data, dict):
        print("Invalid JSON format. Please enter a valid JSON object.")
        return

    # Feature names in the order used for training. Prefer the names recorded on
    # the fitted model; otherwise fall back to the training column order
    # (raw columns without the date/time fields, then hour, day_of_week, month).
    required_keys = list(getattr(model, 'feature_names_in_', [
        'irr', 'massaPM1', 'massaPM2', 'massaPM4', 'massaPM10',
        'numPM1', 'numPM2', 'numPM4', 'numPM10', 'tamanho_medio',
        'temp', 'vento_dir', 'vento_vel', 'rainfall',
        'hour', 'day_of_week', 'month',
    ]))

    # Check if all required keys are present
    missing_keys = [key for key in required_keys if key not in data]
    if missing_keys:
        print(f"Missing keys in input JSON: {missing_keys}")
        return

    # Select exactly the model's features, in training order. JSON key order is
    # arbitrary and extra keys may be present; passing the frame through as-is
    # would either be rejected by scikit-learn or predict on misordered columns.
    input_data = pd.DataFrame([data])[required_keys]

    # Prediction
    prediction = model.predict(input_data)
    print(f"\nPredicted Health Score: {prediction[0]:.4f}")

# Run one interactive prediction: prompts for a JSON object on stdin and prints the predicted score.
predict_health_score()


Enter the input data as a JSON object: {     "irr": 0,     "massaPM1": 8.58,     "massaPM2": 0.29,     "massaPM4": 0,     "massaPM10": 0,     "numPM1": 70.76,     "numPM2": 0.19,     "numPM4": 0,     "numPM10": 0,     "tamanho_medio": 0.45,     "temp": 23.62,     "vento_dir": 45,     "vento_vel": 7.98,     "rainfall": 0,     "hour": 3,     "day_of_week": 4,     "month": 10 }

Predicted Health Score: 0.0039


In [None]:
import joblib

# Record the column order the model was trained on, so inference code can
# arrange its inputs the same way.
feature_order = list(X_train.columns)

# Persist the model and its feature order as a single (model, feature_order) tuple;
# whoever loads the file must unpack both items.
joblib.dump((model, feature_order), "wind_energy_trained_model.pkl")



['wind_energy_trained_model.pkl']