<a href="https://colab.research.google.com/github/ninay03/Ai-Powered-Predictive-Maintenance-System-for-Renewable-Energy-Plants/blob/main/Wind_Power_Plant_Trained_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import pandas as pd
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [13]:
# Load the merged dataset. The absolute '/content/...' path presumably refers to a
# file uploaded to the Colab runtime -- TODO confirm before running elsewhere.
data = pd.read_csv('/content/merged_dataset.csv')

In [14]:
# Display the column labels of the loaded frame.
data.columns

Index(['dia_mes_ano', 'hora_minuto', 'irr', 'massaPM1', 'massaPM2', 'massaPM4',
       'massaPM10', 'numPM1', 'numPM2', 'numPM4', 'numPM10', 'tamanho_medio',
       'temp', 'vento_dir', 'vento_vel', 'rainfall'],
      dtype='object')

In [15]:
# Work on a copy so `data` keeps the frame exactly as loaded.
# NOTE(review): the next cell re-reads the CSV into `df`, so this copy is superseded.
df = data.copy()

In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# Step 1: Load and Preprocess Data

df = pd.read_csv('/content/merged_dataset.csv')

# Calendar date is stored as YYYYMMDD.
dates = pd.to_datetime(df['dia_mes_ano'], format='%Y%m%d')

# Time of day is zero-padded to six digits and read as HHMMSS.
# NOTE(review): the column name suggests hour/minute; with this padding a value
# such as 800 is read as 00:08:00 -- confirm against the data source.
clock = pd.to_datetime(
    df['hora_minuto'].apply(lambda raw: f"{int(raw):06d}"),
    format='%H%M%S',
)

# Append the derived calendar features and discard the raw date/time columns.
df = df.assign(
    hour=clock.dt.hour,
    day_of_week=dates.dt.dayofweek,
    month=dates.dt.month,
).drop(columns=['dia_mes_ano', 'hora_minuto'])

# Step 2: Calculate Health Score

PM10_THRESHOLD = 25

# Baseline irradiance: mean 'irr' of each (month, hour) group, with the overall
# mean filling any group whose mean is missing.
expected_irr = (
    df.groupby(['month', 'hour'])['irr']
    .transform('mean')
    .fillna(df['irr'].mean())
)

# Observed / baseline irradiance (zero baselines swapped for 1e-6 to avoid a
# division by zero), scaled by the PM10 term and limited to the range [0, 1].
irr_ratio = df['irr'] / expected_irr.replace(0, 1e-6)
pm10_term = 1 - (df['massaPM10'] / PM10_THRESHOLD)
df['health_score'] = (irr_ratio * pm10_term).clip(lower=0, upper=1)

# Rows whose score could not be computed are removed.
df = df.dropna(subset=['health_score'])

# Step 3: Train Model

y = df['health_score']
X = df.drop(columns='health_score')

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)



In [17]:
# Hyperparameters gathered in one mapping and unpacked into the estimator.
rf_params = {
    'n_estimators': 100,
    'max_depth': 10,
    'min_samples_leaf': 4,
    'max_features': 'sqrt',  # Consider sqrt(features) at each split
    'random_state': 42,
}
model = RandomForestRegressor(**rf_params)


In [18]:

# Fit on the training split.
model.fit(X_train, y_train)

# Step 4: Evaluate Model

# Score the held-out split with R² and mean absolute error.
y_pred = model.predict(X_test)
r2, mae = r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred)

for report_line in (
    f"R² Score: {r2:.4f}",
    f"Mean Absolute Error: {mae:.4f}",
):
    print(report_line)



R² Score: 0.9296
Mean Absolute Error: 0.0657


In [26]:
 # For when deployed to Real Time data

 def predict_health_score(json_input=None):
     """Predict a health score from one JSON object of feature values.

     Uses the notebook-level fitted ``model``.

     Args:
         json_input: JSON text describing a single observation. When omitted,
             the text is read interactively with ``input()``.

     Returns:
         None. The prediction, or a message explaining why the input was
         rejected, is printed.
     """
     # Feature columns in the order the training frame was built: the raw
     # sensor columns followed by the derived hour / day_of_week / month.
     required_keys = [
         'irr', 'massaPM1', 'massaPM2', 'massaPM4', 'massaPM10',
         'numPM1', 'numPM2', 'numPM4', 'numPM10', 'tamanho_medio',
         'temp', 'vento_dir', 'vento_vel', 'rainfall',
         'hour', 'day_of_week', 'month'
     ]
     # Prefer the column names recorded by the fitted estimator, when available,
     # so the order always matches what the model was trained on.
     feature_order = list(getattr(model, 'feature_names_in_', required_keys))

     if json_input is None:
         json_input = input("Enter the input data as a JSON object: ")

     try:
         data = json.loads(json_input)  # Parse JSON input
     except json.JSONDecodeError:
         print("Invalid JSON format. Please enter a valid JSON object.")
         return

     # Valid JSON that is not an object (a number, list, string, ...) cannot
     # supply named features.
     if not isinstance(data, dict):
         print("Invalid JSON format. Please enter a valid JSON object.")
         return

     missing_keys = [key for key in feature_order if key not in data]
     if missing_keys:
         print(f"Missing keys in input JSON: {missing_keys}")
         return

     # Select exactly the training columns, in training order: JSON key order is
     # arbitrary and extra keys must not reach the model.
     input_data = pd.DataFrame([data])[feature_order]

     prediction = model.predict(input_data)
     print(f"\nPredicted Health Score: {prediction[0]:.4f}")

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler # Import the StandardScaler

def predict_health_score():
    """Prompt for one raw sensor reading and print its predicted health score.

    The date and time answers are converted into the ``hour``, ``day_of_week``
    and ``month`` features using the same formats as the training
    preprocessing (``%Y%m%d`` for the date, zero-padded ``%H%M%S`` for the
    time). The notebook-level ``model`` is a random forest fitted on unscaled
    2-D frames, so the row is passed to it directly: no scaling, no 3-D
    reshape, and no inverse transform of the prediction.

    Returns:
        None. The prediction is printed.

    Raises:
        ValueError: if an answer cannot be parsed as a number, date or time.
    """
    # Collect input values manually
    dia_mes_ano = input("Enter dia_mes_ano: ")
    hora_minuto = input("Enter hora_minuto: ")

    # Numeric sensor fields, prompted for in the training column order.
    numeric_fields = [
        'irr', 'massaPM1', 'massaPM2', 'massaPM4', 'massaPM10',
        'numPM1', 'numPM2', 'numPM4', 'numPM10', 'tamanho_medio',
        'temp', 'vento_dir', 'vento_vel', 'rainfall'
    ]
    row = {name: float(input(f"Enter {name}: ")) for name in numeric_fields}

    # Derive the calendar features the model was trained with.
    date = pd.to_datetime(dia_mes_ano.strip(), format='%Y%m%d')
    clock = pd.to_datetime(f"{int(float(hora_minuto)):06d}", format='%H%M%S')
    row['hour'] = clock.hour
    row['day_of_week'] = date.dayofweek
    row['month'] = date.month

    input_data = pd.DataFrame([row])

    # Align columns with the names recorded by the fitted estimator when present;
    # otherwise keep the order built above (raw fields, then calendar features).
    feature_order = list(getattr(model, 'feature_names_in_', input_data.columns))
    input_data = input_data[feature_order]

    # Make prediction
    prediction = model.predict(input_data)

    print(f"\nPredicted Health Score: {prediction[0]:.4f}")

# Run the interactive predictor; it blocks until every prompt has been answered.
predict_health_score()




In [20]:
import joblib

# Column order of the training frame, stored next to the estimator so that
# inference inputs can be arranged the same way.
feature_order = list(X_train.columns)

# Persist (estimator, feature order) as a single pickle file.
joblib.dump((model, feature_order), "wind_energy_trained_model.pkl")



['wind_energy_trained_model.pkl']

In [21]:
import pandas as pd
import numpy as np
import json
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
import warnings

warnings.filterwarnings('ignore')

# Step 1: Load and Preprocess Data
df = pd.read_csv('/content/merged_dataset.csv')

# Calendar date is stored as YYYYMMDD.
dates = pd.to_datetime(df['dia_mes_ano'], format='%Y%m%d')

# Time of day is zero-padded to six digits and read as HHMMSS.
# NOTE(review): the column name suggests hour/minute; with this padding a value
# such as 800 is read as 00:08:00 -- confirm against the data source.
clock = pd.to_datetime(
    df['hora_minuto'].apply(lambda raw: f"{int(raw):06d}"),
    format='%H%M%S',
)

# Append the derived calendar features and discard the raw date/time columns.
df = df.assign(
    hour=clock.dt.hour,
    day_of_week=dates.dt.dayofweek,
    month=dates.dt.month,
).drop(columns=['dia_mes_ano', 'hora_minuto'])

# Step 2: Calculate Health Score
PM10_THRESHOLD = 25

# Baseline irradiance per (month, hour) group; the overall mean fills gaps.
expected_irr = (
    df.groupby(['month', 'hour'])['irr']
    .transform('mean')
    .fillna(df['irr'].mean())
)

# Observed / baseline irradiance (zero baselines swapped for 1e-6), scaled by
# the PM10 term and limited to the range [0, 1].
irr_ratio = df['irr'] / expected_irr.replace(0, 1e-6)
pm10_term = 1 - (df['massaPM10'] / PM10_THRESHOLD)
df['health_score'] = (irr_ratio * pm10_term).clip(lower=0, upper=1)

# Rows without a computable score are removed.
df = df.dropna(subset=['health_score'])

# Step 3: Train Model
y = df['health_score']
X = df.drop(columns='health_score')

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Random forest hyperparameters, unpacked into the estimator.
rf_params = {
    'n_estimators': 100,
    'max_depth': 10,
    'min_samples_leaf': 4,
    'max_features': 'sqrt',
    'random_state': 42,
}
model = RandomForestRegressor(**rf_params)
model.fit(X_train, y_train)

# Step 4: Evaluate Model
y_pred = model.predict(X_test)
r2, mae = r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred)

for report_line in (
    f"R² Score: {r2:.4f}",
    f"Mean Absolute Error: {mae:.4f}",
):
    print(report_line)

# Step 5: Save Model as Pickle File
# The training column order is stored alongside the estimator.
model_filename = "wind_energy_trained_model.pkl"
joblib.dump((model, X_train.columns.tolist()), model_filename)

print(f"Model saved successfully as {model_filename}")


R² Score: 0.9296
Mean Absolute Error: 0.0657
Model saved successfully as wind_energy_trained_model.pkl


In [22]:
# Load the trained model
def load_model(model_filename="wind_energy_trained_model.pkl"):
    """Load the saved estimator and its training feature order.

    Args:
        model_filename: Path of the joblib file holding a
            ``(model, feature_order)`` pair. Defaults to the file name used
            when the model was saved.

    Returns:
        A ``(loaded_model, feature_order)`` tuple.
    """
    # SECURITY: joblib.load is pickle-based and can execute arbitrary code;
    # only load files produced by a trusted source.
    loaded_model, feature_order = joblib.load(model_filename)
    return loaded_model, feature_order

# Predict Health Score
def predict_health_score(json_input=None):
    """Predict a health score from one JSON object using the saved model.

    Args:
        json_input: JSON text describing a single observation, keyed by
            feature name. When omitted, the text is read interactively with
            ``input()``.

    Returns:
        None. The prediction, or a message explaining why the input was
        rejected, is printed.
    """
    if json_input is None:
        json_input = input("Enter the input data as a JSON object: ")

    try:
        data = json.loads(json_input)  # Parse JSON input
    except json.JSONDecodeError:
        print("Invalid JSON format. Please enter a valid JSON object.")
        return

    # Valid JSON that is not an object (a number, list, string, ...) cannot
    # supply named features; reject it instead of failing on the key lookup.
    if not isinstance(data, dict):
        print("Invalid JSON format. Please enter a valid JSON object.")
        return

    model, feature_order = load_model()

    # Every feature the model was trained on must be present.
    missing_keys = [key for key in feature_order if key not in data]
    if missing_keys:
        print(f"Missing keys in input JSON: {missing_keys}")
        return

    # One-row frame restricted to the training columns, in training order.
    input_data = pd.DataFrame([data])[feature_order]

    # Make Prediction
    prediction = model.predict(input_data)
    print(f"\nPredicted Health Score: {prediction[0]:.4f}")

# Run the predictor once; it waits for a JSON object typed at the prompt.
predict_health_score()


Enter the input data as a JSON object: 20191018	800	0	8.38	0.28	0	0	69.11	0.19	0	0	0.47	24	45	9.31	0
Invalid JSON format. Please enter a valid JSON object.
