In [2]:
import os

import pandas as pd

# Location of the preprocessed dataset.  The default is the original author's
# local path; set the HOUSEHOLD_POWER_DATA_CSV environment variable to run the
# notebook on another machine without editing this cell.
DEFAULT_DATA_CSV = "E:\\Sakthi\\prasanth\\projects\\household\\power\\Scripts\\processed_data.csv"
DATA_CSV = os.environ.get("HOUSEHOLD_POWER_DATA_CSV", DEFAULT_DATA_CSV)

data = pd.read_csv(DATA_CSV)

In [3]:
# Display the column labels of the loaded frame.
data.columns

Index(['Datetime', 'Date', 'Time', 'Global_active_power',
       'Global_reactive_power', 'Voltage', 'Global_intensity',
       'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3', 'hour', 'day',
       'weekday', 'month', 'is_weekend', 'is_peak_hour', 'daily_avg_power',
       'power_rolling_3h', 'power_rolling_6h', 'power_rolling_1d'],
      dtype='object')

In [4]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np
from sklearn.preprocessing import StandardScaler
import pickle
import os


# Directory that receives the pickled estimators.
os.makedirs("models", exist_ok=True)


def evaluate_model(name, model, X_train_scaled,  X_test_scaled, y_train, y_test):
    """Fit ``model``, report its test-set metrics and pickle it under ``models/``.

    Parameters
    ----------
    name : str
        Display name; also lower-cased with spaces turned into underscores
        to build the pickle file name.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.
    X_train_scaled, X_test_scaled :
        Feature matrices passed to ``fit`` and ``predict`` respectively.
    y_train, y_test :
        Targets used for fitting and for scoring the predictions.

    Prints R², RMSE and MAE for the test predictions and returns ``None``.
    """
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_test_scaled)

    # All three metrics are computed before anything is printed.
    report = (
        ("R² Score", r2_score(y_test, predictions)),
        ("RMSE", np.sqrt(mean_squared_error(y_test, predictions))),
        ("MAE", mean_absolute_error(y_test, predictions)),
    )

    print(f"\n{name} Model Evaluation:")
    for label, value in report:
        # Labels are left-padded to nine characters so the colons line up.
        print(f"{label:<9}: {value:.4f}")

    # Persist the fitted estimator, e.g. "Random Forest" -> models/random_forest_model.pkl
    slug = name.lower().replace(" ", "_")
    model_path = "models/{}_model.pkl".format(slug)
    with open(model_path, 'wb') as handle:
        pickle.dump(model, handle)



In [6]:
import os
import pickle

# Choose features and target
features = [
    'Global_reactive_power', 'Voltage',
    'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3',
    'hour', 'day', 'weekday', 'month', 'is_weekend', 'is_peak_hour',
    'daily_avg_power', 'power_rolling_3h', 'power_rolling_6h', 'power_rolling_1d'
]

X = data[features]
y = data['Global_active_power']

# Train-test split (80/20, shuffled with a fixed seed for reproducibility)
# NOTE(review): the rows are shuffled before splitting.  If the frame is
# time-ordered and daily_avg_power / power_rolling_* are derived from
# Global_active_power, neighbouring rows in train and test may share
# information and inflate the scores -- TODO confirm, and consider a
# chronological split (shuffle=False) if so.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the data: statistics are learned on the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler next to the models.  The estimators are trained on
# the standardized matrices, so new data must go through this exact scaler
# before being passed to a reloaded model.
os.makedirs("models", exist_ok=True)
with open("models/scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

In [7]:
from sklearn.linear_model import LinearRegression

# Baseline: ordinary least squares on the standardized features.
evaluate_model(
    name="Linear Regression",
    model=LinearRegression(),
    X_train_scaled=X_train_scaled,
    X_test_scaled=X_test_scaled,
    y_train=y_train,
    y_test=y_test,
)


Linear Regression Model Evaluation:
R² Score : 0.9702
RMSE     : 0.1346
MAE      : 0.0604


In [8]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbours regressor with the library's default settings.
evaluate_model(
    name="KNN",
    model=KNeighborsRegressor(),
    X_train_scaled=X_train_scaled,
    X_test_scaled=X_test_scaled,
    y_train=y_train,
    y_test=y_test,
)


KNN Model Evaluation:
R² Score : 0.9763
RMSE     : 0.1200
MAE      : 0.0454


In [9]:
from sklearn.neural_network import MLPRegressor

# Multi-layer perceptron with two hidden layers: 64 units followed by 32 units.
evaluate_model(
    name="Neural Net",
    model=MLPRegressor(hidden_layer_sizes=(64, 32), max_iter=300, random_state=42),
    X_train_scaled=X_train_scaled,
    X_test_scaled=X_test_scaled,
    y_train=y_train,
    y_test=y_test,
)


Neural Net Model Evaluation:
R² Score : 0.9814
RMSE     : 0.1063
MAE      : 0.0446


In [12]:
from sklearn.ensemble import RandomForestRegressor

# Random forest limited to 20 trees, seeded for repeatable results.
evaluate_model(
    name="Random Forest",
    model=RandomForestRegressor(n_estimators=20, random_state=42),
    X_train_scaled=X_train_scaled,
    X_test_scaled=X_test_scaled,
    y_train=y_train,
    y_test=y_test,
)


Random Forest Model Evaluation:
R² Score : 0.9802
RMSE     : 0.1097
MAE      : 0.0408


In [10]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting with n_estimators=20, seeded for repeatable results.
evaluate_model(
    name="Gradient Boosting",
    model=GradientBoostingRegressor(n_estimators=20, random_state=42),
    X_train_scaled=X_train_scaled,
    X_test_scaled=X_test_scaled,
    y_train=y_train,
    y_test=y_test,
)


Gradient Boosting Model Evaluation:
R² Score : 0.9511
RMSE     : 0.1723
MAE      : 0.1111


In [11]:
from xgboost import XGBRegressor

# XGBoost regressor with n_estimators=20, seeded for repeatable results.
evaluate_model(
    name="XGBoost",
    model=XGBRegressor(n_estimators=20, random_state=42),
    X_train_scaled=X_train_scaled,
    X_test_scaled=X_test_scaled,
    y_train=y_train,
    y_test=y_test,
)


XGBoost Model Evaluation:
R² Score : 0.9779
RMSE     : 0.1159
MAE      : 0.0464


In [None]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

# Feature names used as the bar labels.  The per-feature scores read from each
# model are paired with this list by position.
features = [
    'Global_reactive_power', 'Voltage',
    'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3',
    'hour', 'day', 'weekday', 'month', 'is_weekend', 'is_peak_hour',
    'daily_avg_power', 'power_rolling_3h', 'power_rolling_6h', 'power_rolling_1d'
]

# Display name -> path of the pickled estimator to inspect
model_files = {
    "Random Forest": "models/random_forest_model.pkl",
    "Gradient Boosting": "models/gradient_boosting_model.pkl",
    "XGBoost": "models/xgboost_model.pkl",
    "Linear Regression": "models/linear_regression_model.pkl",
}

# One horizontal bar chart per saved model: importances where the estimator
# exposes them, otherwise absolute coefficient values.
for model_name, file_path in model_files.items():
    if not os.path.exists(file_path):
        print(f"{model_name} model file not found: {file_path}")
        continue

    # pickle.load executes code embedded in the file; only load pickles that
    # were produced locally.
    with open(file_path, 'rb') as f:
        model = pickle.load(f)

    print(f"\nFeature importance for: {model_name}")

    try:
        if hasattr(model, "feature_importances_"):
            # Estimators exposing feature_importances_ (e.g. tree ensembles)
            importances, title = model.feature_importances_, "Feature Importance"
        elif hasattr(model, "coef_"):
            # Linear models: rank by absolute coefficient size
            importances, title = np.abs(model.coef_), "Coefficient Magnitude"
        else:
            print(f"{model_name} does not support feature importance directly.")
            continue

        # Pair each score with its feature name, largest first
        importance_df = pd.DataFrame({'Feature': features, 'Importance': importances})
        importance_df = importance_df.sort_values(by='Importance', ascending=False)

        # Draw the chart; the y-axis is inverted so the top-ranked feature is on top
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.barh(importance_df['Feature'], importance_df['Importance'], color='skyblue')
        ax.invert_yaxis()
        ax.set_title(f"{title}: {model_name}")
        ax.set_xlabel("Score")
        fig.tight_layout()
        plt.show()

    except Exception as e:
        # Report the failure and move on to the next model
        print(f"Error processing {model_name}: {e}")
