In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

plt.style.use("Solarize_Light2")  # apply matplotlib's "Solarize_Light2" style sheet to all figures
# IPython magic (notebook only): render matplotlib figures inline in the cell output.
%matplotlib inline

# --- Data loading and preparation --------------------------------------------
# Read the raw turbine measurements.
df = pd.read_csv('TexasTurbine_(3).csv')

# Turn the textual "Time stamp" column into datetimes and use it as the index.
# NOTE: the format string has no year component, so every parsed timestamp
# falls back to the parser's default year (1900); only the month is used below.
df['Time stamp'] = pd.to_datetime(df['Time stamp'], format='%b %d, %I:%M %p')
df = df.set_index('Time stamp')

# Expose the calendar month (1-12) as an additional numeric feature.
df = df.assign(Month=df.index.month)

# Target is the generated power; every remaining column is a predictor.
y = df["System power generated | (kW)"]
X = df.drop(columns=y.name)

# Hold out 30% of the rows for testing (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

# Standardise the features: the scaler learns mean/variance on the training
# rows only and the same transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regressors, keyed by the name used for tuning and reporting.
#
# Every estimator that accepts a seed is pinned to random_state=42 (the same
# seed as the train/test split) so that repeated runs of the notebook produce
# the same fitted models and scores. LinearRegression, KNeighborsRegressor and
# SVR expose no random_state parameter and are left as they were.
models = {
    "LinearRegression": LinearRegression(),
    "DecisionTreeRegressor": DecisionTreeRegressor(random_state=42),
    "RandomForestRegressor": RandomForestRegressor(random_state=42),
    "KNeighborsRegressor": KNeighborsRegressor(),
    # max_iter raised above the library default to give the optimiser more
    # iterations to converge.
    "MLPRegressor": MLPRegressor(max_iter=2000, random_state=42),
    "XGBRegressor": XGBRegressor(random_state=42),
    "SVR": SVR(),
    "GradientBoostingRegressor": GradientBoostingRegressor(random_state=42),
}

# Hyper-parameter search space per model name, consumed by GridSearchCV.
# A model without an entry here (e.g. "LinearRegression") has nothing to tune.
# The ranges are illustrative starting points; widen or narrow them as needed.
param_grid = {
    # Tree-based models: tree depth (None = unlimited) plus split / ensemble size.
    "DecisionTreeRegressor": {"max_depth": [None, 10, 20, 30], "min_samples_split": [2, 5, 10]},
    "RandomForestRegressor": {"n_estimators": [50, 100, 200], "max_depth": [None, 10, 20, 30]},
    # Nearest neighbours: neighbourhood size and vote weighting.
    "KNeighborsRegressor": {"n_neighbors": [5, 10, 15], "weights": ["uniform", "distance"]},
    # Neural network: one or two hidden layers, two activations, Adam only.
    "MLPRegressor": {
        "hidden_layer_sizes": [(50,), (100,), (50, 50)],
        "activation": ["relu", "tanh"],
        "solver": ["adam"],
    },
    # Boosted trees (XGBoost).
    "XGBRegressor": {
        "n_estimators": [50, 100, 200],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [3, 6, 9],
    },
    # Support vector regression: kernel family, regularisation strength, gamma.
    "SVR": {"kernel": ["linear", "poly", "rbf"], "C": [1, 10, 100], "gamma": ["scale", "auto"]},
    # Boosted trees (scikit-learn), searched over the same values as XGBoost.
    "GradientBoostingRegressor": {
        "n_estimators": [50, 100, 200],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [3, 6, 9],
    },
}

# Fit every candidate model (grid-searching the ones that have a search space)
# and score the fitted estimator on the held-out test split.
results = {}
for name, model in models.items():
    print(f"Training {name}...")
    try:
        if name not in param_grid:
            # Nothing to tune: fit once with the constructor settings.
            best_model = model.fit(X_train_scaled, y_train)
        else:
            # Exhaustive 5-fold cross-validated search, ranked by R^2,
            # using all available cores.
            grid_search = GridSearchCV(
                estimator=model,
                param_grid=param_grid[name],
                scoring='r2',
                cv=5,
                n_jobs=-1,
                verbose=1,
            )
            grid_search.fit(X_train_scaled, y_train)
            best_model = grid_search.best_estimator_
            print(f"Best parameters for {name}: {grid_search.best_params_}")

        y_pred = best_model.predict(X_test_scaled)

        # R^2 is stored as a percentage; RMSE is the square root of the MSE.
        r2 = r2_score(y_test, y_pred) * 100
        mae = mean_absolute_error(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        results[name] = {"R2 Score (%)": r2, "MAE": mae, "RMSE": rmse}
    except Exception as e:
        # Best effort: report the failure and carry on with the next model;
        # a model that fails gets no entry in `results`.
        print(f"Error occurred while training {name}: {e}")

# Report: one section per successfully evaluated model, each metric shown with
# four decimals and every section followed by a blank line.
print("\nModel Evaluation Results:")
for name, metrics in results.items():
    report = [f"{name}:"]
    report.extend(f"  {metric}: {value:.4f}" for metric, value in metrics.items())
    print("\n".join(report))
    print()


Training LinearRegression...
Training DecisionTreeRegressor...
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best parameters for DecisionTreeRegressor: {'max_depth': 20, 'min_samples_split': 2}
Training RandomForestRegressor...
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best parameters for RandomForestRegressor: {'max_depth': None, 'n_estimators': 200}
Training KNeighborsRegressor...
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best parameters for KNeighborsRegressor: {'n_neighbors': 5, 'weights': 'distance'}
Training MLPRegressor...
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best parameters for MLPRegressor: {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'solver': 'adam'}
Training XGBRegressor...
Fitting 5 folds for each of 27 candidates, totalling 135 fits
Best parameters for XGBRegressor: {'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 200}
Training SVR...
Fitting 5 folds for each of 18 candidates, totalli