# In [None]:
# ML MODELS

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from datetime import datetime

# Load data
df = pd.read_csv('/content/drive/My Drive/T1.csv')

# Remove irrelevant columns
df = df.drop(columns=['Date/Time', 'Theoretical_Power_Curve (KWh)'])

# Split data into features (X) and target variable (y)
y = df['LV ActivePower (kW)']
X = df.drop(columns=['LV ActivePower (kW)'])

# Split data into train and test sets BEFORE scaling, so that no statistic
# of the held-out rows (here: per-column min/max) can influence training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features.
# The scaler is fitted on the training split only and then applied to the
# test split; fitting it on the full frame would leak test-set information.
# Consequences: `df` itself keeps the raw (unscaled) values, and scaled test
# values may fall slightly outside [0, 1] when a test row exceeds the
# training range.
scale_cols = ['Wind Direction (°)', 'Wind Speed (m/s)']
scaler = MinMaxScaler()
# Work on explicit copies so the column assignment below never writes into
# (or warns about) a view of `X`.
X_train = X_train.copy()
X_test = X_test.copy()
X_train[scale_cols] = scaler.fit_transform(X_train[scale_cols])
X_test[scale_cols] = scaler.transform(X_test[scale_cols])

# Define models
# Estimators that use randomness are seeded with the same value as the
# train/test split (42) so that repeated runs print identical metrics.
# LinearRegression and SVR take no seed here.
models = {
    'XGBoost': XGBRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'SVR': SVR(),
}

# Fit each model with its default hyperparameters on the training split and
# report R2 and RMSE on the held-out test split.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    # RMSE is the square root of the mean squared error, in the target's units.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(f'R2-{name}: {r2}')
    print(f'RMSE-{name}: {rmse}')

# Hyperparameter tuning for XGBoost
# Candidate values sampled by the randomized search below.
params_xgb = {
    "learning_rate": [0.05, 0.01, 0.03, 0.1, 0.15, 0.2],
    "n_estimators": [50, 100, 150, 200, 500, 800, 1000, 1500],
    "max_depth": [3, 4, 5, 6, 8, 10, 12, 15, 20, 25],
    "min_child_weight": [1, 3, 5, 7, 10, 15, 20, 25],
    "gamma": [0.0, 0.1, 0.2, 0.3, 0.4],
    "subsample": [0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1],
    "reg_lambda": [0.0, 0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1],
    "reg_alpha": [0.0, 0.1, 0.2, 0.3, 0.4],
    "colsample_bytree": [0.3, 0.4, 0.5, 0.7, 0.9],
    "colsample_bylevel": [0.3, 0.4, 0.5, 0.7, 0.9],
}

random_search_xgb = RandomizedSearchCV(
    # n_jobs=1 on the estimator: the search already fans out over all cores
    # (n_jobs=-1), and letting every XGBoost fit spawn its own threads on top
    # of that causes thread contention rather than a speed-up.
    XGBRegressor(random_state=42, n_jobs=1),
    param_distributions=params_xgb,
    n_iter=10,
    n_jobs=-1,
    cv=5,
    verbose=3,
    # Seed the sampling of the 10 candidates so the search is repeatable.
    random_state=42,
)

start_time = datetime.now()
random_search_xgb.fit(X_train, y_train)
print('Elapsed time:', datetime.now() - start_time)
print('Best parameters for XGBoost:', random_search_xgb.best_params_)

# Score the tuned model on the held-out split so it can be compared with the
# untuned baseline. With the default refit=True, `predict` delegates to the
# best estimator refitted on the whole training split.
y_pred_xgb_tuned = random_search_xgb.predict(X_test)
print('R2-XGBoost (tuned):', r2_score(y_test, y_pred_xgb_tuned))
print('RMSE-XGBoost (tuned):', np.sqrt(mean_squared_error(y_test, y_pred_xgb_tuned)))

# Hyperparameter tuning for Random Forest
# Candidate values sampled by the randomized search below.
params_rf = {
    "n_estimators": [50, 100, 150, 200, 500, 800, 1000, 1500],
    "max_depth": [3, 4, 5, 6, 8, 10, 12, 15, 20, 25],
}

random_search_rf = RandomizedSearchCV(
    # Seed the forest itself so each candidate's CV score is repeatable.
    RandomForestRegressor(random_state=42),
    param_distributions=params_rf,
    n_iter=10,
    n_jobs=-1,
    cv=5,
    verbose=3,
    # Seed the sampling of the 10 candidates so the search is repeatable.
    random_state=42,
)

start_time = datetime.now()
random_search_rf.fit(X_train, y_train)
print('Elapsed time:', datetime.now() - start_time)
print('Best parameters for Random Forest:', random_search_rf.best_params_)

# Score the tuned model on the held-out split so it can be compared with the
# untuned baseline. With the default refit=True, `predict` delegates to the
# best estimator refitted on the whole training split.
y_pred_rf_tuned = random_search_rf.predict(X_test)
print('R2-Random Forest (tuned):', r2_score(y_test, y_pred_rf_tuned))
print('RMSE-Random Forest (tuned):', np.sqrt(mean_squared_error(y_test, y_pred_rf_tuned)))

# SVR
# Fit an SVR with hand-picked hyperparameters (instead of the defaults used
# in the baseline comparison) and score it on the held-out test split.
svr = SVR(gamma='auto', C=100, epsilon=0.4)
svr.fit(X_train, y_train)
y_pred_svr = svr.predict(X_test)
r2_svr = r2_score(y_test, y_pred_svr)
print('R2 score for SVR:', r2_svr)
# Report RMSE as well, so this run is comparable with the baseline models,
# which are all reported with both R2 and RMSE.
rmse_svr = np.sqrt(mean_squared_error(y_test, y_pred_svr))
print('RMSE for SVR:', rmse_svr)
