In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# stored there can be opened by path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read T1.csv from the mounted Drive into `df` and list the columns it contains.
csv_path = '/content/drive/My Drive/T1.csv'
df = pd.read_csv(csv_path)
print(df.columns)


Index(['Date/Time', 'LV ActivePower (kW)', 'Wind Speed (m/s)',
       'Theoretical_Power_Curve (KWh)', 'Wind Direction (°)'],
      dtype='object')


In [None]:
# Discard the columns that are not used as inputs, then move the target
# column out of `df` and into `y`, leaving only the features in `df`.
df = df.drop(columns=['Date/Time', 'Theoretical_Power_Curve (KWh)'])
y = df.pop('LV ActivePower (kW)')


In [None]:
# Z-score both feature columns using the mean/std of the training rows only,
# so that no statistics from the held-out rows leak into the model inputs.
# 42283 is the row index at which the notebook splits train from test; keep
# this value in sync with that split.
n_train_rows = 42283
for feature in ('Wind Direction (°)', 'Wind Speed (m/s)'):
    train_values = df[feature].iloc[:n_train_rows]
    df[feature] = (df[feature] - train_values.mean()) / train_values.std()

In [None]:
# Positional hold-out split: rows before `split_at` are used for training,
# the remaining rows for testing.
split_at = 42283
X_train, X_test = df.iloc[:split_at], df.iloc[split_at:]
y_train, y_test = y.iloc[:split_at], y.iloc[split_at:]

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Candidate regressors with library-default hyper-parameters.  The random
# forest and the decision tree draw random numbers while fitting, so they get
# a fixed random_state; otherwise every re-run of the notebook would print
# different scores.
xgr = XGBRegressor()
rf = RandomForestRegressor(random_state=42)
lr = LinearRegression()
dt = DecisionTreeRegressor(random_state=42)
sm = SVR()

# Fit each model on the training rows and score it on the held-out rows.
models = {'XGBoost': xgr, 'Random Forest': rf, 'Linear Regression': lr, 'Decision Tree': dt, 'SVR': sm}
predictions = {}  # model name -> predictions for X_test

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    predictions[name] = y_pred
    r2 = r2_score(y_test, y_pred)
    # RMSE is reported in the units of the target column.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(f'R2-{name}: {r2}')
    print(f'RMSE-{name}: {rmse}')

# Example: Accessing predictions
#print(predictions['XGBoost'])


R2-XGBoost: 0.8378748583617237
RMSE-XGBoost: 552.9701440954856
R2-Random Forest: 0.8193920731314233
RMSE-Random Forest: 583.6398041838653
R2-Linear Regression: 0.8184357809853167
RMSE-Linear Regression: 585.1829072911474
R2-Decision Tree: 0.7203075302511192
RMSE-Decision Tree: 726.3014082694837
R2-SVR: 0.8891384161563985
RMSE-SVR: 457.26402913237996


In [None]:
# Search space used when tuning the XGBoost regressor: every key is an
# estimator argument and every list holds the candidate values to sample from.
params = dict(
    learning_rate=[0.05, 0.01, 0.03, 0.1, 0.15, 0.2],
    n_estimators=[50, 100, 150, 200, 500, 800, 1000, 1500],
    max_depth=[3, 4, 5, 6, 8, 10, 12, 15, 20, 25],
    min_child_weight=[1, 3, 5, 7, 10, 15, 20, 25],
    gamma=[0.0, 0.1, 0.2, 0.3, 0.4],
    subsample=[0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1],
    reg_lambda=[0.0, 0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1],
    reg_alpha=[0.0, 0.1, 0.2, 0.3, 0.4],
    colsample_bytree=[0.3, 0.4, 0.5, 0.7, 0.9],
    colsample_bylevel=[0.3, 0.4, 0.5, 0.7, 0.9],
)

In [None]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

In [None]:
from datetime import datetime

def timer(start_time=None):
    """Start a stopwatch, or report the time elapsed since one was started.

    Args:
        start_time: ``None`` (or any falsy value) to start timing; otherwise
            the ``datetime`` previously returned by this function.

    Returns:
        The current ``datetime`` when called without ``start_time``, so the
        caller can pass it back later; ``None`` after printing the elapsed
        time as hours, minutes and seconds.
    """
    if not start_time:
        # Hand the start stamp back to the caller; assigning it only to the
        # local parameter would throw it away.
        return datetime.now()
    thour, temp_sec = divmod((datetime.now() - start_time).total_seconds(), 3600)
    tmin, tsec = divmod(temp_sec, 60)
    print('\n Time taken: %i hours %i minutes and %s seconds.' % (thour, tmin, round(tsec, 2)))
    return None


In [None]:
# Randomized hyper-parameter search for the XGBoost regressor: 10 sampled
# candidates, each scored with 5-fold cross-validation.  random_state pins
# which candidates are sampled so that a re-run explores the same combinations.
random_search = RandomizedSearchCV(
    xgr,
    param_distributions=params,
    n_iter=10,
    n_jobs=-1,
    cv=5,
    verbose=3,
    random_state=42,
)

# Start timing
start_time = datetime.now()

# Fit RandomizedSearchCV on training data
random_search.fit(X_train, y_train)

# End timing
print('Elapsed time:', datetime.now() - start_time)

# Print best parameters found
print('Best parameters:', random_search.best_params_)


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Elapsed time: 0:00:47.704959
Best parameters: {'subsample': 0.6, 'reg_lambda': 0.0, 'reg_alpha': 0.0, 'n_estimators': 150, 'min_child_weight': 1, 'max_depth': 4, 'learning_rate': 0.15, 'gamma': 0.4, 'colsample_bytree': 0.5, 'colsample_bylevel': 0.4}


In [None]:
# Display the best estimator found by the randomized search fitted above.
random_search.best_estimator_

In [None]:
# Score an XGBoost model with hand-picked hyper-parameters on the held-out rows.
# The configured model (`xg`) is the one that has to be fitted here: fitting
# the default `xgr` instead only reproduces the untuned baseline score.
# Arguments that were passed as None (missing, nthread, seed, silent) are
# omitted, and the deprecated 'reg:linear' objective is spelled with its
# current name.
xg = XGBRegressor(
    base_score=0.5,
    booster='gbtree',
    colsample_bylevel=0.7,
    colsample_bynode=1,
    colsample_bytree=0.3,
    gamma=0.2,
    importance_type='gain',
    learning_rate=0.03,
    max_delta_step=0,
    max_depth=8,
    min_child_weight=25,
    n_estimators=800,
    n_jobs=1,
    objective='reg:squarederror',
    random_state=0,
    reg_alpha=0.2,
    reg_lambda=0.8,
    scale_pos_weight=1,
    subsample=0.1,
    verbosity=1,
)
x = xg.fit(X_train, y_train)  # fit() returns the fitted estimator itself
y1 = x.predict(X_test)
r2_score(y_test, y1)

0.8378748583617237

In [None]:
# A fresh random-forest regressor, plus the candidate values to sample from
# when tuning a random forest.
r = RandomForestRegressor()
params_rf = dict(
    n_estimators=[50, 100, 150, 200, 500, 800, 1000, 1500],
    max_depth=[3, 4, 5, 6, 8, 10, 12, 15, 20, 25],
)

In [None]:
# Randomized search over the random-forest space: 10 sampled candidates, each
# scored with 5-fold cross-validation.  random_state pins which candidates are
# sampled so that a re-run explores the same combinations.
random_search = RandomizedSearchCV(
    rf,
    param_distributions=params_rf,
    n_iter=10,
    n_jobs=-1,
    cv=5,
    verbose=3,
    random_state=42,
)

In [None]:
# Fit the search object built in the previous cell on the training rows and
# report the wall-clock time the fit took.
start_time = datetime.now()
random_search.fit(X_train, y_train)
print(f'Elapsed time: {datetime.now() - start_time}')

# Parameter combination that the search ranked best.
print(f'Best parameters: {random_search.best_params_}')

Fitting 5 folds for each of 10 candidates, totalling 50 fits
Elapsed time: 0:00:52.362419
Best parameters: {'subsample': 0.4, 'reg_lambda': 0.8, 'reg_alpha': 0.1, 'n_estimators': 150, 'min_child_weight': 20, 'max_depth': 3, 'learning_rate': 0.2, 'gamma': 0.3, 'colsample_bytree': 0.5, 'colsample_bylevel': 0.3}


In [None]:
# Display the best estimator found by the most recently fitted search.
random_search.best_estimator_

In [None]:
# Train an SVR with hand-set C / epsilon on the training rows, then measure
# how well it explains the held-out target (R-squared).
sv = SVR(C=100, epsilon=0.4, gamma='auto')
sv.fit(X_train, y_train)

y_pred_svr = sv.predict(X_test)
r2_svr = r2_score(y_test, y_pred_svr)

print('R2 score for SVR:', r2_svr)

R2 score for SVR: 0.8896895156992954
