In [105]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# --- 1. Load Data ---
df = pd.read_csv(r"D:\DAU\ASSIGNMENT\FOML ASSIGN\PROJ\SAHIL\India_Renewable_Energy_MASTER_DATASET_Calculated.csv")
df['Date'] = pd.to_datetime(df['Date'])
df = pd.get_dummies(df, columns=['Season'], prefix='Season')
df = df.sort_values(by='Date').reset_index(drop=True)



# Lag Features
df['PS_lag_1'] = df['PS'].shift(1)
df['T2M_lag_1'] = df['T2M'].shift(1)
df['RH2M_lag_1'] = df['RH2M'].shift(1)







In [None]:
target_variable = 'Wind_Power_Density'
y = df[target_variable]

season_cols = [col for col in df.columns if col.startswith("Season_")]

X = df[['Latitude', 'Longitude',
        'PS', 'PS_lag_1', 
        'T2M', 'T2M_lag_1',
        'RH2M', 'RH2M_lag_1',
        'PRECTOTCORR', 'ALLSKY_SFC_SW_DWN','WS50M'] + season_cols]

print(f"Target (y): {target_variable}")
print(f"Final Feature Set (X): {list(X.columns)}")

# Log-transform target
y_log = np.log1p(y)

# Chronological Split
test_size = 0.2
split_index = int(len(df) * (1 - test_size))

X_train = X.iloc[:split_index]
X_test = X.iloc[split_index:]
y_train_log = y_log.iloc[:split_index]
y_test_log = y_log.iloc[split_index:]

print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")


In [None]:

model = LinearRegression()

model.fit(X_train, y_train_log)

y_pred_log = model.predict(X_test)
y_pred = np.expm1(y_pred_log)
y_test_orig = np.expm1(y_test_log)

r2 = r2_score(y_test_orig, y_pred)
mae = mean_absolute_error(y_test_orig, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))

print("R² Score:", r2)
print("MAE:", mae)
print("RMSE:", rmse)


R² Score: 0.6548230739520089
MAE: 0.018151701030491997
RMSE: 0.03366517441798998


In [None]:
from sklearn.linear_model import Ridge
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error


model = Ridge()
model.fit(X_train, y_train_log)

y_pred_log = model.predict(X_test)
y_pred = np.expm1(y_pred_log)
y_test_orig = np.expm1(y_test_log)

r2 = r2_score(y_test_orig, y_pred)
mae = mean_absolute_error(y_test_orig, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))

print("R²:", r2)
print("MAE:", mae)
print("RMSE:", rmse)


R²: 0.6548227221816256
MAE: 0.018151661197894804
RMSE: 0.033665191572103424


In [None]:
from sklearn.tree import DecisionTreeRegressor
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error


model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train_log)

y_pred_log = model.predict(X_test)
y_pred = np.expm1(y_pred_log)
y_test_orig = np.expm1(y_test_log)

r2 = r2_score(y_test_orig, y_pred)
mae = mean_absolute_error(y_test_orig, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))

print("=== Decision Tree Results ===")
print("R²:", r2)
print("MAE:", mae)
print("RMSE:", rmse)


=== Decision Tree Results ===
R²: 0.9828358095590805
MAE: 0.003021613568205827
RMSE: 0.007507090521815956


In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

model = GradientBoostingRegressor()
model.fit(X_train, y_train_log)

y_pred_log = model.predict(X_test)
y_pred = np.expm1(y_pred_log)
y_test_orig = np.expm1(y_test_log)

r2 = r2_score(y_test_orig, y_pred)
mae = mean_absolute_error(y_test_orig, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))

print("=== Gradient Boosting Results ===")
print("R²:", r2)
print("MAE:", mae)
print("RMSE:", rmse)


=== Gradient Boosting Results ===
R²: 0.986193166314309
MAE: 0.0032963245045991908
RMSE: 0.006732975546882781


In [104]:
from lightgbm import LGBMRegressor
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

model = LGBMRegressor(num_leaves=4,max_depth=1,n_estimators=20,learning_rate=0.5)
model.fit(X_train, y_train_log)

y_pred_log = model.predict(X_test)
y_pred = np.expm1(y_pred_log)
y_test_orig = np.expm1(y_test_log)

r2 = r2_score(y_test_orig, y_pred)
mae = mean_absolute_error(y_test_orig, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))

print("=== LightGBM Results ===")
print("R²:", r2)
print("MAE:", mae)
print("RMSE:", rmse)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006330 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2373
[LightGBM] [Info] Number of data points in the train set: 105205, number of used features: 15
[LightGBM] [Info] Start training from score 0.027132
=== LightGBM Results ===
R²: 0.8866256933022753
MAE: 0.008092697185892662
RMSE: 0.01929377957730708


In [None]:
from sklearn.ensemble import ExtraTreesRegressor
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

model = ExtraTreesRegressor(random_state=42)
model.fit(X_train, y_train_log)

y_pred_log = model.predict(X_test)
y_pred = np.expm1(y_pred_log)
y_test_orig = np.expm1(y_test_log)

r2 = r2_score(y_test_orig, y_pred)
mae = mean_absolute_error(y_test_orig, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))

print("=== ExtraTrees Regression Results ===")
print("R²:", r2)
print("MAE:", mae)
print("RMSE:", rmse)


=== ExtraTrees Regression Results ===
R²: 0.9926692647598984
MAE: 0.0020747814814386215
RMSE: 0.00490607019565892


In [None]:
from sklearn.svm import SVR
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

model = SVR(kernel='rbf')  
model.fit(X_train, y_train_log)
y_pred_log = model.predict(X_test)
y_pred = np.expm1(y_pred_log)
y_test_orig = np.expm1(y_test_log)


r2 = r2_score(y_test_orig, y_pred)
mae = mean_absolute_error(y_test_orig, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))

print("=== SVR Results ===")
print("R²:", r2)
print("MAE:", mae)
print("RMSE:", rmse)


=== SVR Results ===
R²: 0.318343611183294
MAE: 0.03869460946127281
RMSE: 0.04730888856205982


In [None]:
from sklearn.neighbors import KNeighborsRegressor
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error



model = KNeighborsRegressor()

model=model.fit(X_train, y_train_log)

y_pred_log = model.predict(X_test)

y_pred = np.expm1(y_pred_log)
y_test_orig = np.expm1(y_test_log)


r2 = r2_score(y_test_orig, y_pred)
mae = mean_absolute_error(y_test_orig, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))

print("=== KNN Regression Results ===")
print("R²:", r2)
print("MAE:", mae)
print("RMSE:", rmse)


=== KNN Regression Results ===
R²: 0.6601116081415074
MAE: 0.013443992504223647
RMSE: 0.03340628306004892


In [102]:
from sklearn.ensemble import BaggingRegressor
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

model = BaggingRegressor()

model.fit(X_train, y_train_log)

y_pred_log = model.predict(X_test)

y_pred = np.expm1(y_pred_log)
y_test_orig = np.expm1(y_test_log)

r2 = r2_score(y_test_orig, y_pred)
mae = mean_absolute_error(y_test_orig, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))

print("Bagging R²:", r2)
print("MAE:", mae)
print("RMSE:", rmse)


Bagging R²: 0.9889027514450502
MAE: 0.002291183363516087
RMSE: 0.00603625595925786
