In [18]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/wind-turbine-scada-dataset/T1.csv


In [19]:
df=pd.read_csv('/kaggle/input/wind-turbine-scada-dataset/T1.csv')
df

Unnamed: 0,Date/Time,LV ActivePower (kW),Wind Speed (m/s),Theoretical_Power_Curve (KWh),Wind Direction (°)
0,01 01 2018 00:00,380.047791,5.311336,416.328908,259.994904
1,01 01 2018 00:10,453.769196,5.672167,519.917511,268.641113
2,01 01 2018 00:20,306.376587,5.216037,390.900016,272.564789
3,01 01 2018 00:30,419.645905,5.659674,516.127569,271.258087
4,01 01 2018 00:40,380.650696,5.577941,491.702972,265.674286
...,...,...,...,...,...
50525,31 12 2018 23:10,2963.980957,11.404030,3397.190793,80.502724
50526,31 12 2018 23:20,1684.353027,7.332648,1173.055771,84.062599
50527,31 12 2018 23:30,2201.106934,8.435358,1788.284755,84.742500
50528,31 12 2018 23:40,2515.694092,9.421366,2418.382503,84.297913


In [20]:
X = df[['Wind Speed (m/s)', 'Wind Direction (°)', 'Theoretical_Power_Curve (KWh)']] 
y = df['LV ActivePower (kW)']  

In [21]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
r2_l= r2_score(y_test, y_pred)
mse_l = mean_squared_error(y_test, y_pred)
print(f"Linear Regression R^2: {r2_l}")
print(f"Linear Regression MSE: {mse_l}")

Linear Regression R^2: 0.9006599245385882
Linear Regression MSE: 169505.14196548512


In [23]:
from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor() 
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
r2_rf= r2_score(y_test, y_pred_rf)
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f"RF R^2: {r2_rf}")
print(f"RF MSE: {mse_rf}")

RF R^2: 0.8998117571725806
RF MSE: 170952.3799418793


**Predicting Range of Energy in KwH**

We estimated theoretical power curve values looking at the dataset and using its linear relationship with wind speed cubed (v^3) according to the formula: P = 0.5 x Cp * ρ * π * R^2 * v^3

P is the power generated by the wind turbine in watts (W).
0.5 is a constant that comes from the power formula, representing the power coefficient.

Cp is the coefficient of performance of the turbine (also known as power coefficient), which varies between 0 and 0.59 (Betz's limit), depending on the turbine design and operational efficiency.

ρ is the air density in kilograms per cubic meter (kg/m^3), which can vary depending on altitude and temperature.

π is the mathematical constant Pi.

R is the radius of the wind turbine's rotor in meters (m).

V is the velocity of the wind in meters per second (m/s).

In [33]:
## Using Linear Regression Model due to its Higher R^2 score and lower MSE
low = model.predict([[8, 1481, 180]])[0]
high = model.predict([[10, 2892, 270]])[0]
betz_limit = 0.5
print(f"Lowest energy produced in a day (KWH): {betz_limit*low*24}")
print(f"Highest energy produced in a day(KWH): {betz_limit*high*24}")

Lowest energy produced in a day (KWH): 10303.378568973414
Highest energy produced in a day(KWH): 19210.19509710368




***Linear Program Code***

**Budget function:** p1x1 + p2x2 + p3x3 + p4x4 + p5x5 = B



**Utility function:** U(x1, x2, x3, x4, x5) = a1x1 + a2x2 + a3x3 + a4x4 + a5x5

Coal Energy (kwH) per Dollar Function: f(x) = (1 / 0.08892)x        

Oil Energy (kwH) per Dollar Function: f(x) = (1 / 0.1958)x          (x2)

Natural Gas (kwH) Energy per Dollar Function: f(x) = (1 / 0.3204)x     (x3)

Nuclear Energy (kwH) per Dollar Function: f(x) = (1 / 0.07)x       (x4)

Biofuel and waste Energy (kwH) per Dollar Function: f(x) = (1 / 0.15)x       (x5)

Wind Energy (kwH) per Dollar Function: f(x) = (1 / 0.026)x       (x6)

In [25]:
from scipy.optimize import linprog

# Adjusted coefficients for the utility function considering fixed x4 and x5
# Now only optimizing for x1, x2, x_3, and x_6
c = [-1/0.0684, -1/0.1632, -1/0.3204, -1/0.026]  # Removed coefficients for x4 and x5

# Adjusted budget based on the fixed allocations for x4 and x5
adjusted_budget = 100 - (36.5 + 8.7)  # Subtracting the pre-allocated budget for x4 and x5

# Coefficients for the adjusted budget constraint
A_eq = [[1, 1, 1, 1]]
b_eq = [adjusted_budget]

A_ub = [[0, 0, 0, -1]]  # Only considering x6 for renewable energy now, if applicable
b_ub = [-30 + 8.7]  # Adjusting renewable requirement by subtracting x5's contribution if counted towards renewables

# Bounds for each xi (non-negativity constraint, min 5% and max 25% of adjusted budget per resource)
# Assuming these constraints still apply relative to the adjusted budget
x_bounds = [(5, 25) for _ in range(4)]  # Bounds for x1, x2, x3, and x6

# Solving the LP problem with the adjusted constraints
result = linprog(c, A_eq=A_eq, b_eq=b_eq, A_ub=A_ub, b_ub=b_ub, bounds=x_bounds, method='highs')

if result.success:
    print(f"Optimized xi values: x1 = {np.round(result.x[0],1)}, x2 = {result.x[1]}, x3 = {result.x[2]}, x6 = {result.x[3]}")
    print(f"Sum of xi values including fixed x4 and x5: {sum(result.x) + 36.5 + 8.7}")
else:
    print("Optimization failed:", result.message)

Optimized xi values: x1 = 19.8, x2 = 5.0, x3 = 5.0, x6 = 25.0
Sum of xi values including fixed x4 and x5: 100.0
