In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# 1. Load Dataset
data = pd.read_csv("renewable_energy.csv")
print(data.head())

# 2. Preprocessing
# Drop incomplete rows first so every remaining record has a date, all three
# feature readings and the target.
data = data.dropna()

# Index by timestamp and put the rows in chronological order: the LSTM
# windowing further down treats adjacent rows as consecutive time steps.
# mergesort is stable, so rows sharing a date keep their file order.
data["Date"] = pd.to_datetime(data["Date"])
data = data.set_index("Date").sort_index(kind="mergesort")

X = data[["Wind", "Solar", "Hydro"]]
y = data["Energy"]

# Split BEFORE scaling. Fitting the scaler on the full matrix would let the
# mean/variance of the held-out rows leak into training and inflate the
# test metrics. The row assignment depends only on the number of rows and
# random_state, so it is the same partition as splitting a scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix (in row order) in the same scaled space; the
# sequence model below builds its windows from this array.
X_scaled = scaler.transform(X)

# 3. ML Models
# Build both regressors up front, register them under the label used in the
# report, then train each one on the same training split.
lin_reg = LinearRegression()
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)
models = {"Linear Regression": lin_reg, "Random Forest": rf_reg}
for estimator in models.values():
    estimator.fit(X_train, y_train)

# 4. Evaluation
# Score every registered model on the held-out split and print one line per
# model: coefficient of determination and root-mean-squared error.
for label, estimator in models.items():
    y_pred = estimator.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(label, "R2:", r2, "RMSE:", rmse)

# 5. LSTM for Time Series
# Seed the Python, NumPy and TensorFlow generators so weight initialisation
# and batch shuffling are repeatable between runs, matching the fixed
# random_state used for the scikit-learn models.
tf.keras.utils.set_random_seed(42)

X_lstm = np.asarray(X_scaled)
y_lstm = np.asarray(y)

sequence_length = 10

# At least one full window plus one following target row is required;
# fail with a clear message instead of an IndexError on an empty array.
n_windows = len(X_lstm) - sequence_length
if n_windows <= 0:
    raise ValueError(
        f"Need more than {sequence_length} rows to build LSTM windows, "
        f"got {len(X_lstm)}"
    )

# Window i holds rows [i, i + sequence_length); its target is the value of
# y on the row immediately after the window.
X_seq = np.stack([X_lstm[i:i + sequence_length] for i in range(n_windows)])
y_seq = y_lstm[sequence_length:]

# Ordered 80/20 split (no shuffling): the last fifth of the windows is held
# out for testing.
split_at = int(0.8 * len(X_seq))
X_train_seq, X_test_seq = X_seq[:split_at], X_seq[split_at:]
y_train_seq, y_test_seq = y_seq[:split_at], y_seq[split_at:]

# Declare the input shape with an explicit Input layer; passing
# `input_shape` to the first layer is deprecated and emits a warning in
# current Keras.
model = Sequential([
    tf.keras.Input(shape=(sequence_length, X_seq.shape[2])),
    LSTM(50, activation='relu'),
    Dense(1),
])

model.compile(optimizer='adam', loss='mse')
# validation_split holds out the trailing 20% of the training windows.
model.fit(X_train_seq, y_train_seq, epochs=10, batch_size=16, validation_split=0.2)

# predict() returns one column per output unit, i.e. shape (n_samples, 1).
lstm_pred = model.predict(X_test_seq)
print("LSTM R2:", r2_score(y_test_seq, lstm_pred), "RMSE:", np.sqrt(mean_squared_error(y_test_seq, lstm_pred)))

import joblib

# Persist the fitted random forest together with the feature scaler so the
# same preprocessing can be re-applied to new inputs at prediction time.
model_path, scaler_path = "energy_model.pkl", "scaler.pkl"
joblib.dump(value=rf_reg, filename=model_path)
joblib.dump(value=scaler, filename=scaler_path)


         Date       Wind      Solar       Hydro     Energy
0  2020-01-01  49.963210  58.871320   46.661964  39.478365
1  2020-01-02  96.057145  47.526746   78.771085  89.495526
2  2020-01-03  78.559515  31.666933  108.565125  63.545000
3  2020-01-04  67.892679  66.965651   95.900240  64.989089
4  2020-01-05  32.481491  57.931182  102.590503  59.251053
Linear Regression R2: 0.8730249404872472 RMSE: 5.334007731968931
Random Forest R2: 0.8210700623082069 RMSE: 6.331927501861734


  super().__init__(**kwargs)


Epoch 1/10
20/20 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 3646.8594 - val_loss: 3438.4546
Epoch 2/10
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 3388.9800 - val_loss: 2207.8491
Epoch 3/10
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step - loss: 983.2682 - val_loss: 721.7872
Epoch 4/10
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 663.4990 - val_loss: 568.8179
Epoch 5/10
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 557.0576 - val_loss: 632.3401
Epoch 6/10
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step - loss: 511.3383 - val_loss: 510.7228
Epoch 7/10
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step - loss: 463.8595 - val_loss: 496.9765
Epoch 8/10
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step - loss: 443.0268 - val_loss: 531.6234
Epoch 9/10
[1m

['scaler.pkl']