In [43]:
import pandas as pd
import numpy as np
import random
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Set random seed for reproducibility
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

# Load your new dataset
data = pd.read_csv(r"C:\Users\Sasi Kanth\Desktop\YayySOLARDATA\Dataset\Mar_merged_weather_power_data.csv")

# Drop unnecessary timestamp columns
data.drop(columns=['timestamp_x', 'timestamp_y'], inplace=True)

# Convert 'planttimestamp' to datetime
data['planttimestamp'] = pd.to_datetime(data['planttimestamp'], errors='coerce')

# Drop rows with NaT in 'planttimestamp'
data = data.dropna(subset=['planttimestamp'])
data = data[(data['planttimestamp'].dt.time >= pd.to_datetime('06:30').time()) &
            (data['planttimestamp'].dt.time <= pd.to_datetime('18:45').time())]

# Define the correct feature names based on the printed column names
features = [
    'AXPPL 380MW ICR22 WMS 6 Wind Direction (°)',
    'AXPPL 380MW ICR22 WMS 6 GHI (W/m²)',
    'AXPPL 380MW ICR22 WMS 6 Ambient Temperature (°C)',
    'AXPPL 380MW ICR22 WMS 6 Humidity (%)',
    'AXPPL 380MW ICR22 WMS 6 POA (W/m²)',
    'AXPPL 380MW ICR22 WMS 6 Wind Speed (m/s)',
    'AXPPL 380MW ICR22 WMS 6 Module Temperature (°C)'
]
target_column = 'AXPPL 380MW MCR PQM Active Power (kW)'

# Prepare feature and target datasets
X_data = data[features]
y_data = data[target_column]

# Replace any '-' with NaN and convert the columns to numeric
X_data = X_data.replace('-', np.nan).astype(float)
y_data = y_data.replace('-', np.nan).astype(float)

# Interpolate missing values (linear interpolation based on neighboring points)
X_data = X_data.interpolate(method='linear', limit_direction='both')
y_data = y_data.interpolate(method='linear', limit_direction='both')

# Drop rows where interpolation was not possible (if any NaN values still exist)
X_data = X_data.dropna()
y_data = y_data[X_data.index]  # Keep only the indices that are valid in X_data

# Feature scaling
scaler_X = MinMaxScaler()
scaled_X = scaler_X.fit_transform(X_data)

scaler_y = MinMaxScaler()
scaled_y = scaler_y.fit_transform(y_data.values.reshape(-1, 1))

# Create sequences and labels for LSTM
def create_sequences(X, y, sequence_length):
    sequences_X = []
    sequences_y = []
    for i in range(len(X) - sequence_length):
        sequences_X.append(X[i:i + sequence_length])
        sequences_y.append(y[i + sequence_length])
    return np.array(sequences_X), np.array(sequences_y)

sequence_length = 24 # Number of time steps (e.g., for 2 hours ahead)
X, y = create_sequences(scaled_X, scaled_y, sequence_length)

# Split into training and testing datasets
split = int(0.7 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Build the LSTM model
# Build the modified LSTM model with more layers and units
model = Sequential()
model.add(LSTM(units=256, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))  # Increased units
model.add(Dropout(0.4))  # Increased dropout to prevent overfitting
# model.add(LSTM(units=128, return_sequences=True))
# model.add(Dropout(0.4))
model.add(LSTM(units=64, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(units=1))

# Compile the model with a lower learning rate
optimizer = tf.keras.optimizers.Adam(learning_rate=0.00001)  # Reduced learning rate
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Train the model with more epochs and smaller batch size
history = model.fit(X_train, y_train, epochs=50, batch_size=8, validation_data=(X_test, y_test))  # Increased epochs, decreased batch size

# Predictions
predictions = model.predict(X_test)

# Inverse transform the predictions
predictions = scaler_y.inverse_transform(predictions)

# Save the model and weights
model.save('solar_model_new.h5')
model.save_weights('solar_model_new.weights.h5')

# Calculate evaluation metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

# Inverse transform y_test for comparison
y_test_inv = scaler_y.inverse_transform(y_test)

# Calculate errors
mae = mean_absolute_error(y_test_inv, predictions)
mse = mean_squared_error(y_test_inv, predictions)
mape = mean_absolute_percentage_error(y_test_inv, predictions)

# Calculate accuracy (simple method based on inverse MAPE)
accuracy = 100 - mape

# Calculate absolute errors to find max and min error timestamps
errors = np.abs(y_test_inv - predictions)
max_error_idx = np.argmax(errors)
min_error_idx = np.argmin(errors)

# Get timestamps for max and min error
timestamps = data['planttimestamp'].values[-len(y_test):]
max_error_time = timestamps[max_error_idx]
min_error_time = timestamps[min_error_idx]

# Output evaluation results
print(f"Max Error: {errors[max_error_idx]} at timestamp: {max_error_time}")
print(f"Min Error: {errors[min_error_idx]} at timestamp: {min_error_time}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Percentage Error (MAPE): {mape}")
print(f"Accuracy: {accuracy}%")

# Optionally, save predictions and errors to a DataFrame for further analysis
predictions_df = pd.DataFrame({
    'Timestamp': timestamps,
    'Actual': y_test_inv.flatten(),
    'Predicted': predictions.flatten(),
    'Error': errors.flatten()
})

# Display predictions_df if you want to check it
predictions_df.head()


Epoch 1/50


  super().__init__(**kwargs)


[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - loss: 0.2432 - val_loss: 0.1100
Epoch 2/50
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.0979 - val_loss: 0.1440
Epoch 3/50
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.0665 - val_loss: 0.1724
Epoch 4/50
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.0488 - val_loss: 0.1544
Epoch 5/50
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.0413 - val_loss: 0.1331
Epoch 6/50
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.0395 - val_loss: 0.1056
Epoch 7/50
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - loss: 0.0350 - val_loss: 0.0918
Epoch 8/50
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - loss: 0.0323 - val_loss: 0.0794
Epoch 9/50
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━



Max Error: [317044.267] at timestamp: 2024-11-08T14:45:00.000000000
Min Error: [8.56075] at timestamp: 2024-12-09T09:30:00.000000000
Mean Absolute Error (MAE): 56744.952040782395
Mean Squared Error (MSE): 5917029027.944354
Mean Absolute Percentage Error (MAPE): 13.490427776022347
Accuracy: 86.50957222397766%


Unnamed: 0,Timestamp,Actual,Predicted,Error
0,2024-12-07 11:15:00,250223.0,214873.859375,35349.140625
1,2024-12-07 11:30:00,251764.6,221188.15625,30576.44375
2,2024-12-07 11:45:00,263703.467,229580.5,34122.967
3,2024-12-07 12:00:00,308338.8,234508.0625,73830.7375
4,2024-12-07 12:15:00,322848.267,243123.234375,79725.032625


In [26]:
predictions_df = pd.DataFrame({'Actual': scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten(), 
                               'Predicted': predictions.flatten()})

In [42]:
predictions_df.head()

Unnamed: 0,Timestamp,Actual,Predicted,Error
0,2024-12-07 11:15:00,250223.0,240520.671875,9702.328125
1,2024-12-07 11:30:00,251764.6,231789.75,19974.85
2,2024-12-07 11:45:00,263703.467,240398.75,23304.717
3,2024-12-07 12:00:00,308338.8,234634.90625,73703.89375
4,2024-12-07 12:15:00,322848.267,257794.578125,65053.688875
