<a href="https://colab.research.google.com/github/maimuna-noshin/Solar_Prediction/blob/main/solarprediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [54]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import get_custom_objects
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pickle
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from tensorflow.keras.layers import Dense, Dropout, LSTM, Conv1D, BatchNormalization, Flatten, Bidirectional, LayerNormalization, Input, Layer
from sklearn.preprocessing import StandardScaler
from scipy.fftpack import fft
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.models import Sequential, Model

In [55]:
df=pd.read_csv("/content/updated_dataset.csv")

In [56]:
# Convert to datetime
df['date'] = pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour']])




In [57]:
# Selecting features (X) and target variables (Y)
features = ['Hour', 'Temperature_A','Pressure_A', 'Day','Month', 'Surface Albedo_A', 'Cloud Type_A']
targets = ['DNI_A']  # Target variables for all three locations

In [58]:

X = df[features].values
y = df[targets].values

In [59]:
# Keep timestamps before scaling
timestamps = df[['Month', 'Day', 'Hour']]

In [60]:
hours_tensor = tf.convert_to_tensor(df['Hour'].values, dtype=tf.float32)


In [61]:
def custom_loss_fixed(y_true, y_pred):
    batch_size = tf.shape(y_true)[0]
    batch_hours = tf.gather(hours_tensor, tf.range(batch_size))  # Use hours_tensor instead of hours

    nighttime_mask = tf.logical_or(tf.less_equal(batch_hours, 8), tf.greater_equal(batch_hours, 18))
    nighttime_mask = tf.cast(nighttime_mask, tf.float32)

    mse_loss = tf.keras.losses.MSE(y_true, y_pred)
    penalty_factor = 50.0
    weighted_loss = mse_loss * (1 + (penalty_factor - 1) * nighttime_mask)

    return tf.reduce_mean(weighted_loss)




In [62]:
scaler_X = MinMaxScaler()
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
scaler_y = MinMaxScaler(feature_range=(0, 1000))
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)
# Save the feature scaler
with open("scaler_X.pkl", "wb") as f:
    pickle.dump(scaler_X, f)

# Save the target scaler
with open("scaler_y.pkl", "wb") as f:
    pickle.dump(scaler_y, f)
# Create binary classification labels (0 if DNI_A == 0, 1 if DNI_A > 0)
y_class = (y > 0).astype(int)


In [63]:
class FeatureAttention(Layer):
    def __init__(self, feature_dim):
        super(FeatureAttention, self).__init__()
        self.feature_weights = Dense(feature_dim, activation="softmax")  # Learnable feature weights

    def call(self, inputs):
        weights = self.feature_weights(inputs)  # Generate importance scores
        return inputs * weights  # Apply attention weights to features


# Define input
input_features = Input(shape=(X_train_class.shape[1],))

# Apply feature attention
x = FeatureAttention(X_train_class.shape[1])(input_features)



In [64]:

# Split Data for classification
X_train_class, X_test_class, y_train_class, y_test_class, timestamps_train, timestamps_test= train_test_split(X_scaled, y_class,timestamps, test_size=0.2, random_state=42)

In [65]:
classifier = Sequential([
    Dense(128, activation="relu", input_shape=(X_train_class.shape[1],)),
    Dense(64, activation="relu"),
    Dense(32, activation="relu"),
    Dense(1, activation="sigmoid")  # Binary classification
])


classifier.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
classifier.fit(X_train_class, y_train_class, epochs=10, batch_size=8, validation_data=(X_test_class, y_test_class))


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.7685 - loss: 0.4525 - val_accuracy: 0.9207 - val_loss: 0.2125
Epoch 2/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9395 - loss: 0.1488 - val_accuracy: 0.9372 - val_loss: 0.1380
Epoch 3/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9485 - loss: 0.1257 - val_accuracy: 0.9441 - val_loss: 0.1389
Epoch 4/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9451 - loss: 0.1260 - val_accuracy: 0.9509 - val_loss: 0.1277
Epoch 5/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9499 - loss: 0.1154 - val_accuracy: 0.9395 - val_loss: 0.1410
Epoch 6/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9537 - loss: 0.1114 - val_accuracy: 0.9503 - val_loss: 0.1105
Epoch 7/10
[1m876/876[0m [32m━━━━━━━

<keras.src.callbacks.history.History at 0x7fa0441898d0>

In [66]:
# Correct Boolean Masking for Selecting Nonzero DNI_A Cases
daytime_mask = y > 0  # Creates a boolean mask where DNI_A > 0
X_day = X_scaled[daytime_mask.flatten()]  # Apply mask to X_scaled
y_day = y[daytime_mask]  # Apply mask to y


In [67]:
# Split Data for Regression
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_day, y_day, test_size=0.2, random_state=42)

In [68]:
# Regressor Model (Predicts DNI_A for nonzero cases)
regressor = Sequential([
    Dense(128, activation="relu", input_shape=(X_train_reg.shape[1],)),
    Dense(64, activation="relu"),
    Dense(1, activation="linear")
])

regressor.compile(optimizer="adam", loss="mse", metrics=["mae"])
regressor.fit(X_train_reg, y_train_reg, epochs=10, batch_size=8, validation_data=(X_test_reg, y_test_reg))


Epoch 1/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 127656.0391 - mae: 292.9804 - val_loss: 45222.1953 - val_mae: 175.0984
Epoch 2/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 45256.9961 - mae: 177.8821 - val_loss: 43435.4414 - val_mae: 173.5189
Epoch 3/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 45377.7031 - mae: 179.5972 - val_loss: 42015.1250 - val_mae: 168.9384
Epoch 4/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 44775.3633 - mae: 176.2328 - val_loss: 41695.5117 - val_mae: 168.0025
Epoch 5/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 42749.6836 - mae: 170.8375 - val_loss: 42679.6055 - val_mae: 170.5564
Epoch 6/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 42554.0859 - mae: 172.1966 - val_loss: 41356.7188 - val_mae: 166.3071
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x7fa044220b50>

In [69]:
# Predict probability of nonzero irradiance
is_daytime_mask = classifier.predict(X_test_class) > 0.48  # Boolean mask for daytime

# Ensure X_test_reg and is_daytime_mask have compatible sizes
min_size = min(len(is_daytime_mask), len(X_test_reg))
is_daytime_mask = is_daytime_mask[:min_size]
X_test_reg = X_test_reg[:min_size]

# Filter indices where it's daytime
daytime_indices = np.where(is_daytime_mask)[0]

# Predict irradiance only for daytime cases
X_test_reg_daytime = X_test_reg[daytime_indices]  # Select only daytime data
irradiance_predictions = regressor.predict(X_test_reg_daytime).flatten()

# Initialize final prediction array with zeros
y_pred = np.zeros(min_size, dtype=float)

# Assign irradiance predictions only to daytime indices
y_pred[daytime_indices] = irradiance_predictions  # Map regression predictions correctly

# Convert predictions into a DataFrame with timestamps
predictions_df = timestamps_test.iloc[:min_size].copy()
predictions_df["DNI_A"] = y_pred  # Assign predicted irradiance

# Print first 100 predictions with date and hour
print(predictions_df.head(100))

# Save predictions to CSV
predictions_df.to_csv("solar_irradiance_predictions.csv", index=False)



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
      Month  Day  Hour       DNI_A
6056      9   10     8  426.797882
5556      8   20    12  355.509216
5990      9    7    14  298.844269
7674     11   16    18    0.000000
3319      5   19     7  282.820038
...     ...  ...   ...         ...
6006      9    8     6    0.000000
8063     12    2    23    0.000000
14        1    1    14  288.338837
5306      8   10     2    0.000000
8524     12   22     4    0.000000

[100 rows x 4 columns]


In [70]:
# Save the classifier model
classifier.save("solar_classifier.keras")

# Save the regressor model
regressor.save("solar_regressor.keras")

In [71]:
#  Load the scalers used during training

with open("scaler_X.pkl", "rb") as f:
    scaler_X = pickle.load(f)
with open("scaler_y.pkl", "rb") as f:
    scaler_y = pickle.load(f)

In [72]:
# Generate timestamps for the first week of March 2025 (hourly)
date_range = pd.date_range(start="2025-03-01", end="2025-03-07 23:00:00", freq="H")
future_df = pd.DataFrame({"datetime": date_range})

  date_range = pd.date_range(start="2025-03-01", end="2025-03-07 23:00:00", freq="H")


In [73]:
#  Feature Engineering
future_df["day"] = future_df["datetime"].dt.day
future_df["month"] = future_df["datetime"].dt.month
future_df["hour"] = future_df["datetime"].dt.hour
future_df.drop(columns=["datetime"], inplace=True)

In [74]:
# Generate random values
future_df["Temperature_A"] = np.random.uniform(27, 30, size=168)  # Temperature between 15°C to 30°C
future_df["Pressure_A"] = np.random.uniform(1000, 1050, size=168)  # Pressure between 950-1050 hPa
future_df["Surface Albedo_A"] = np.random.uniform(0.1, 0.5, size=168)  # Albedo between 0.1 - 0.5
future_df["Cloud Type_A"] = np.random.randint(0, 4, size=168)  # Cloud types (0-3)

# Merge the first week’s random values with `future_df`
#future_df = future_df.merge(future_df, on=["Month", "Day"], how="left")

# Time-based encoding (sin/cos transformation)
#future_df["hour_sin"] = np.sin(2 * np.pi * future_df["hour"] / 24)
#future_df["hour_cos"] = np.cos(2 * np.pi * future_df["hour"] / 24)

# Select the same features used in training
#X_future = future_df[['hour_sin', 'hour_cos', 'Temperature_A', 'Pressure_A', 'day', 'Month', 'Surface Albedo_A', 'Cloud Type_A']].values

In [75]:
#  Select the same features used in training
X_future = future_df[['hour', 'Temperature_A','Pressure_A', 'day','month', 'Surface Albedo_A', 'Cloud Type_A']].values
#  Scale future data using the trained scaler
X_future_scaled = scaler_X.transform(X_future)
print(X_future_scaled)


[[ 0.          0.60261341  1.79249407 ...  0.18181818  1.09835445
   0.22222222]
 [ 0.04347826  0.65531961  1.06460368 ...  0.18181818 -0.07951387
   0.11111111]
 [ 0.08695652  0.58369543  0.64415349 ...  0.18181818  0.30208036
   0.33333333]
 ...
 [ 0.91304348  0.64779689  0.42068025 ...  0.18181818  5.51593665
   0.11111111]
 [ 0.95652174  0.6524072   0.83999401 ...  0.18181818  1.55423864
   0.33333333]
 [ 1.          0.6657793   1.83712311 ...  0.18181818  0.4978481
   0.11111111]]


In [76]:
# Predict probability of nonzero irradiance
is_daytime_future = classifier.predict(X_future_scaled) > 0.48  # Boolean mask for daytime

# Ensure X_future_scaled and is_daytime_future have compatible sizes
min_size_future = min(len(is_daytime_future), len(X_future_scaled))
is_daytime_future = is_daytime_future[:min_size_future]
X_future_scaled = X_future_scaled[:min_size_future]

# Filter indices where it's daytime
daytime_indices_future = np.where(is_daytime_future)[0]

# Predict irradiance only for daytime cases
X_future_scaled_daytime = X_future_scaled[daytime_indices_future]
irradiance_future_scaled = regressor.predict(X_future_scaled_daytime).flatten()

# Initialize final prediction array with zeros
y_future_scaled = np.zeros(min_size_future, dtype=float)

# Assign irradiance predictions only to daytime indices
y_future_scaled[daytime_indices_future] = irradiance_future_scaled

# Inverse scale predictions
y_future = scaler_y.inverse_transform(y_future_scaled.reshape(-1, 1))

# Set small irradiance values to 0
y_future[y_future < 15] = 0  # Adjust threshold if needed



[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [77]:
# Save predictions to DataFrame
future_df[['DNI_A']] = y_future

#  Save to CSV
future_df.to_csv("Future_Solar_Predictions_March2025.csv", index=False)

#Display first 10 predictions
print(future_df.head(10))

   day  month  hour  Temperature_A   Pressure_A  Surface Albedo_A  \
0    1      3     0      27.845197  1044.774822          0.194918   
1    1      3     1      29.563419  1022.938110          0.136024   
2    1      3     2      27.228471  1010.324605          0.155104   
3    1      3     3      29.001797  1032.110013          0.334024   
4    1      3     4      27.527396  1038.896421          0.444265   
5    1      3     5      29.195919  1046.968741          0.321726   
6    1      3     6      27.028032  1043.307186          0.208534   
7    1      3     7      28.829989  1005.648000          0.119935   
8    1      3     8      28.348895  1035.582038          0.106579   
9    1      3     9      27.560904  1023.433470          0.372074   

   Cloud Type_A       DNI_A  
0             2    0.000000  
1             1    0.000000  
2             3    0.000000  
3             1    0.000000  
4             2    0.000000  
5             2    0.000000  
6             0    0.000000  
