<a href="https://colab.research.google.com/github/maimuna-noshin/Solar_Prediction/blob/main/solarprediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [41]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import get_custom_objects
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pickle
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from tensorflow.keras.layers import Dense, Dropout, LSTM, Conv1D, BatchNormalization, Flatten, Bidirectional, LayerNormalization, Input
from sklearn.preprocessing import StandardScaler
from scipy.fftpack import fft
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.models import Sequential, Model

In [42]:
df=pd.read_csv("/content/updated_dataset.csv")

In [43]:
# Convert to datetime
df['date'] = pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour']])




In [44]:
# Selecting features (X) and target variables (Y)
features = ['Hour', 'Temperature_A','Pressure_A', 'Day','Month', 'Surface Albedo_A', 'Cloud Type_A']
targets = ['DNI_A']  # Target variables for all three locations

In [45]:

X = df[features].values
y = df[targets].values

In [46]:
# Keep timestamps before scaling
timestamps = df[['Month', 'Day', 'Hour']]

In [47]:
scaler_X = MinMaxScaler()
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
scaler_y = MinMaxScaler(feature_range=(0, 1000))
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)
# Save the feature scaler
with open("scaler_X.pkl", "wb") as f:
    pickle.dump(scaler_X, f)

# Save the target scaler
with open("scaler_y.pkl", "wb") as f:
    pickle.dump(scaler_y, f)
# Create binary classification labels (0 if DNI_A == 0, 1 if DNI_A > 0)
y_class = (y > 0).astype(int)


In [48]:
hours_tensor = tf.convert_to_tensor(df['Hour'].values, dtype=tf.float32)


In [49]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test, timestamps_train, timestamps_test = train_test_split(X_scaled, y_scaled,timestamps, test_size=0.2, random_state=42)

# Correct Boolean Masking for Selecting Nonzero DNI_A Cases
daytime_mask = y > 0  # Creates a boolean mask where DNI_A > 0
X_day = X_scaled[daytime_mask.flatten()]  # Apply mask to X_scaled
y_day = y[daytime_mask]  # Apply mask to y

# Split Data for Regression
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_day, y_day, test_size=0.2, random_state=42)


In [50]:
def custom_loss_fixed(y_true, y_pred):
    batch_size = tf.shape(y_true)[0]
    batch_hours = tf.gather(hours_tensor, tf.range(batch_size))  # Use hours_tensor instead of hours

    nighttime_mask = tf.logical_or(tf.less_equal(batch_hours, 8), tf.greater_equal(batch_hours, 18))
    nighttime_mask = tf.cast(nighttime_mask, tf.float32)

    mse_loss = tf.keras.losses.MSE(y_true, y_pred)
    penalty_factor = 50.0
    weighted_loss = mse_loss * (1 + (penalty_factor - 1) * nighttime_mask)

    return tf.reduce_mean(weighted_loss)




In [51]:
# Classifier Model (Binary: 0 = No irradiance, 1 = Some irradiance)
classifier = Sequential([
    Dense(64, activation="relu", input_shape=(X_train.shape[1],)),
    Dense(32, activation="relu"),
    Dense(1, activation="sigmoid")  # Output probability of nonzero irradiance
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [52]:
classifier.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
classifier.fit(X_train, y_train_class, epochs=10, batch_size=8, validation_data=(X_test, y_test_class))

Epoch 1/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.7104 - loss: 0.5573 - val_accuracy: 0.9252 - val_loss: 0.2153
Epoch 2/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9238 - loss: 0.1886 - val_accuracy: 0.9167 - val_loss: 0.1738
Epoch 3/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9412 - loss: 0.1396 - val_accuracy: 0.9378 - val_loss: 0.1441
Epoch 4/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9402 - loss: 0.1337 - val_accuracy: 0.9418 - val_loss: 0.1323
Epoch 5/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9410 - loss: 0.1257 - val_accuracy: 0.9481 - val_loss: 0.1238
Epoch 6/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9525 - loss: 0.1173 - val_accuracy: 0.9509 - val_loss: 0.1205
Epoch 7/10
[1m876/876[0m 

<keras.src.callbacks.history.History at 0x7dde3ab32690>

In [53]:
# Regressor Model (Predicts DNI_A for nonzero cases)
regressor = Sequential([
    Dense(128, activation="relu", input_shape=(X_train_reg.shape[1],)),
    Dense(64, activation="relu"),
    Dense(1, activation="linear")
])

regressor.compile(optimizer="adam", loss="mse", metrics=["mae"])
regressor.fit(X_train_reg, y_train_reg, epochs=10, batch_size=8, validation_data=(X_test_reg, y_test_reg))


Epoch 1/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 135252.9375 - mae: 304.0044 - val_loss: 45217.3906 - val_mae: 175.8878
Epoch 2/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 47860.8867 - mae: 182.4039 - val_loss: 43234.5977 - val_mae: 172.8072
Epoch 3/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 46131.1016 - mae: 180.6871 - val_loss: 42129.3047 - val_mae: 169.5881
Epoch 4/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 43996.8633 - mae: 174.8801 - val_loss: 41850.5000 - val_mae: 167.4657
Epoch 5/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 43155.5195 - mae: 171.5252 - val_loss: 41884.2070 - val_mae: 168.4937
Epoch 6/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 41981.3789 - mae: 169.0792 - val_loss: 41382.0391 - val_mae: 166.5803
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x7dde3a96fc10>

In [54]:
# Predict probability of nonzero irradiance
is_daytime = classifier.predict(X_test)

# Predict irradiance only for nonzero cases
irradiance_predictions = regressor.predict(X_test)

# Final predictions: Apply classifier decision
y_pred = np.where(is_daytime > 0.5, irradiance_predictions, 0)  # 0 for nighttime

# Convert predictions into a DataFrame with timestamps
predictions_df = timestamps_test.copy()
predictions_df["DNI_A"] = y_pred  # Assign predicted irradiance

# Print first 100 predictions with date and hour
print(predictions_df.head(100))

# Save predictions to CSV
predictions_df.to_csv("solar_irradiance_predictions.csv", index=False)



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
      Month  Day  Hour       DNI_A
6056      9   10     8  309.631104
5556      8   20    12  316.577179
5990      9    7    14  311.715515
7674     11   16    18    0.000000
3319      5   19     7  359.180939
...     ...  ...   ...         ...
6006      9    8     6    0.000000
8063     12    2    23    0.000000
14        1    1    14  458.620056
5306      8   10     2    0.000000
8524     12   22     4    0.000000

[100 rows x 4 columns]


In [56]:
# Save the classifier model
classifier.save("solar_classifier.keras")

# Save the regressor model
regressor.save("solar_regressor.keras")

Both models have been saved successfully!


In [57]:
#  Load the scalers used during training

with open("scaler_X.pkl", "rb") as f:
    scaler_X = pickle.load(f)
with open("scaler_y.pkl", "rb") as f:
    scaler_y = pickle.load(f)

In [58]:
# Generate timestamps for the first week of March 2025 (hourly)
date_range = pd.date_range(start="2025-03-01", end="2025-03-07 23:00:00", freq="H")
future_df = pd.DataFrame({"datetime": date_range})

  date_range = pd.date_range(start="2025-03-01", end="2025-03-07 23:00:00", freq="H")


In [59]:
#  Feature Engineering
future_df["day"] = future_df["datetime"].dt.day
future_df["month"] = future_df["datetime"].dt.month
future_df["hour"] = future_df["datetime"].dt.hour
future_df.drop(columns=["datetime"], inplace=True)

In [60]:
# Generate random values
future_df["Temperature_A"] = np.random.uniform(27, 30, size=168)  # Temperature between 15°C to 30°C
future_df["Pressure_A"] = np.random.uniform(1000, 1050, size=168)  # Pressure between 950-1050 hPa
future_df["Surface Albedo_A"] = np.random.uniform(0.1, 0.5, size=168)  # Albedo between 0.1 - 0.5
future_df["Cloud Type_A"] = np.random.randint(0, 4, size=168)  # Cloud types (0-3)

# Merge the first week’s random values with `future_df`
#future_df = future_df.merge(future_df, on=["Month", "Day"], how="left")

# Time-based encoding (sin/cos transformation)
#future_df["hour_sin"] = np.sin(2 * np.pi * future_df["hour"] / 24)
#future_df["hour_cos"] = np.cos(2 * np.pi * future_df["hour"] / 24)

# Select the same features used in training
#X_future = future_df[['hour_sin', 'hour_cos', 'Temperature_A', 'Pressure_A', 'day', 'Month', 'Surface Albedo_A', 'Cloud Type_A']].values

In [61]:
#  Select the same features used in training
X_future = future_df[['hour', 'Temperature_A','Pressure_A', 'day','month', 'Surface Albedo_A', 'Cloud Type_A']].values
#  Scale future data using the trained scaler
X_future_scaled = scaler_X.transform(X_future)
print(X_future_scaled)


[[ 0.          0.57768142  1.78253151 ...  0.18181818  4.81357792
   0.22222222]
 [ 0.04347826  0.61193627  0.9435764  ...  0.18181818 -0.70231808
   0.11111111]
 [ 0.08695652  0.65823203  1.37769548 ...  0.18181818  0.65961953
   0.        ]
 ...
 [ 0.91304348  0.6594482   1.66477893 ...  0.18181818  1.14506669
   0.22222222]
 [ 0.95652174  0.61634149  0.56981801 ...  0.18181818  3.91369435
   0.33333333]
 [ 1.          0.60980707  1.62594164 ...  0.18181818  0.26764292
   0.22222222]]


In [63]:
# Predict probability of nonzero irradiance
is_daytime_future = classifier.predict(X_future_scaled)

# Predict irradiance only for nonzero cases
irradiance_future_scaled = regressor.predict(X_future_scaled)

# Apply classification decision: Set nighttime values to 0
y_future_scaled = np.where(is_daytime_future > 0.5, irradiance_future_scaled, 0)

# Inverse scale predictions
y_future = scaler_y.inverse_transform(y_future_scaled.reshape(-1, 1))

# Set small irradiance values to 0
y_future[y_future < 15] = 0  # Adjust threshold if needed


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 


In [64]:
# Save predictions to DataFrame
future_df[['DNI_A']] = y_future

#  Save to CSV
future_df.to_csv("Future_Solar_Predictions_March2025.csv", index=False)

#Display first 10 predictions
print(future_df.head(10))

   day  month  hour  Temperature_A   Pressure_A  Surface Albedo_A  \
0    1      3     0      27.032414  1044.475945          0.380679   
1    1      3     1      28.149122  1019.307292          0.104884   
2    1      3     2      29.658364  1032.330864          0.172981   
3    1      3     3      28.795129  1041.461667          0.268357   
4    1      3     4      28.888718  1030.502887          0.458593   
5    1      3     5      27.404879  1033.020199          0.452368   
6    1      3     6      28.056800  1001.884452          0.499906   
7    1      3     7      29.903821  1000.348827          0.319502   
8    1      3     8      28.574460  1009.483824          0.446225   
9    1      3     9      28.592449  1043.587005          0.355794   

   Cloud Type_A  DNI_A  
0             2    0.0  
1             1    0.0  
2             0    0.0  
3             1    0.0  
4             0    0.0  
5             3    0.0  
6             1    0.0  
7             3    0.0  
8             0