<a href="https://colab.research.google.com/github/maimuna-noshin/Solar_Prediction/blob/main/solarprediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import get_custom_objects
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pickle
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from tensorflow.keras.layers import Dense, Dropout, LSTM, Conv1D, BatchNormalization, Flatten, Bidirectional, LayerNormalization, Input
from sklearn.preprocessing import StandardScaler
from scipy.fftpack import fft
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.models import Sequential, Model

In [2]:
df=pd.read_csv("/content/updated_dataset.csv")

In [3]:
# Convert to datetime
df['date'] = pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour']])




In [4]:
# Selecting features (X) and target variables (Y)
features = ['Hour', 'Temperature_A','Pressure_A', 'Day','Month', 'Surface Albedo_A', 'Cloud Type_A']
targets = ['DNI_A']  # Target variables for all three locations

In [5]:

X = df[features].values
y = df[targets].values

In [6]:
# Keep timestamps before scaling
timestamps = df[['Month', 'Day', 'Hour']]

In [7]:
hours_tensor = tf.convert_to_tensor(df['Hour'].values, dtype=tf.float32)


In [8]:
def custom_loss_fixed(y_true, y_pred):
    batch_size = tf.shape(y_true)[0]
    batch_hours = tf.gather(hours_tensor, tf.range(batch_size))  # Use hours_tensor instead of hours

    nighttime_mask = tf.logical_or(tf.less_equal(batch_hours, 8), tf.greater_equal(batch_hours, 18))
    nighttime_mask = tf.cast(nighttime_mask, tf.float32)

    mse_loss = tf.keras.losses.MSE(y_true, y_pred)
    penalty_factor = 50.0
    weighted_loss = mse_loss * (1 + (penalty_factor - 1) * nighttime_mask)

    return tf.reduce_mean(weighted_loss)




In [9]:
scaler_X = MinMaxScaler()
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
scaler_y = MinMaxScaler(feature_range=(0, 1000))
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)
# Save the feature scaler
with open("scaler_X.pkl", "wb") as f:
    pickle.dump(scaler_X, f)

# Save the target scaler
with open("scaler_y.pkl", "wb") as f:
    pickle.dump(scaler_y, f)
# Create binary classification labels (0 if DNI_A == 0, 1 if DNI_A > 0)
y_class = (y > 0).astype(int)


In [10]:

# Split Data for classification
X_train_class, X_test_class, y_train_class, y_test_class, timestamps_train, timestamps_test= train_test_split(X_scaled, y_class,timestamps, test_size=0.2, random_state=42)

In [11]:
classifier = Sequential([
    Dense(128, activation="relu", input_shape=(X_train_class.shape[1],)),
    Dense(64, activation="relu"),
    Dense(32, activation="relu"),
    Dense(1, activation="sigmoid")  # Binary classification
])


classifier.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
classifier.fit(X_train_class, y_train_class, epochs=10, batch_size=8, validation_data=(X_test_class, y_test_class))


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.7613 - loss: 0.4466 - val_accuracy: 0.9486 - val_loss: 0.1350
Epoch 2/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9464 - loss: 0.1305 - val_accuracy: 0.9332 - val_loss: 0.1359
Epoch 3/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9440 - loss: 0.1316 - val_accuracy: 0.9492 - val_loss: 0.1207
Epoch 4/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9536 - loss: 0.1109 - val_accuracy: 0.9401 - val_loss: 0.1267
Epoch 5/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9480 - loss: 0.1152 - val_accuracy: 0.9532 - val_loss: 0.1154
Epoch 6/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9568 - loss: 0.1029 - val_accuracy: 0.9486 - val_loss: 0.1003
Epoch 7/10
[1m876/876[0m [32m━━━━━━━

<keras.src.callbacks.history.History at 0x7fa045d94850>

In [12]:
# Correct Boolean Masking for Selecting Nonzero DNI_A Cases
daytime_mask = y > 0  # Creates a boolean mask where DNI_A > 0
X_day = X_scaled[daytime_mask.flatten()]  # Apply mask to X_scaled
y_day = y[daytime_mask]  # Apply mask to y


In [13]:
# Split Data for Regression
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_day, y_day, test_size=0.2, random_state=42)

In [14]:
# Regressor Model (Predicts DNI_A for nonzero cases)
regressor = Sequential([
    Dense(128, activation="relu", input_shape=(X_train_reg.shape[1],)),
    Dense(64, activation="relu"),
    Dense(1, activation="linear")
])

regressor.compile(optimizer="adam", loss="mse", metrics=["mae"])
regressor.fit(X_train_reg, y_train_reg, epochs=10, batch_size=8, validation_data=(X_test_reg, y_test_reg))


Epoch 1/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 138060.3906 - mae: 305.7543 - val_loss: 44894.2539 - val_mae: 175.0087
Epoch 2/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 47233.7383 - mae: 181.1932 - val_loss: 42990.2773 - val_mae: 170.9811
Epoch 3/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 44499.6914 - mae: 176.1355 - val_loss: 42077.2109 - val_mae: 168.4963
Epoch 4/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 45466.9219 - mae: 177.7429 - val_loss: 41707.6484 - val_mae: 167.1887
Epoch 5/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 43363.5703 - mae: 172.6531 - val_loss: 41973.8984 - val_mae: 166.8923
Epoch 6/10
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 43063.1797 - mae: 170.9484 - val_loss: 41464.6250 - val_mae: 166.9239
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x7fa0441f4d90>

In [15]:
# Predict probability of nonzero irradiance
is_daytime_mask = classifier.predict(X_test_class) > 0.48  # Boolean mask for daytime

# Ensure X_test_reg and is_daytime_mask have compatible sizes
min_size = min(len(is_daytime_mask), len(X_test_reg))
is_daytime_mask = is_daytime_mask[:min_size]
X_test_reg = X_test_reg[:min_size]

# Filter indices where it's daytime
daytime_indices = np.where(is_daytime_mask)[0]

# Predict irradiance only for daytime cases
X_test_reg_daytime = X_test_reg[daytime_indices]  # Select only daytime data
irradiance_predictions = regressor.predict(X_test_reg_daytime).flatten()

# Initialize final prediction array with zeros
y_pred = np.zeros(min_size, dtype=float)

# Assign irradiance predictions only to daytime indices
y_pred[daytime_indices] = irradiance_predictions  # Map regression predictions correctly

# Convert predictions into a DataFrame with timestamps
predictions_df = timestamps_test.iloc[:min_size].copy()
predictions_df["DNI_A"] = y_pred  # Assign predicted irradiance

# Print first 100 predictions with date and hour
print(predictions_df.head(100))

# Save predictions to CSV
predictions_df.to_csv("solar_irradiance_predictions.csv", index=False)



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
      Month  Day  Hour       DNI_A
6056      9   10     8  433.437897
5556      8   20    12  363.878052
5990      9    7    14  306.184784
7674     11   16    18    0.000000
3319      5   19     7  289.952545
...     ...  ...   ...         ...
6006      9    8     6    0.000000
8063     12    2    23    0.000000
14        1    1    14  295.538116
5306      8   10     2    0.000000
8524     12   22     4    0.000000

[100 rows x 4 columns]


In [16]:
# Save the classifier model
classifier.save("solar_classifier.keras")

# Save the regressor model
regressor.save("solar_regressor.keras")

In [17]:
#  Load the scalers used during training

with open("scaler_X.pkl", "rb") as f:
    scaler_X = pickle.load(f)
with open("scaler_y.pkl", "rb") as f:
    scaler_y = pickle.load(f)

In [18]:
# Generate timestamps for the first week of March 2025 (hourly)
date_range = pd.date_range(start="2025-03-01", end="2025-03-07 23:00:00", freq="H")
future_df = pd.DataFrame({"datetime": date_range})

  date_range = pd.date_range(start="2025-03-01", end="2025-03-07 23:00:00", freq="H")


In [19]:
#  Feature Engineering
future_df["day"] = future_df["datetime"].dt.day
future_df["month"] = future_df["datetime"].dt.month
future_df["hour"] = future_df["datetime"].dt.hour
future_df.drop(columns=["datetime"], inplace=True)

In [20]:
# Generate random values
future_df["Temperature_A"] = np.random.uniform(27, 30, size=168)  # Temperature between 15°C to 30°C
future_df["Pressure_A"] = np.random.uniform(1000, 1050, size=168)  # Pressure between 950-1050 hPa
future_df["Surface Albedo_A"] = np.random.uniform(0.1, 0.5, size=168)  # Albedo between 0.1 - 0.5
future_df["Cloud Type_A"] = np.random.randint(0, 4, size=168)  # Cloud types (0-3)

# Merge the first week’s random values with `future_df`
#future_df = future_df.merge(future_df, on=["Month", "Day"], how="left")

# Time-based encoding (sin/cos transformation)
#future_df["hour_sin"] = np.sin(2 * np.pi * future_df["hour"] / 24)
#future_df["hour_cos"] = np.cos(2 * np.pi * future_df["hour"] / 24)

# Select the same features used in training
#X_future = future_df[['hour_sin', 'hour_cos', 'Temperature_A', 'Pressure_A', 'day', 'Month', 'Surface Albedo_A', 'Cloud Type_A']].values

In [21]:
#  Select the same features used in training
X_future = future_df[['hour', 'Temperature_A','Pressure_A', 'day','month', 'Surface Albedo_A', 'Cloud Type_A']].values
#  Scale future data using the trained scaler
X_future_scaled = scaler_X.transform(X_future)
print(X_future_scaled)


[[ 0.          0.61848961  0.8562872  ...  0.18181818  4.01275518
   0.33333333]
 [ 0.04347826  0.58016659  0.49831183 ...  0.18181818 -0.30628017
   0.        ]
 [ 0.08695652  0.66787382  1.53648516 ...  0.18181818  3.65811754
   0.        ]
 ...
 [ 0.91304348  0.58426302  1.79717254 ...  0.18181818  1.00062327
   0.        ]
 [ 0.95652174  0.59776486  0.98940452 ...  0.18181818  1.7792387
   0.22222222]
 [ 1.          0.59616037  1.74181025 ...  0.18181818  4.68524195
   0.        ]]


In [22]:
# Predict probability of nonzero irradiance
is_daytime_future = classifier.predict(X_future_scaled) > 0.48  # Boolean mask for daytime

# Ensure X_future_scaled and is_daytime_future have compatible sizes
min_size_future = min(len(is_daytime_future), len(X_future_scaled))
is_daytime_future = is_daytime_future[:min_size_future]
X_future_scaled = X_future_scaled[:min_size_future]

# Filter indices where it's daytime
daytime_indices_future = np.where(is_daytime_future)[0]

# Predict irradiance only for daytime cases
X_future_scaled_daytime = X_future_scaled[daytime_indices_future]
irradiance_future_scaled = regressor.predict(X_future_scaled_daytime).flatten()

# Initialize final prediction array with zeros
y_future_scaled = np.zeros(min_size_future, dtype=float)

# Assign irradiance predictions only to daytime indices
y_future_scaled[daytime_indices_future] = irradiance_future_scaled

# Inverse scale predictions
y_future = scaler_y.inverse_transform(y_future_scaled.reshape(-1, 1))

# Set small irradiance values to 0
y_future[y_future < 15] = 0  # Adjust threshold if needed



[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [23]:
# Save predictions to DataFrame
future_df[['DNI_A']] = y_future

#  Save to CSV
future_df.to_csv("Future_Solar_Predictions_March2025.csv", index=False)

#Display first 10 predictions
print(future_df.head(10))

   day  month  hour  Temperature_A   Pressure_A  Surface Albedo_A  \
0    1      3     0      28.362761  1016.688616          0.340638   
1    1      3     1      27.113431  1005.949355          0.124686   
2    1      3     2      29.972687  1037.094555          0.322906   
3    1      3     3      27.824989  1043.303463          0.312486   
4    1      3     4      28.253115  1034.613235          0.340302   
5    1      3     5      29.733434  1008.041257          0.220379   
6    1      3     6      27.371998  1003.487896          0.498159   
7    1      3     7      27.985930  1034.119907          0.428380   
8    1      3     8      28.318749  1016.225100          0.469141   
9    1      3     9      28.153736  1002.829145          0.399765   

   Cloud Type_A       DNI_A  
0             3    0.000000  
1             0    0.000000  
2             0    0.000000  
3             0    0.000000  
4             1    0.000000  
5             1    0.000000  
6             1  423.923126  
