<a href="https://colab.research.google.com/github/maimuna-noshin/Solar_Prediction/blob/main/solarprediction_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [159]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import get_custom_objects
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pickle
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from tensorflow.keras.layers import Dense, Dropout, LSTM, Conv1D, BatchNormalization, Flatten, Concatenate, Input
from sklearn.preprocessing import StandardScaler
from scipy.fftpack import fft
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

In [160]:
df=pd.read_csv("/content/updated_dataset.csv")

In [161]:
# Convert to datetime
df['date'] = pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour']])




In [162]:
# Selecting features (X) and target variables (Y)
features = ['Hour', 'Temperature_A','Pressure_A', 'Day','Month', 'Surface Albedo_A', 'Cloud Type_A']
targets = ['DNI_A']  # Target variables for all three locations

In [163]:

X = df[features].values
y = df[targets].values

In [164]:
# Keep timestamps before scaling
timestamps = df[['Month', 'Day', 'Hour']]

In [165]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler(feature_range=(0, 1000))
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)
# Save the feature scaler
with open("scaler_X.pkl", "wb") as f:
    pickle.dump(scaler_X, f)

# Save the target scaler
with open("scaler_y.pkl", "wb") as f:
    pickle.dump(scaler_y, f)

In [166]:
hours_tensor = tf.convert_to_tensor(df['Hour'].values, dtype=tf.float32)


In [167]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test, timestamps_train, timestamps_test = train_test_split(X_scaled, y_scaled,timestamps, test_size=0.2, random_state=42)

In [168]:
def custom_loss_fixed(y_true, y_pred):
    batch_size = tf.shape(y_true)[0]
    batch_hours = tf.gather(hours_tensor, tf.range(batch_size))  # Use hours_tensor instead of hours

    nighttime_mask = tf.logical_or(tf.less_equal(batch_hours, 8), tf.greater_equal(batch_hours, 18))
    nighttime_mask = tf.cast(nighttime_mask, tf.float32)

    mse_loss = tf.keras.losses.MSE(y_true, y_pred)
    penalty_factor = 50.0
    weighted_loss = mse_loss * (1 + (penalty_factor - 1) * nighttime_mask)

    return tf.reduce_mean(weighted_loss)




In [169]:
# Define model
input_layer = Input(shape=(X_train.shape[1],))

# First LSTM layer
x = tf.keras.layers.Reshape((1, X_train.shape[1]))(input_layer)
x = LSTM(384, return_sequences=True, activation='relu')(x)
x = LSTM(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

# Concatenating additional features
external_features = Input(shape=(3,))  # Example additional features
x = Concatenate()([x, external_features])

# Fully connected layers
x = Dense(128, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(1, activation='linear')(x)

In [170]:
model.compile(optimizer='adam', loss=custom_loss_fixed, metrics=['mae'])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [171]:
# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=8, validation_data=(X_test, y_test))

Epoch 1/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1153573.3750 - mae: 90.3951 - val_loss: 1142749.5000 - val_mae: 87.4332
Epoch 2/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1123528.2500 - mae: 88.6407 - val_loss: 1110231.8750 - val_mae: 89.1345
Epoch 3/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1132848.2500 - mae: 89.1603 - val_loss: 1138464.6250 - val_mae: 87.2228
Epoch 4/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1168109.6250 - mae: 90.2175 - val_loss: 1103359.2500 - val_mae: 87.2553
Epoch 5/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1122659.0000 - mae: 88.5553 - val_loss: 1093984.2500 - val_mae: 85.0047
Epoch 6/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1112751.7500 - mae: 87.0126 - val_loss: 1114385.6250 - val_mae: 85.1219
Epoc

In [172]:
# Predict future solar irradiance for all three locations
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)




[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [173]:
# Convert predictions into a DataFrame
predictions_df = timestamps_test.copy()

# Assign each column separately
predictions_df[['DNI_A']] = y_pred

In [174]:
# Print first 100 predictions with date and hour
print(predictions_df.head(100))
predictions_df.to_csv("solar_irradiance_predictions.csv", index=False)  # Saves to a CSV]

      Month  Day  Hour       DNI_A
6056      9   10     8  205.522263
5556      8   20    12  279.633179
5990      9    7    14  327.695465
7674     11   16    18   35.375530
3319      5   19     7  126.161194
...     ...  ...   ...         ...
6006      9    8     6   32.461113
8063     12    2    23   -1.609088
14        1    1    14  444.504242
5306      8   10     2   -0.486285
8524     12   22     4   -3.583272

[100 rows x 4 columns]


In [175]:
model.save("solar_irradiance_predictions.keras")


model = keras.models.load_model(
    "solar_irradiance_predictions.keras",
    custom_objects={"custom_loss_fixed": custom_loss_fixed},
    safe_mode=False  # Allows deserialization of custom objects
)





In [176]:
#  Load the scalers used during training

with open("scaler_X.pkl", "rb") as f:
    scaler_X = pickle.load(f)
with open("scaler_y.pkl", "rb") as f:
    scaler_y = pickle.load(f)

In [177]:
# Generate timestamps for the first week of March 2025 (hourly)
date_range = pd.date_range(start="2025-03-01", end="2025-03-07 23:00:00", freq="H")
future_df = pd.DataFrame({"datetime": date_range})

  date_range = pd.date_range(start="2025-03-01", end="2025-03-07 23:00:00", freq="H")


In [178]:
#  Feature Engineering
future_df["day"] = future_df["datetime"].dt.day
future_df["month"] = future_df["datetime"].dt.month
future_df["hour"] = future_df["datetime"].dt.hour
future_df.drop(columns=["datetime"], inplace=True)

In [179]:
# Generate random values
future_df["Temperature_A"] = np.random.uniform(27, 30, size=168)  # Temperature between 15°C to 30°C
future_df["Pressure_A"] = np.random.uniform(1000, 1050, size=168)  # Pressure between 950-1050 hPa
future_df["Surface Albedo_A"] = np.random.uniform(0.1, 0.5, size=168)  # Albedo between 0.1 - 0.5
future_df["Cloud Type_A"] = np.random.randint(0, 4, size=168)  # Cloud types (0-3)

# Merge the first week’s random values with `future_df`
#future_df = future_df.merge(future_df, on=["Month", "Day"], how="left")

# Time-based encoding (sin/cos transformation)
#future_df["hour_sin"] = np.sin(2 * np.pi * future_df["hour"] / 24)
#future_df["hour_cos"] = np.cos(2 * np.pi * future_df["hour"] / 24)

# Select the same features used in training
#X_future = future_df[['hour_sin', 'hour_cos', 'Temperature_A', 'Pressure_A', 'day', 'Month', 'Surface Albedo_A', 'Cloud Type_A']].values

In [180]:
#  Select the same features used in training
X_future = future_df[['hour', 'Temperature_A','Pressure_A', 'day','month', 'Surface Albedo_A', 'Cloud Type_A']].values
#  Scale future data using the trained scaler
X_future_scaled = scaler_X.transform(X_future)
print(X_future_scaled)


[[0.         0.65260525 1.66987706 ... 0.18181818 5.76891769 0.22222222]
 [0.04347826 0.60338931 1.92065054 ... 0.18181818 4.42195445 0.11111111]
 [0.08695652 0.6139477  0.78685885 ... 0.18181818 4.57827657 0.22222222]
 ...
 [0.91304348 0.6573431  1.56540768 ... 0.18181818 2.94530737 0.        ]
 [0.95652174 0.58562713 0.73463397 ... 0.18181818 6.09937358 0.        ]
 [1.         0.66498699 1.47656185 ... 0.18181818 3.30715132 0.        ]]


In [181]:
#  Make predictions
y_future_scaled = model.predict(X_future_scaled)

#  Inverse scale predictions
y_future = scaler_y.inverse_transform(y_future_scaled)
#  Set negative irradiance values to 0
y_future[y_future < 15] = 0

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


In [182]:
# Save predictions to DataFrame
future_df[['DNI_A']] = y_future

#  Save to CSV
future_df.to_csv("Future_Solar_Predictions_March2025.csv", index=False)

#Display first 10 predictions
print(future_df.head(10))

   day  month  hour  Temperature_A   Pressure_A  Surface Albedo_A  \
0    1      3     0      29.474931  1041.096312          0.428446   
1    1      3     1      27.870491  1048.619516          0.361098   
2    1      3     2      28.214695  1014.605765          0.368914   
3    1      3     3      27.668712  1037.341872          0.277792   
4    1      3     4      29.537856  1033.433451          0.155127   
5    1      3     5      27.598807  1045.829387          0.147319   
6    1      3     6      28.304255  1049.520396          0.252733   
7    1      3     7      27.841595  1010.496872          0.315029   
8    1      3     8      28.404141  1005.072825          0.236324   
9    1      3     9      27.343154  1038.143455          0.309538   

   Cloud Type_A       DNI_A  
0             2    0.000000  
1             1    0.000000  
2             2    0.000000  
3             3   21.512358  
4             1    0.000000  
5             3    0.000000  
6             3  268.776825  
