<a href="https://colab.research.google.com/github/maimuna-noshin/Solar_Prediction/blob/main/solarprediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Solar Prediction

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pickle
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from tensorflow.keras.layers import Dense, Dropout, LSTM, Conv1D, BatchNormalization, Flatten
from sklearn.preprocessing import StandardScaler
from scipy.fftpack import fft
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

In [None]:
# Load the raw dataset from the Colab working directory.
dataset_path = "/content/updated_dataset.csv"
df = pd.read_csv(dataset_path)

In [None]:
# Build one timestamp per row from the separate calendar columns.
df['date'] = pd.to_datetime(df.loc[:, ['Year', 'Month', 'Day', 'Hour']])

# Cyclic encoding of the hour: map it onto a 24-hour circle and keep sin/cos.
hour_angle = 2 * np.pi * df['Hour'] / 24
df['hour_sin'] = np.sin(hour_angle)
df['hour_cos'] = np.cos(hour_angle)

# Rows with Hour in 0-6 or 18-23 are flagged as nighttime, and every DNI
# column is forced to zero for those rows.
night_hours = [*range(0, 7), *range(18, 24)]
df['nighttime'] = df['Hour'].isin(night_hours)
df.loc[df['nighttime'], ['DNI_A', 'DNI_D', 'DNI_M']] = 0

In [None]:
# Model inputs (X): cyclic hour encoding, weather columns and calendar columns.
features = [
    'hour_sin',
    'hour_cos',
    'Temperature_A',
    'Pressure_A',
    'Day',
    'Month',
    'Surface Albedo_A',
    'Cloud Type_A',
]
# Model output (Y): only the DNI_A column is predicted.
targets = ['DNI_A']

In [None]:

# Pull the selected input and target columns out as plain NumPy arrays.
X = df.loc[:, features].to_numpy()
y = df.loc[:, targets].to_numpy()

In [None]:
# Keep the unscaled calendar columns so predictions can be labelled with
# month / day / hour after the split.
timestamps = df.loc[:, ['Month', 'Day', 'Hour']]

In [None]:
# Fit the input scaler (default 0-1 range) and the target scaler (0-1000 range),
# then scale the full arrays with them.
# NOTE(review): both scalers are fitted on every row, before the train/test
# split performed later in the notebook, so test-set min/max values influence
# the scaling. Consider fitting on the training rows only.
scaler_X = MinMaxScaler().fit(X)
scaler_y = MinMaxScaler(feature_range=(0, 1000)).fit(y)
X_scaled = scaler_X.transform(X)
y_scaled = scaler_y.transform(y)

# Persist both fitted scalers so inference code can reuse the same scaling.
for pickle_path, fitted_scaler in (("scaler_X.pkl", scaler_X), ("scaler_y.pkl", scaler_y)):
    with open(pickle_path, "wb") as handle:
        pickle.dump(fitted_scaler, handle)

In [None]:
# Hold out 20% of the rows for testing; the timestamps are split with the same
# shuffle so every test row keeps its month / day / hour label.
split_arrays = train_test_split(
    X_scaled,
    y_scaled,
    timestamps,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test, timestamps_train, timestamps_test = split_arrays
print(X_train.max())

1.0


In [None]:
# Fully connected regression network: 128 -> 256 -> 128 ReLU units with dropout
# after the first two hidden layers, and one linear output per target column.
# The input shape is declared with an explicit keras.Input layer instead of the
# `input_shape=` argument on the first Dense layer, which Keras warns about
# when used inside a Sequential model.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(len(targets), activation='linear')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Early-stopping callback: watches the validation loss, tolerates 10 epochs
# without improvement, and restores the best weights it has seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Adam optimiser with mean-squared-error loss; mean absolute error is tracked
# as an additional metric.
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])

In [None]:
# Train the model. The early-stopping callback built in the compile cell is
# passed in here; without `callbacks=` it was created but never used.
# NOTE(review): validation uses the test split, so the held-out data also
# drives early stopping; with epochs=10 and patience=10 the callback cannot
# cut training short and only matters through restore_best_weights.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=8,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)

Epoch 1/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 59899.9531 - mae: 160.6332 - val_loss: 28426.1445 - val_mae: 93.6095
Epoch 2/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 27951.7930 - mae: 96.6261 - val_loss: 27334.7148 - val_mae: 94.5557
Epoch 3/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 29735.0352 - mae: 99.9822 - val_loss: 25960.7578 - val_mae: 90.7239
Epoch 4/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 27599.1309 - mae: 93.7181 - val_loss: 25556.1914 - val_mae: 87.9891
Epoch 5/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 28166.7070 - mae: 93.0996 - val_loss: 25520.3262 - val_mae: 87.7081
Epoch 6/10
[1m876/876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 28003.6348 - mae: 93.4523 - val_loss: 25485.0488 - val_mae: 87.0272
Epoch 7/10
[1m876/876[0m 

In [None]:
# Predict DNI_A for the held-out rows and undo the target scaling.
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
# Treat negative and near-zero (< 1) predictions as zero irradiance.
y_pred[y_pred < 1] = 0
# Zero the predictions for nighttime rows, using the same hours (0-6 and 18-23)
# that preprocessing zeroes. The mask comes from the unscaled Hour column that
# was split alongside X_test. The previous test `X_test[:, 1] == 0` looked at
# the MinMax-scaled hour_cos column, where 0 corresponds to cos = -1 (noon),
# so it zeroed midday predictions and left real nighttime rows untouched.
nighttime_indices = timestamps_test['Hour'].isin(list(range(0, 7)) + list(range(18, 24))).to_numpy()
y_pred[nighttime_indices, :] = 0  # Set all predictions to 0 during nighttime

[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


In [None]:
# Start from a copy of the test-set timestamps so the split output is untouched.
predictions_df = timestamps_test.copy()

# Attach the single predicted column next to its month / day / hour labels.
predictions_df['DNI_A'] = y_pred.ravel()

In [None]:
# Show the first 100 labelled predictions, then write every row to CSV
# without the index column.
preview_rows = predictions_df.head(100)
print(preview_rows)
predictions_df.to_csv("solar_irradiance_predictions.csv", index=False)

      Month  Day  Hour       DNI_A
6056      9   10     8  233.309006
5556      8   20    12    0.000000
5990      9    7    14  307.345459
7674     11   16    18    4.429890
3319      5   19     7  124.877922
...     ...  ...   ...         ...
6006      9    8     6    0.000000
8063     12    2    23    0.000000
14        1    1    14  463.925720
5306      8   10     2    0.000000
8524     12   22     4    0.000000

[100 rows x 4 columns]


In [None]:
# Write the trained model to disk in the native .keras format ...
model_path = "solar_irradiance_predictions.keras"
model.save(model_path)

# ... and read it straight back so the following cells use the saved artefact.
model = keras.models.load_model(model_path)

In [None]:
# Reload the feature and target scalers that were pickled after fitting.
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# produced by this notebook.
with open("scaler_X.pkl", "rb") as x_file, open("scaler_y.pkl", "rb") as y_file:
    scaler_X = pickle.load(x_file)
    scaler_y = pickle.load(y_file)

In [None]:
# Hourly timestamps for the first week of March 2025 (7 days x 24 hours = 168 rows).
# The step is given as a Timedelta rather than the string alias "H", which
# pandas has deprecated and which emitted a warning here.
date_range = pd.date_range(
    start="2025-03-01",
    end="2025-03-07 23:00:00",
    freq=pd.Timedelta(hours=1),
)
future_df = pd.DataFrame({"datetime": date_range})

  date_range = pd.date_range(start="2025-03-01", end="2025-03-07 23:00:00", freq="H")


In [None]:
# Synthetic placeholder weather inputs for the forecast window.
# A seeded generator makes the draws (and the saved forecast) reproducible on
# re-run, and the sample count follows the frame length instead of a
# hard-coded 168.
# NOTE(review): these ranges are arbitrary. The scaled feature matrix printed
# later in the notebook contains values well outside [0, 1], which suggests at
# least one range lies outside what the scaler saw in training - confirm
# against the training data.
rng = np.random.default_rng(42)
n_hours = len(future_df)

future_df["Temperature_A"] = rng.uniform(27, 30, size=n_hours)  # uniform in [27, 30)
future_df["Pressure_A"] = rng.uniform(1000, 1050, size=n_hours)  # uniform in [1000, 1050)
future_df["Surface Albedo_A"] = rng.uniform(0.1, 0.5, size=n_hours)  # uniform in [0.1, 0.5)
future_df["Cloud Type_A"] = rng.integers(0, 4, size=n_hours)  # integer codes 0-3

In [None]:
# Derive the calendar columns from the timestamp, then remove the raw
# timestamp column from the frame in place.
stamp_parts = future_df["datetime"].dt
for part_name in ("day", "month", "hour"):
    future_df[part_name] = getattr(stamp_parts, part_name)
del future_df["datetime"]
print(future_df)

     Temperature_A   Pressure_A  Surface Albedo_A  Cloud Type_A  day  month  \
0        28.333113  1023.655659          0.307955             2    1      3   
1        28.993038  1005.952408          0.127955             2    1      3   
2        28.152701  1012.844820          0.445678             0    1      3   
3        27.911094  1029.381624          0.409421             1    1      3   
4        29.581825  1015.295630          0.477750             2    1      3   
..             ...          ...               ...           ...  ...    ...   
163      27.696067  1021.412293          0.160544             1    7      3   
164      29.988212  1010.416955          0.453453             3    7      3   
165      27.039104  1033.847837          0.331844             3    7      3   
166      29.004933  1048.215513          0.429559             1    7      3   
167      29.130454  1018.775760          0.175378             3    7      3   

     hour  
0       0  
1       1  
2       2  
3  

In [None]:
# Cyclic hour encoding: place the hour on a 24-hour circle and store sin/cos.
future_hour_angle = 2 * np.pi * future_df["hour"] / 24
future_df["hour_sin"] = np.sin(future_hour_angle)
future_df["hour_cos"] = np.cos(future_hour_angle)

In [None]:
# Column order must mirror the training feature order; this frame names the
# calendar columns in lowercase ('day', 'month').
future_feature_columns = [
    'hour_sin',
    'hour_cos',
    'Temperature_A',
    'Pressure_A',
    'day',
    'month',
    'Surface Albedo_A',
    'Cloud Type_A',
]
X_future = future_df.loc[:, future_feature_columns].to_numpy()
# Apply the already-fitted feature scaler (transform only, no refit).
X_future_scaled = scaler_X.transform(X_future)
print(X_future_scaled)


[[ 0.5         1.          0.61758015 ...  0.18181818  3.35909429
   0.22222222]
 [ 0.62940952  0.98296291  0.63782324 ...  0.18181818 -0.24090413
   0.22222222]
 [ 0.75        0.9330127   0.61204603 ...  0.18181818  6.11356249
   0.        ]
 ...
 [ 0.14644661  0.85355339  0.57788661 ...  0.18181818  3.83688886
   0.33333333]
 [ 0.25        0.9330127   0.63818812 ...  0.18181818  5.79118346
   0.11111111]
 [ 0.37059048  0.98296291  0.64203845 ...  0.18181818  0.7075579
   0.33333333]]


In [None]:
# Predict on the scaled future inputs.
y_future_scaled = model.predict(X_future_scaled)
print(y_future_scaled)

# Undo the target scaling.
y_future = scaler_y.inverse_transform(y_future_scaled)
print(y_future)
# Treat negative and near-zero (< 1) predictions as zero irradiance.
y_future[y_future < 1] = 0
# Apply the same nighttime rule as preprocessing and the test-set predictions:
# hours 0-6 and 18-23 are forced to zero. Without it the forecast reported
# non-zero irradiance in the middle of the night.
future_night_mask = future_df["hour"].isin(list(range(0, 7)) + list(range(18, 24))).to_numpy()
y_future[future_night_mask, :] = 0
print(y_future)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[[-8.32570553e+00]
 [ 7.47584641e-01]
 [ 8.00893326e+01]
 [ 1.83561554e+02]
 [ 4.89122253e+02]
 [ 1.76697998e+02]
 [ 8.44021378e+01]
 [ 1.05241455e+03]
 [ 1.04058484e+03]
 [ 5.99540894e+02]
 [ 2.82916016e+02]
 [ 1.07699805e+03]
 [ 1.42041479e+03]
 [ 7.63961060e+02]
 [ 8.21534058e+02]
 [ 1.11703577e+03]
 [ 1.17911646e+03]
 [ 1.07019653e+03]
 [ 8.80736771e+01]
 [ 1.64848877e+02]
 [ 4.97191727e-01]
 [-3.21490669e+01]
 [-2.33065472e+01]
 [ 7.06527054e-01]
 [-3.09285998e+00]
 [ 4.71440613e-01]
 [ 4.22207594e+00]
 [ 9.84349346e+00]
 [ 4.92256042e+02]
 [ 7.14981985e+00]
 [ 7.09693726e+02]
 [ 6.26991089e+02]
 [ 6.39963867e+02]
 [ 6.01654175e+02]
 [ 1.24004077e+03]
 [ 1.38188062e+03]
 [ 1.26392114e+03]
 [ 8.81789612e+02]
 [ 8.18575806e+02]
 [ 9.44570068e+02]
 [ 8.69063110e+02]
 [ 6.73220215e+02]
 [ 5.73885803e+02]
 [ 8.25072632e+02]
 [ 4.85285858e+02]
 [ 1.59261703e+02]
 [ 7.13565171e-01]
 [ 4.34148741e+00]
 [ 1.19179845e+0

In [None]:
# Attach the forecast to the future frame as the DNI_A column.
future_df['DNI_A'] = y_future.ravel()

# Write the full forecast to CSV without the index column.
forecast_csv = "Future_Solar_Predictions_March2025.csv"
future_df.to_csv(forecast_csv, index=False)

# Preview the first 10 forecast rows.
print(future_df.iloc[:10])

   Temperature_A   Pressure_A  Surface Albedo_A  Cloud Type_A  day  month  \
0      28.333113  1023.655659          0.307955             2    1      3   
1      28.993038  1005.952408          0.127955             2    1      3   
2      28.152701  1012.844820          0.445678             0    1      3   
3      27.911094  1029.381624          0.409421             1    1      3   
4      29.581825  1015.295630          0.477750             2    1      3   
5      29.623727  1047.293189          0.222463             1    1      3   
6      27.330465  1042.013080          0.114345             1    1      3   
7      27.413788  1035.718695          0.433074             1    1      3   
8      29.631617  1005.006870          0.454535             1    1      3   
9      29.481597  1043.427033          0.114172             0    1      3   

   hour  hour_sin      hour_cos       DNI_A  
0     0  0.000000  1.000000e+00    0.000000  
1     1  0.258819  9.659258e-01    0.000000  
2     2  0.500