## Load Data

In [1]:
import pandas as pd

# Read the spreadsheet that lists every image file with its measured GHI
file_path = 'data/sky_camera.xlsx'
data = pd.read_excel(io=file_path)


In [2]:
# Preview the top five rows of the freshly loaded table
data.head(n=5)

Unnamed: 0,File Name,File Name.1,Timestamp,real GHI
0,20221031013000_0.jpg,130,1:30,0.0
1,20221031013100_0.jpg,131,1:31,0.0
2,20221031013200_0.jpg,132,1:32,0.0
3,20221031013300_0.jpg,133,1:33,0.0
4,20221031013400_0.jpg,134,1:34,0.0


In [3]:
# Summarize the column dtypes and non-null counts of the loaded table
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 880 entries, 0 to 879
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   File Name    880 non-null    object 
 1   File Name.1  880 non-null    int64  
 2   Timestamp    880 non-null    object 
 3   real GHI     880 non-null    float64
dtypes: float64(1), int64(1), object(2)
memory usage: 27.6+ KB


In [4]:
# (rows, columns) of the loaded table
data.shape

(880, 4)

## Preprocess the Data

In [5]:
# Directory that holds the image files listed in the 'File Name' column
image_path = 'data/images'

# Create the new 'image_path' column by prefixing every file name with that
# directory, so the location is defined in exactly one place
data['image_path'] = data['File Name'].apply(lambda x: f'{image_path}/{x}')

In [6]:
# Check that the image_path column was appended as expected
data.head(n=5)

Unnamed: 0,File Name,File Name.1,Timestamp,real GHI,image_path
0,20221031013000_0.jpg,130,1:30,0.0,data/images/20221031013000_0.jpg
1,20221031013100_0.jpg,131,1:31,0.0,data/images/20221031013100_0.jpg
2,20221031013200_0.jpg,132,1:32,0.0,data/images/20221031013200_0.jpg
3,20221031013300_0.jpg,133,1:33,0.0,data/images/20221031013300_0.jpg
4,20221031013400_0.jpg,134,1:34,0.0,data/images/20221031013400_0.jpg


In [7]:
# List the current column labels
data.columns

Index(['File Name', 'File Name.1', 'Timestamp', 'real GHI', 'image_path'], dtype='object')

In [8]:
# Switch to short snake_case column names. The result is reassigned instead of
# using inplace=True, so the frame object shown by earlier cells is not mutated.
data = data.rename(columns={
    'File Name': 'file_name',
    'File Name.1': 'file_name1',
    'Timestamp': 'timestamp',
    'real GHI': 'ghi',
})

In [9]:
# Preview the frame with its renamed columns
data.head(n=5)

Unnamed: 0,file_name,file_name1,timestamp,ghi,image_path
0,20221031013000_0.jpg,130,1:30,0.0,data/images/20221031013000_0.jpg
1,20221031013100_0.jpg,131,1:31,0.0,data/images/20221031013100_0.jpg
2,20221031013200_0.jpg,132,1:32,0.0,data/images/20221031013200_0.jpg
3,20221031013300_0.jpg,133,1:33,0.0,data/images/20221031013300_0.jpg
4,20221031013400_0.jpg,134,1:34,0.0,data/images/20221031013400_0.jpg


In [10]:
import os

# Function to check if the file exists
def check_file_existence(file_path):
    """Report whether ``file_path`` points to an existing regular file.

    ``os.path.isfile`` is used rather than ``os.path.exists`` so that a
    directory (for example the bare image folder produced by an empty file
    name) is not reported as a usable image file.

    Args:
        file_path: Path to test.

    Returns:
        'Yes' if the path is an existing file, otherwise 'No'.
    """
    return 'Yes' if os.path.isfile(file_path) else 'No'

In [11]:
# Flag each row with 'Yes'/'No' depending on whether its image is on disk
data['file_existance'] = data['image_path'].map(check_file_existence)

In [12]:
# Preview the frame including the file_existance flag
data.head(n=5)

Unnamed: 0,file_name,file_name1,timestamp,ghi,image_path,file_existance
0,20221031013000_0.jpg,130,1:30,0.0,data/images/20221031013000_0.jpg,Yes
1,20221031013100_0.jpg,131,1:31,0.0,data/images/20221031013100_0.jpg,Yes
2,20221031013200_0.jpg,132,1:32,0.0,data/images/20221031013200_0.jpg,Yes
3,20221031013300_0.jpg,133,1:33,0.0,data/images/20221031013300_0.jpg,Yes
4,20221031013400_0.jpg,134,1:34,0.0,data/images/20221031013400_0.jpg,Yes


In [13]:
# Keep only the rows whose image file was found on disk
data_filtered = data.loc[data['file_existance'].eq('Yes')]

In [14]:
# (rows, columns) remaining after dropping rows whose image file is missing
data_filtered.shape

(880, 6)

In [15]:
#!pip3 install opencv-python
#!pip install tensorflow

In [16]:
import os

import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [17]:
# Assuming 'data' DataFrame contains 'image_path' and 'ghi' columns
# Preprocess images and GHI values
# Target size handed to cv2.resize, which interprets the tuple as (width, height)
image_size = (128, 128)  # Resize to 128x128

In [18]:
def preprocess_image(img_path, size=None):
    """Load one image from disk, resize it and scale its pixels to [0, 1].

    Args:
        img_path: Path of the image file to read.
        size: Optional target size passed to ``cv2.resize`` (OpenCV reads it
            as ``(width, height)``). Defaults to the notebook-level
            ``image_size``.

    Returns:
        The resized image as a float array with values in [0, 1]. Channels
        stay in the order OpenCV loads them (BGR); nothing is reordered here.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (missing, not an
            image, or corrupt).
    """
    target_size = image_size if size is None else size
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque error inside cv2.resize.
        raise FileNotFoundError(
            f'Could not read image (missing or not a valid image file): {img_path!r}'
        )
    image = cv2.resize(image, target_size)
    return image / 255.0  # Normalize to [0, 1]

In [19]:
# Load and preprocess every image that was found on disk into one array,
# and collect the matching GHI targets in the same row order
images = np.array(list(map(preprocess_image, data_filtered['image_path'].values)))
ghi_values = data_filtered['ghi'].to_numpy()

In [20]:
# Normalize GHI values to [0, 1] by the largest observed value.
# The raw column is re-read here so the cell gives the same result however many
# times it is run, and the scale factor is kept in `ghi_max` so predictions can
# be converted back to the original GHI scale (prediction * ghi_max).
ghi_raw = data_filtered['ghi'].to_numpy(dtype=float)
ghi_max = ghi_raw.max()
if not ghi_max > 0:
    # A zero or NaN maximum would silently turn every target into NaN/inf
    raise ValueError(f'Cannot normalize GHI values: maximum is {ghi_max!r}')
ghi_values = ghi_raw / ghi_max

In [21]:
# Train-test split: hold out 20% of the samples, with a fixed seed so the
# partition is the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    images, ghi_values, test_size=0.2, random_state=42
)

# Build a CNN model: three convolution + max-pooling stages followed by a
# dense head with dropout and a single linear unit for the regression target.
# The input shape is read from the training array rather than hard-coded, so
# it always matches the size the images were resized to, and it is declared
# with an explicit Input layer rather than `input_shape=` on the first Conv2D
# (Keras warns about the latter in Sequential models).
model = Sequential([
    Input(shape=X_train.shape[1:]),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='linear')  # Regression output
])

# Mean squared error is optimized; mean absolute error is tracked as a metric
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
# Fit on the training split for 50 epochs in batches of 32, with 20% of the
# training samples set aside for per-epoch validation
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)

Epoch 1/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 265ms/step - loss: 1.9947 - mae: 0.8718 - val_loss: 0.1370 - val_mae: 0.3205
Epoch 2/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 239ms/step - loss: 0.1339 - mae: 0.3091 - val_loss: 0.0418 - val_mae: 0.1833
Epoch 3/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 244ms/step - loss: 0.0499 - mae: 0.1795 - val_loss: 0.0180 - val_mae: 0.1053
Epoch 4/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 249ms/step - loss: 0.0294 - mae: 0.1309 - val_loss: 0.0097 - val_mae: 0.0847
Epoch 5/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 249ms/step - loss: 0.0251 - mae: 0.1206 - val_loss: 0.0090 - val_mae: 0.0803
Epoch 6/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 248ms/step - loss: 0.0238 - mae: 0.1147 - val_loss: 0.0071 - val_mae: 0.0668
Epoch 7/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 247ms/step - 

In [23]:
# Evaluate the model on the held-out test split. X_test / y_test were not
# passed to fit, so this is a test metric rather than a validation metric.
# The MAE is in normalized units because the targets were divided by the
# maximum GHI before training.
loss, mae = model.evaluate(X_test, y_test)
print(f'Test MAE: {mae}')

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0045 - mae: 0.0556
Validation MAE: 0.05860792472958565


In [25]:
# Save the model
# The target directory is created first so saving does not fail on a fresh
# checkout where 'models/' does not exist yet
os.makedirs('models', exist_ok=True)
model.save('models/ghi_prediction_model.keras')