In [139]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
#from tensorflow.keras.models import load_model
import os

In [140]:
# Read the sky-camera test spreadsheet into a DataFrame
file_path = 'data/sky_camera.xlsx'
test_df = pd.read_excel(io=file_path)

In [141]:
# Show the (rows, columns) shape of the loaded test DataFrame
test_df.shape

(880, 4)

In [142]:
# Directory that holds the sky-camera images listed in the spreadsheet
image_path = 'data/images'

# Create the new 'image_path' column by prefixing every file name with the
# image directory. The directory is read from the variable above so that it
# only has to be changed in one place.
test_df['image_path'] = test_df['File Name'].apply(lambda x: f'{image_path}/{x}')

In [143]:
# List the column labels, now including the added 'image_path' column
test_df.columns

Index(['File Name', 'File Name.1', 'Timestamp', 'real GHI', 'image_path'], dtype='object')

In [144]:
# Preview the first rows of the test DataFrame
test_df.head()

Unnamed: 0,File Name,File Name.1,Timestamp,real GHI,image_path
0,20221031013000_0.jpg,130,1:30,0.0,data/images/20221031013000_0.jpg
1,20221031013100_0.jpg,131,1:31,0.0,data/images/20221031013100_0.jpg
2,20221031013200_0.jpg,132,1:32,0.0,data/images/20221031013200_0.jpg
3,20221031013300_0.jpg,133,1:33,0.0,data/images/20221031013300_0.jpg
4,20221031013400_0.jpg,134,1:34,0.0,data/images/20221031013400_0.jpg


In [145]:
# Rename the spreadsheet headers to short snake_case names used by the
# following cells. The result is reassigned instead of using inplace=True,
# which is the form the pandas documentation recommends.
test_df = test_df.rename(columns={
    'File Name': 'file_name',
    'File Name.1': 'file_name1',
    'Timestamp': 'timestamp',
    'real GHI': 'ghi',
})

In [146]:
# Preview the first rows again to check the renamed columns
test_df.head()

Unnamed: 0,file_name,file_name1,timestamp,ghi,image_path
0,20221031013000_0.jpg,130,1:30,0.0,data/images/20221031013000_0.jpg
1,20221031013100_0.jpg,131,1:31,0.0,data/images/20221031013100_0.jpg
2,20221031013200_0.jpg,132,1:32,0.0,data/images/20221031013200_0.jpg
3,20221031013300_0.jpg,133,1:33,0.0,data/images/20221031013300_0.jpg
4,20221031013400_0.jpg,134,1:34,0.0,data/images/20221031013400_0.jpg


In [147]:
import os

# Function to check if the file exists
def check_file_existence(file_path):
    """Report whether ``file_path`` points to an existing file.

    Args:
        file_path: Path to test.

    Returns:
        'Yes' if the path exists and is a file, otherwise 'No'.
        Directories yield 'No': a path that resolves to a folder (for
        example when the file name part is empty) cannot be loaded as an
        image, so it must not pass the check.
    """
    return 'Yes' if os.path.isfile(file_path) else 'No'

In [148]:
# Add the 'file_existance' column: the 'Yes'/'No' answer that
# check_file_existence gives for each row's image path
test_df['file_existance'] = test_df['image_path'].map(check_file_existence)

In [149]:
# Keep only the rows flagged 'Yes' in the 'file_existance' column
data_filtered = test_df.loc[test_df['file_existance'].eq('Yes')]

In [150]:
# Show the (rows, columns) shape after filtering on 'file_existance'
data_filtered.shape

(880, 6)

In [151]:
# Summary statistics of the 'ghi' column, which is used as the target below
data_filtered['ghi'].describe()

count    880.000000
mean     337.465337
std      296.806543
min        0.000000
25%        0.001200
50%      314.072100
75%      625.448625
max      801.920600
Name: ghi, dtype: float64

In [152]:
# Define image size used in model
# Passed as target_size to load_img inside preprocess_image, so every image
# is loaded at these dimensions.
image_size = (250, 250)  # Adjust based on the size used during model training

In [153]:
# Function to load and preprocess images
def preprocess_image(img_path):
    """Load one image and return it as a scaled pixel array.

    The image at ``img_path`` is loaded with ``target_size=image_size``
    (the notebook-level setting), converted to an array with
    ``img_to_array`` and divided by 255.0.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The image array after division by 255.0.
    """
    pixels = img_to_array(load_img(img_path, target_size=image_size))
    # Normalize to [0, 1] (assumes 8-bit pixel values)
    return pixels / 255.0

In [154]:
# -- features: one preprocessed array per image path, collected into one array
X_test = np.array(list(map(preprocess_image, data_filtered['image_path'].values)))

# -- Target: the 'ghi' values of the same rows, in the same order
y_test = data_filtered['ghi'].to_numpy()

In [155]:
# Scale factor for GHI. It is taken from the unscaled 'ghi' column rather
# than from y_test, so re-running this cell after y_test has already been
# normalized does not collapse max_ghi to 1.0 and silently break the later
# "* max_ghi" rescaling.
# NOTE(review): this is the maximum of the *test* targets. If the model was
# trained on GHI scaled by a different (training-set) maximum, that value
# should be used here instead - confirm against the training notebook.
max_ghi = data_filtered['ghi'].max()

print(f"GHI Max = {max_ghi}")

# Normalize GHI values (always recomputed from the unscaled column)
y_test = data_filtered['ghi'].values / max_ghi

GHI Max = 801.9206


In [156]:
import joblib

# Restore the tuned GHI model from its joblib dump.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files from a trusted source.
model = joblib.load(filename='models/tuned_ghi_prediction_model.pkl')

In [157]:
# Predict GHI values
# The later cells multiply these predictions by max_ghi, i.e. they are
# treated as being on the same normalized scale as y_test.
predicted_ghi = model.predict(X_test)


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 551ms/step


In [158]:
# Error per image: actual GHI minus predicted GHI, both rescaled by max_ghi
# (a negative value means the model predicted more than was measured)
difference = np.subtract(
    y_test * max_ghi,
    predicted_ghi.flatten() * max_ghi,
)


In [159]:
# List the columns of the filtered DataFrame
data_filtered.columns

Index(['file_name', 'file_name1', 'timestamp', 'ghi', 'image_path',
       'file_existance'],
      dtype='object')

In [160]:
# Assemble the results table: identifying columns taken from data_filtered,
# followed by actual GHI, predicted GHI (both rescaled by max_ghi) and
# their difference
results_df = data_filtered[['file_name', 'timestamp', 'image_path']].assign(
    ghi=y_test * max_ghi,
    predicted_ghi=(predicted_ghi.flatten()) * max_ghi,
    difference=difference,
)


In [161]:
# Preview the first rows of the results table
results_df.head()

Unnamed: 0,file_name,timestamp,image_path,ghi,predicted_ghi,difference
0,20221031013000_0.jpg,1:30,data/images/20221031013000_0.jpg,0.0,8.125846,-8.125846
1,20221031013100_0.jpg,1:31,data/images/20221031013100_0.jpg,0.0,7.688019,-7.688019
2,20221031013200_0.jpg,1:32,data/images/20221031013200_0.jpg,0.0,2.937726,-2.937726
3,20221031013300_0.jpg,1:33,data/images/20221031013300_0.jpg,0.0,6.921663,-6.921663
4,20221031013400_0.jpg,1:34,data/images/20221031013400_0.jpg,0.0,10.505911,-10.505911


In [162]:
# Write the results table to CSV, leaving out the DataFrame index
results_df.to_csv(path_or_buf='data/ghi_predictions.csv', index=False)