In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
from src_nowcasting import image_preprocessing, sequence_img_generator, get_models
from datetime import datetime
from pvlib import location, solarposition
import math

### Data pre-processing

- add explaination

In [2]:
# PATH_INPUT_FOLDER = r'D:\001_Nowcasting\IR_images_nowcasting'
# PATH_OUTPUT_FOLDER = r'D:\001_Nowcasting\IR_images_postprocess'

# pre_processor = image_preprocessing.PreProcessImage()

# # go through all the days in the folder
# for day in tqdm(os.listdir(PATH_INPUT_FOLDER)):    

#    # go through all the images in the day
#     files = os.listdir(os.path.join(PATH_INPUT_FOLDER, day))

#     for f in files:

#         # Load image
#         in_path = os.path.join(PATH_INPUT_FOLDER, day, f)
#         image =  cv2.imread(in_path, cv2.IMREAD_UNCHANGED)

#         # Transform image
#         new_image = pre_processor.transform_image(image)

#         # Save image
#         folder_path = os.path.join(PATH_OUTPUT_FOLDER, day)
#         # Check if exists and if not create folder
#         if not os.path.exists(folder_path): os.mkdir(folder_path)
#         out_path = os.path.join(folder_path, f.split('.')[0]+'.jpg')

#         cv2.imwrite(out_path, new_image)

In [3]:
# df_data = sequence_img_generator.generate_dataframe(r'C:\Users\Admin\Code\maciej-solar-nowcasting\dataset\sensors', 15)

### Training

In [4]:
from sklearn import model_selection, metrics
from tensorflow.keras import callbacks
from tensorflow.keras import optimizers
import tensorflow as tf
import mlflow

In [5]:
tf.keras.backend.clear_session()
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [6]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("sCNN_best_model_tests")

<Experiment: artifact_location='file:///c:/Users/Admin/Code/maciej-solar-nowcasting/nowcasting/mlruns/4', creation_time=1696335798033, experiment_id='4', last_update_time=1696335798033, lifecycle_stage='active', name='sCNN_best_model_tests', tags={}>

Parameters

In [7]:
MODEL_TYPE = 'scnn'                                         # model_type chosen for the training.
FORECAST_HORIZON = 30                                       # time horizon.
NO_IMAGES = 3
EPSILON = 1e-3

# Image parameters
img_size = [128, 128]                                       # image size.
img_channels = 1                                            # image channels.
ELEVATION_THRESHOLD = 20

train_batchsize = 32                                        # batch size for train.
test_batchsize = 1                                          # batch size for test.
epochs = 100                                                # maximum number of epochs.
TRAIN_SIZE = 0.8
I = 0

# Paths
# path = constants.REGR_SEQ_DATASET_DIR                                                  
WEIGHT_PATH = r'.\model\weights'          # path to the weight.
LOG_PATH = None                     # path to save the CSV file.
CHECKPOINT_PATH = r'.\model\checkpoints'
IMAGE_PATH = r'C:\Users\Admin\Code\maciej-solar-nowcasting\dataset\IR_images_postprocess'

# root_logdir = os.path.join(os.curdir, "run_regression/my_logs")

params1 = {'batch_size': train_batchsize,
           'dim': (img_size[0], img_size[1], 1 * NO_IMAGES),
           'channel_IMG': img_channels,
           'shuffle': False,
           'iftest': False}

params2 = {'batch_size': train_batchsize,
           'dim': (img_size[0], img_size[1], 1 * NO_IMAGES),
           'channel_IMG': img_channels,
           'iftest': False}

params3 = {'batch_size': test_batchsize,
           'dim': (img_size[0], img_size[1], 1 * NO_IMAGES),
           'channel_IMG': img_channels,
           'shuffle': False,
           'iftest': False}

Test models

In [8]:
models = {
   'SCNN': get_models.SCNN(input_shape=[img_size[0], img_size[1], NO_IMAGES]),
   'SCNN_small_v2': get_models.SCNN_small_2(input_shape=[img_size[0], img_size[1], NO_IMAGES]),
}

Prepare the DF

In [9]:
df_data = pd.read_parquet(r'..\dataset\df_data_15.parquet.gzip')

# Define the Target column
df_data['Target'] = df_data.Target_CSI

# Remove the data with low elevation
df_data_reduced = df_data[df_data.elevation > ELEVATION_THRESHOLD]


df_train_full, df_test = model_selection.train_test_split(df_data_reduced, train_size=TRAIN_SIZE, shuffle=False)
df_train, df_val = model_selection.train_test_split(df_train_full, train_size=TRAIN_SIZE, shuffle=False)

train_generator = sequence_img_generator.DataGeneratorGHI_SCNN(df_train, IMAGE_PATH, **params1)

val_generator = sequence_img_generator.DataGeneratorGHI_SCNN(df_val, IMAGE_PATH, **params2)

In [10]:
# Test cases

# Sunny day test - 19/08/2023. 23/08/2023
# Partially cloudy day test - 26/08/2023, 29/08/2023
# Mostly cloudy / rainy day test - 27/08/2023

df_test_1 = df_test[df_test.date.dt.date == datetime(2023, 8, 19).date()].copy() # Sunny day
df_test_2 = df_test[df_test.date.dt.date == datetime(2023, 8, 23).date()].copy() # Sunny day
df_test_3 = df_test[df_test.date.dt.date == datetime(2023, 8, 26).date()].copy() # Partially cloudy day
df_test_4 = df_test[df_test.date.dt.date == datetime(2023, 8, 27).date()].copy() # Mostly cloudy / rainy day
df_test_5 = df_test[df_test.date.dt.date == datetime(2023, 8, 29).date()].copy() # Partially cloudy day

test_cases = [df_test_1, df_test_2, df_test_3, df_test_4, df_test_5]

In [11]:
mlflow.tensorflow.autolog(disable=True)

# Parameters
BETA_1 = 0.9
BETA_2 = 0.999

LEARNING_RATE_START = 0.0003
LOSS = 'mean_squared_error'

def exponential_decay_fn(epoch, learning_rate=LEARNING_RATE_START):
    return learning_rate * 0.1**(epoch / 20)

RUN_ID = 1

for m in models:
    
    tf.keras.backend.clear_session()
    with mlflow.start_run(run_name=f'run_{RUN_ID:03d}_{m}_scaled_15, lr: 0.0003, loss: {LOSS}'):
        params = {
            'forecast_horizon': FORECAST_HORIZON,
            'elevation_threshold': ELEVATION_THRESHOLD,
            'model_type': MODEL_TYPE,
            'learning_rate': '1C',
            'beta_1': BETA_1,
            'beta_2': BETA_2,
            'loss': 'mean_squared_error',
                
        }
            
        callbacks_list = []
        model = models[m]
        
        # Logging
        if LOG_PATH: 
            callbacks_list.append(callbacks.CSVLogger(os.path.join(LOG_PATH, f'training_id_{m}_{LEARNING_RATE_START}.csv')))
        # Checkpointing
        if CHECKPOINT_PATH:
            callbacks_list.append(callbacks.ModelCheckpoint(
                filepath=os.path.join(CHECKPOINT_PATH, f'training_id_{m}_scaled_15_0.0003_{LOSS}.h5'),
                verbose = 1,
                save_best_only = True,
                ))

        # Early stopping
        callbacks_list.append(callbacks.EarlyStopping(monitor='val_loss', patience=10))
        # Learing rate reduction scheduler
        # callbacks_list.append(get_models.OneCycleScheduler(math.ceil(len(df_train) / train_batchsize) * epochs, max_rate = 0.0005))
        # callbacks_list.append(tf.keras.callbacks.LearningRateScheduler(exponential_decay_fn))
                    
        optimizer = optimizers.Adam(
            learning_rate=LEARNING_RATE_START, 
            beta_1=0.9, 
            beta_2=0.999, 
            amsgrad=False
            )

        model.compile(
            optimizer=optimizer, 
            loss=LOSS,
            metrics=[tf.keras.metrics.RootMeanSquaredError()]
            )

        history = model.fit(
            train_generator,
            #steps_per_epoch=int(df_train.shape[0] / train_batchsize),
            epochs=epochs,
            validation_data=val_generator,
            #validation_steps=int(df_val.shape[0] / train_batchsize),
            callbacks=callbacks_list                              
            )
            
        mlflow.log_param("model_params", model.count_params())

                
        for i_test, df_t in enumerate(test_cases):
                
            test_generator = sequence_img_generator.DataGeneratorGHI_SCNN(df_t, IMAGE_PATH, **params3)

            # Test ghi
            y_test = model.predict(test_generator) * df_t.Target_GHICS.values.reshape(-1, 1)
            y_true = df_t.Target_GHIr.values
            y_pers = df_t.ghi1.values
                    
                    
            mae_test = metrics.mean_squared_error(y_true, y_test)
            mae_per = metrics.mean_squared_error(y_true, y_pers)
                    
            FS = 1 - mae_test / mae_per
                
            print(f'Test case: {df_t.date.dt.date.iloc[0]}')    
            print(f"model_params: {model.count_params()}")
            print(f"mae_test: {mae_test}")
            print(f"mae_pers {mae_per}")
            print(f"FS: {FS}")
                
            mlflow.log_metric(f'mae_test_{i_test}', mae_test)
            mlflow.log_metric(f"mae_pers_{i_test}", mae_per)
            mlflow.log_metric(f"FS_{i_test}", FS)
                
                
        mlflow.tensorflow.log_model(model, f'{m}_{LEARNING_RATE_START}_{LOSS}')

Epoch 1/100
Epoch 1: val_loss improved from inf to 0.05851, saving model to .\model\checkpoints\training_id_SCNN_scaled_15_0.0003_mean_squared_error.h5
Epoch 2/100
Epoch 2: val_loss improved from 0.05851 to 0.03428, saving model to .\model\checkpoints\training_id_SCNN_scaled_15_0.0003_mean_squared_error.h5
Epoch 3/100
Epoch 3: val_loss improved from 0.03428 to 0.02937, saving model to .\model\checkpoints\training_id_SCNN_scaled_15_0.0003_mean_squared_error.h5
Epoch 4/100
Epoch 4: val_loss did not improve from 0.02937
Epoch 5/100
Epoch 5: val_loss did not improve from 0.02937
Epoch 6/100
Epoch 6: val_loss did not improve from 0.02937
Epoch 7/100
Epoch 7: val_loss did not improve from 0.02937
Epoch 8/100
Epoch 8: val_loss did not improve from 0.02937
Epoch 9/100
Epoch 9: val_loss improved from 0.02937 to 0.02857, saving model to .\model\checkpoints\training_id_SCNN_scaled_15_0.0003_mean_squared_error.h5
Epoch 10/100
Epoch 10: val_loss did not improve from 0.02857
Epoch 11/100
Epoch 11: v



Test case: 2023-08-29
model_params: 16812353
mae_test: 35032.94174716967
mae_pers 27290.129370086215
FS: -0.28372208398435284




INFO:tensorflow:Assets written to: C:\Users\Admin\AppData\Local\Temp\tmpoxvnbhm2\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\Admin\AppData\Local\Temp\tmpoxvnbhm2\model\data\model\assets


Epoch 1/100
Epoch 1: val_loss improved from inf to 0.03236, saving model to .\model\checkpoints\training_id_SCNN_small_v2_scaled_15_0.0003_mean_squared_error.h5
Epoch 2/100
Epoch 2: val_loss did not improve from 0.03236
Epoch 3/100
Epoch 3: val_loss did not improve from 0.03236
Epoch 4/100
Epoch 4: val_loss improved from 0.03236 to 0.03061, saving model to .\model\checkpoints\training_id_SCNN_small_v2_scaled_15_0.0003_mean_squared_error.h5
Epoch 5/100
Epoch 5: val_loss did not improve from 0.03061
Epoch 6/100
Epoch 6: val_loss improved from 0.03061 to 0.02624, saving model to .\model\checkpoints\training_id_SCNN_small_v2_scaled_15_0.0003_mean_squared_error.h5
Epoch 7/100
Epoch 7: val_loss improved from 0.02624 to 0.02491, saving model to .\model\checkpoints\training_id_SCNN_small_v2_scaled_15_0.0003_mean_squared_error.h5
Epoch 8/100
Epoch 8: val_loss improved from 0.02491 to 0.02343, saving model to .\model\checkpoints\training_id_SCNN_small_v2_scaled_15_0.0003_mean_squared_error.h5
Ep



Test case: 2023-08-29
model_params: 16024385
mae_test: 25783.59552584394
mae_pers 27290.129370086215
FS: 0.05520434966840604




INFO:tensorflow:Assets written to: C:\Users\Admin\AppData\Local\Temp\tmpx50lq2h7\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\Admin\AppData\Local\Temp\tmpx50lq2h7\model\data\model\assets
