In [1]:
import os
import tensorflow as tf
import tensorflow_federated as tff
import numpy as np
from models.edsr import edsr
import matplotlib as plt
import seaborn as sns
import math

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

2024-09-10 17:26:08.065162: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-10 17:26:08.425856: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-10 17:26:08.425900: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-10 17:26:08.425924: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-10 17:26:08.640953: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-10 17:26:08.652671: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

In [2]:
def quantile_clipping(data, percentage, mode="max"):
    quantile_val = np.quantile(data, percentage)
    if mode == "max":
        data = data.clip(max=quantile_val)
    if mode == "min":
        data = data.clip(min=quantile_val)
    return data

def exp_root_norm(data, exp=2):
    return data ** (1 / exp)

def minmax_scale(images):
    # Assuming images is a 4D array with shape (N, 32, 32)
    min_val = np.min(images)
    max_val = np.max(images)
    
    scaled_images = (images - min_val) / (max_val - min_val)
    
    return scaled_images

def preprocess(images):
    images = quantile_clipping(images, 0.95, mode="max")
    images = exp_root_norm(images, exp=2)
    images = minmax_scale(images)
    return images

import pickle

def save_pickle(data, filename):
    with open(filename, 'wb') as f:
        pickle.dump(data, f)
def load_pickle(filename):
    with open(filename, 'rb') as f:
        return pickle.load(f)

# def pad_to_nearest_square(matrix, num_clients):
#     """
#     Pad the matrix to the nearest size that's perfectly divisible by sqrt(num_clients).
#     """
#     orig_height, orig_width = matrix.shape[1:3]
#     sqrt_clients = int(math.sqrt(num_clients))
    
#     target_size = math.ceil(max(orig_height, orig_width) / sqrt_clients) * sqrt_clients
    
#     padded = np.pad(
#         matrix,
#         ((0, 0), (0, target_size - orig_height), (0, target_size - orig_width), (0, 0)),
#         mode='constant'
#     )
    
#     return padded, (orig_height, orig_width)


In [3]:
dataset = 'germany'

if dataset == 'geant':
    original_size = 22
elif dataset == 'germany':
    original_size = 161

NUM_ROUNDS = 20
overlap_perc = 5
num_clients = 4
c_scale_factor = 2  # This determines the downsampling factor for creating coarse-grained matrices for each client
scale_factor = 2
path_to_data = 'CNSM/data'
ground_truth = f'{dataset}_original_{original_size}.npy'

In [4]:
def calculate_psnr(y_true, y_pred, max_value=1.0):
    """
    Calculate Peak Signal-to-Noise Ratio (PSNR)
    """
    mse = np.mean((y_true - y_pred) ** 2)
    if mse == 0:
        return float('inf')
    return 20 * np.log10(max_value / np.sqrt(mse))

In [5]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
from skimage.metrics import structural_similarity as ssim

for p in [5, 25, 50, 75]:
    for n in [2, 3, 4, 5, 6, 7]:
        coarse_size = original_size // scale_factor
        fine_size = coarse_size * scale_factor
        train_file = f'{dataset}_coarse_{coarse_size}_x{scale_factor}.npy'
        train_set = np.load(os.path.join(path_to_data, train_file)).astype(np.float32)
        train_ground_truth = np.load(os.path.join(path_to_data, ground_truth)).astype(np.float32)

        # node_to_index = np.load(os.path.join(path_to_data, 'node_to_index.npy'), allow_pickle=True).item()

        train_set = train_set.reshape((-1, coarse_size, coarse_size, 1))
        train_ground_truth = train_ground_truth.reshape((-1, original_size, original_size, 1))
        print(f"Train set shape: {train_set.shape}", "Ground truth shape:", train_ground_truth.shape)

        train_set = preprocess(train_set)
        train_ground_truth = preprocess(train_ground_truth)

        # Load the federated weights (overlap evaluation)
        fed_weights = load_pickle(f'CNSM/fed_data/{dataset}_fed_weights_{n}_x2_rounds_{NUM_ROUNDS}_overlap_{p}.pkl')

        # Create a new model and load the weights
        model = edsr(input_depth=1, scale=scale_factor, num_filters=64, num_res_blocks=8)
        model.set_weights(fed_weights)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss=tf.keras.losses.MeanSquaredError())

        # Split the data into train and test sets
        train_size = int(0.8 * len(train_set))
        x_train, x_test = train_set[:train_size], train_set[train_size:]
        y_train, y_test = train_ground_truth[:train_size], train_ground_truth[train_size:]

        # Flatten the arrays to 1D for easier calculation
        test_predictions = model.predict(x_test)

        # Flatten the arrays to 1D for easier calculation
        test_predictions = model.predict(x_test)
        if test_predictions.shape[1] != y_test.shape[1]:
            y_test = y_test[:, :test_predictions.shape[1], :test_predictions.shape[1]] # To match the model output
        y_true = y_test.flatten()
        y_pred = test_predictions.flatten()

        # Calculate MSE
        mse = mean_squared_error(y_true, y_pred)

        # Calculate RMSE (Root Mean Squared Error)
        rmse = np.sqrt(mse)

        # Calculate MAE
        mae = mean_absolute_error(y_true, y_pred)

        # Calculate MAPE (Mean Absolute Percentage Error)
        epsilon = 1e-10
        with np.errstate(divide='ignore', invalid='ignore'):
            # Calculate absolute percentage error
            abs_percent_error = np.abs((y_true - y_pred) / (y_true + epsilon))

        # Handle cases where y_true is zero
        zero_true_mask = np.abs(y_true) <= epsilon
        abs_percent_error[zero_true_mask] = np.where(
            np.abs(y_pred[zero_true_mask]) <= epsilon,
            0,  # Both true and predicted are zero (or very close)
            1  # True is zero, but predicted is non-zero
        )
        # Calculate the mean of the absolute percentage errors
        mape = np.mean(abs_percent_error) * 100


        # WMAE
        abs_errors = np.abs(y_true - y_pred)
        weights = np.log1p(np.abs(y_true) + 1e-6)
        weights = weights / np.sum(weights)
        weighted_errors = abs_errors * weights
        wmae =  np.sum(weighted_errors)
        
        ssim_scores = []
        for i in range(y_test.shape[0]):
            ssim_score = ssim(y_test[i, :, :, 0], test_predictions[i, :, :, 0], data_range=1.0)
            ssim_scores.append(ssim_score)
        avg_ssim = np.mean(ssim_scores)

        # Calculate PSNR
        psnr_scores = []
        for i in range(y_test.shape[0]):
            psnr_score = calculate_psnr(y_test[i, :, :, 0], test_predictions[i, :, :, 0])
            psnr_scores.append(psnr_score)
        avg_psnr = np.mean(psnr_scores)


        # Calculate the weighted MAE (WMAE): errors on values close to 0 are less penalized, as they are less important (sparse matrix)

        print(f"Mean Squared Error (MSE): {mse:.4f}")
        print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
        print(f"Mean Absolute Error (MAE): {mae:.4f}")
        print(f"Mean Absolute Percentage Error: {mape:.4f}")
        print(f"WMean Absolute Error (WMAE): {wmae:.4f}")
        print(f"Average Structural Similarity Index (SSIM): {avg_ssim:.4f}")
        print(f"Average Peak Signal-to-Noise Ratio (PSNR): {avg_psnr:.2f} dB")

        # save in a csv file
        csv_filename = os.path.join(path_to_data, f"csv_{dataset}_fed_num_clients_{n}_x{scale_factor}_overlap_{p}.csv")
        with open(csv_filename, "w") as f:
            f.write(f"MSE, RMSE, MAE, MAPE, WMAE, SSIM, PSNR\n")
            f.write(f"{mse:.4f}, {rmse:.4f}, {mae:.4f}, {mape:.4f}, {wmae:.4f}, {avg_ssim:.4f}, {avg_psnr:.2f}\n")
            f.close()

Train set shape: (8993, 80, 80, 1) Ground truth shape: (8993, 161, 161, 1)
Mean Squared Error (MSE): 0.0710
Root Mean Squared Error (RMSE): 0.2664
Mean Absolute Error (MAE): 0.1221
Mean Absolute Percentage Error: 97.6102
WMean Absolute Error (WMAE): 0.6530
Average Structural Similarity Index (SSIM): 0.0693
Average Peak Signal-to-Noise Ratio (PSNR): 12.16 dB
Train set shape: (8993, 80, 80, 1) Ground truth shape: (8993, 161, 161, 1)
Mean Squared Error (MSE): 0.0708
Root Mean Squared Error (RMSE): 0.2660
Mean Absolute Error (MAE): 0.1234
Mean Absolute Percentage Error: 95.9937
WMean Absolute Error (WMAE): 0.6511
Average Structural Similarity Index (SSIM): 0.0417
Average Peak Signal-to-Noise Ratio (PSNR): 12.17 dB
Train set shape: (8993, 80, 80, 1) Ground truth shape: (8993, 161, 161, 1)


FileNotFoundError: [Errno 2] No such file or directory: 'CNSM/fed_data/germany_fed_weights_4_x2_rounds_20_overlap_5.pkl'