Code created with ChatGPT.

Take the PyLC JSON output in the accuracy metrics folder, extract the F1 score and number of pixels ("support") for each land cover category (LCC) in each image, and write them to a .csv file for use in R.

In [None]:
import os
import pandas as pd
import json

# Function to read JSON data from a file
def read_json_file(json_file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        json_file_path: Path to the JSON file to read.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces for it).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; decode explicitly rather than relying
    # on the platform's locale default, which is not UTF-8 everywhere.
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)

# Function to process the JSON data and create a single DataFrame for all LCCs
def process_json_data(json_data, file_name):
    """Build one row per land cover category (LCC) found in a report.

    Args:
        json_data: Parsed JSON document; its ``"report"`` entry is looked up
            by LCC code, and each matching entry must provide ``"f1-score"``
            and ``"support"``.
        file_name: Value stored in the ``"Image"`` column of every row.

    Returns:
        A DataFrame with columns ``Image``, ``LCC``, ``F1 Score`` and
        ``Support``, holding one row for each LCC code in the fixed list
        below that has a non-empty entry in the report. The frame is empty
        (but still has those columns) when no code matches.

    Raises:
        KeyError: If ``json_data`` has no ``"report"`` entry, or a matching
            LCC entry lacks ``"f1-score"`` or ``"support"``.
    """
    lccs = ["NC", "B-MW", "C", "H-S", "S-G-R", "WL", "WT", "S-I", "RA"]
    columns = ["Image", "LCC", "F1 Score", "Support"]

    report = json_data["report"]

    # Collect plain rows and build the DataFrame once. Growing a frame with
    # repeated pd.concat onto an empty frame copies the data on every
    # iteration and is the pattern recent pandas versions warn about.
    rows = []
    for lcc in lccs:
        lcc_data = report.get(lcc)
        if not lcc_data:
            # Category absent (or empty) in this image's report: no row.
            continue
        rows.append({
            "Image": file_name,  # Use the file name as the image name
            "LCC": lcc,
            "F1 Score": lcc_data["f1-score"],
            "Support": lcc_data["support"],
        })

    return pd.DataFrame(rows, columns=columns)

# Specify the folder containing the JSON files
json_folder = 'path_to_your/pylc_model/metrics'

# Specify the output CSV file path
output_csv_file = 'path_to_your_data/lccf1.csv'

# Process each JSON file in the folder, keeping one DataFrame per image.
# The listing is sorted so the row order of the CSV is reproducible
# (os.listdir returns entries in arbitrary order).
per_image_frames = []
for file_name in sorted(os.listdir(json_folder)):
    if not file_name.endswith('.json'):
        continue
    json_file_path = os.path.join(json_folder, file_name)
    json_data = read_json_file(json_file_path)

    # Process the JSON data and create a DataFrame for all LCCs
    per_image_frames.append(process_json_data(json_data, file_name))

# Combine everything in a single concat instead of re-concatenating inside
# the loop. pd.concat raises on an empty list, so fall back to an empty
# frame with the expected columns when the folder has no JSON files.
if per_image_frames:
    all_lccs_data = pd.concat(per_image_frames, ignore_index=True)
else:
    all_lccs_data = pd.DataFrame(columns=["Image", "LCC", "F1 Score", "Support"])

# Save the DataFrame to a single CSV file
all_lccs_data.to_csv(output_csv_file, index=False)

# Display the overall DataFrame without index numbers.
# Styler.hide_index() was removed in pandas 2.0; Styler.hide(axis="index")
# is its replacement. Fall back to the old method on pandas versions that
# predate Styler.hide.
styler = all_lccs_data.style
if hasattr(styler, "hide"):
    styler = styler.hide(axis="index")
else:
    styler = styler.hide_index()
display(styler)


Take the PyLC JSON output in the accuracy metrics folder, extract the weighted F1 score (F1), inverse weighted intersection over union (IoU), and Matthews correlation coefficient (MCC) for each image, and write them to a .csv file for use in R.

In [None]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import shapiro

# Path to the folder containing the .json files
folder_path = "path_to_your/pylc_model/metrics"

# Substrings stripped from a file name to obtain the display name.
# Order matters: "_tiff" must be removed before "_tif", otherwise removing
# "_tif" first leaves a stray "f" behind. The previous workaround (dropping
# any trailing "f") also truncated names that legitimately end in "f".
name_noise = (
    "_scale_1.0_eval.json",
    "_eval.json",
    "_tiff",
    "_tif",
    "_jpeg",
    "_jpg",
)

# List to store file names and corresponding metrics
metrics_list = []

# Loop through each file in the folder (sorted for a reproducible order,
# since os.listdir returns entries in arbitrary order)
for filename in sorted(os.listdir(folder_path)):
    file_ext = os.path.splitext(filename)[1].lower()

    # Skip anything that is not a .json file
    if file_ext != ".json":
        continue

    file_path = os.path.join(folder_path, filename)

    # Load the JSON data from the file (JSON is UTF-8 by specification)
    with open(file_path, "r", encoding="utf-8") as json_file:
        json_data = json.load(json_file)

    # Extract the metrics values; IoU and MCC may be missing (None)
    f1_value = json_data.get("f1")
    iou_value = json_data.get("iou")
    mcc_value = json_data.get("mcc")

    # Files without an "f1" entry are skipped entirely
    if f1_value is None:
        continue

    # Remove the specified substrings from the file name
    display_name = filename
    for noise in name_noise:
        display_name = display_name.replace(noise, "")

    # Append display name and metrics values to the list
    metrics_list.append((display_name, f1_value, iou_value, mcc_value))

# Create a DataFrame from the list of file names and metrics values
columns = ["File", "F1 Score", "IOU", "MCC"]
df = pd.DataFrame(metrics_list, columns=columns)

# Sort the DataFrame by ascending F1 score
sorted_df = df.sort_values(by="F1 Score", ascending=True)

# Export the DataFrame to a CSV file in the same folder
csv_filename = os.path.join(folder_path, "Summary_F1_IOU_MCC.csv")
sorted_df.to_csv(csv_filename, index=False)

# Display the DataFrame
print(sorted_df)
print(f"\nCSV file saved at: {csv_filename}")