# Compute Fuzzy Accuracies
### Sarah M. McDonald, smcdonald@chesapeakebay.net

Imports and paths

In [100]:
# imports
import pandas as pd
import geopandas as gpd
import os
from openpyxl import load_workbook

# paths
# NOTE: `folder` is left empty here; set it before running, because every path below is built from it.
folder = r"" # path to folder containing data
# GeoPackage that is read into `points` (supplies uid / GrndTruth and the other point attributes used below)
points_path = f"{folder}/clean_points/lcc_aa_points_cleaned.gpkg"
# .dbf tables read into `lc` and `lcc`; presumably 3x3-window tabulate-area outputs for land cover
# and land-cover change, judging by the file names -- TODO confirm
ta_lc  = f"{folder}/clean_points/fuzzy_TA/DC_lc_TA_3x3.dbf"
ta_lcc = f"{folder}/clean_points/fuzzy_TA/DC_lcc_TA_3x3.dbf"
# CSV crosswalk read into `cw`; the code below uses its 'value' and 'class' columns
cw_path = f"{folder}/CIC/t1-t3_lc_change_values KEY.csv"
# output workbook; it is deleted and recreated by the "Read in Data" cell on every run
excel_path = f"{folder}/clean_points/fuzzy_TA/summary_tables/DC_fuzzy3x3_tables.xlsx" # fuzzy accuracy tables

Read in Data

In [None]:
# Read the two tabulate-area tables and the cleaned points as plain attribute tables.
# errors='ignore' keeps the cell working when read_file returns a table without a
# 'geometry' column (can happen for attribute-only files such as .dbf, depending on
# the geopandas version / IO engine -- NOTE(review): confirm for your environment).
lc = (
    gpd.read_file(ta_lc)
    .drop(columns='geometry', errors='ignore')
)
lcc = (
    gpd.read_file(ta_lcc)
    .drop(columns='geometry', errors='ignore')
)
points = (
    gpd.read_file(points_path)
    .drop(columns='geometry', errors='ignore')
)

# start from a clean workbook so sheets from a previous run never linger
if os.path.isfile(excel_path):
    print("Excel path exists - deleting")
    os.remove(excel_path)

# write TA to workbook
# One context-managed writer creates the file and both sheets. This replaces the
# `writer.book = book` hack, which raises AttributeError on pandas >= 2.0 (the `book`
# property has no setter), and guarantees the file is saved/closed even on error.
with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
    lc.to_excel(writer, sheet_name="lc_TA", index=False)
    lcc.to_excel(writer, sheet_name="lcc_TA", index=False)

Merge Ground Truth to TA Data

In [102]:
# Ground-truth lookup (uid -> GrndTruth) shared by both joins.
# `filter(items=...)` keeps only the listed columns that actually exist in `points`.
gt_by_uid = points.filter(items=['uid', 'GrndTruth'], axis=1)

# Inner joins (pandas default): only rows whose UID matches a point uid are kept.
lc_df = lc.merge(gt_by_uid, left_on='UID', right_on='uid')
lcc_df = lcc.merge(gt_by_uid, left_on='UID', right_on='uid')

Add the LC Raster Value for Easy Querying

In [103]:
# Crosswalk table; the code below relies on its 'value' and 'class' columns.
cw = pd.read_csv(cw_path)

# For a class named "<a> to <b>" keep the text after the first " to ";
# classes without " to " have no second part, so they fall back to their own name.
to_part = cw['class'].str.split(' to ', n=1, expand=True)[1]
cw['static'] = to_part.fillna(cw['class'])

# Self-join: look up the raster value whose class name equals each row's 'static' name.
# The result maps every crosswalk value to the value of its static class (GT_static).
static_names = cw.filter(items=['value', 'static'], axis=1)
class_values = cw.filter(items=['value', 'class'], axis=1)
cw = (
    static_names
    .merge(class_values, left_on='static', right_on='class')
    .rename(columns={'value_x': 'value', 'value_y': 'GT_static'})
    .filter(items=['value', 'GT_static'], axis=1)
)

# Attach the static value of the ground truth to the LC table (inner join on GrndTruth).
lc_df = (
    lc_df
    .merge(cw, left_on='GrndTruth', right_on='value')
    .drop(columns='value')
)

# Attach the static value of the original map class to every point (left join keeps all points).
points = (
    points
    .merge(cw, left_on='origMap', right_on='value', how='left')
    .rename(columns={'GT_static': 'Map_static'})
)

Compute Boolean Column showing if the GrndTruth Value Exists within the Window

In [104]:
def _flag_truth_in_window(table, truth_col, flag_col):
    """Add a boolean column telling whether each row's ground-truth class occurs in its window.

    For every unique value ``v`` in ``table[truth_col]`` the column ``VALUE_<v>`` is
    looked up; rows whose truth equals ``v`` and whose ``VALUE_<v>`` is > 0 get
    ``flag_col`` set to True. Rows whose class has no ``VALUE_`` column stay False.

    Parameters
    ----------
    table : pandas.DataFrame
        Modified in place; must contain ``truth_col`` and the ``VALUE_*`` columns.
    truth_col : str
        Column holding the ground-truth class value.
    flag_col : str
        Name of the boolean column to create (overwritten if it exists).
    """
    table.loc[:, flag_col] = False
    for class_value in table[truth_col].unique().tolist():
        # A float truth column (e.g. one containing NaN) yields 3.0, and "VALUE_3.0"
        # would never match "VALUE_3"; normalise integral floats to int first.
        if isinstance(class_value, float) and class_value.is_integer():
            label = int(class_value)
        else:
            label = class_value
        area_col = f"VALUE_{label}"
        if area_col in table.columns:
            hit = (table[truth_col] == class_value) & (table[area_col] > 0)
            table.loc[hit, flag_col] = True


# get list of unique ground truth values
lc_gt  = lc_df['GT_static'].unique().tolist()
lcc_gt = lcc_df['GrndTruth'].unique().tolist()

# Mark T/F if any area for the ground-truth class is present in the window.
# The work is done in a helper so no loop variable named `lc` / `lcc` overwrites the
# DataFrames of the same name (which broke re-running the earlier merge cell).
_flag_truth_in_window(lc_df, 'GT_static', 'GT_st_3x3')   # static land cover
_flag_truth_in_window(lcc_df, 'GrndTruth', 'GT_ch_3x3')  # land cover change

Merge results into single table

In [105]:
# Combine the static flags, the change flags and the point attributes into one table.
# The outer join keeps uids present in only one of lc_df / lcc_df; the left join then
# adds the point attributes for every uid kept.
df = (
    lc_df
    .filter(items=['uid', 'GT_static' , 'GT_st_3x3'], axis=1)
    .merge(
        (
            lcc_df
            .filter(items=['uid', 'GT_ch_3x3'], axis=1)
        ),
        on='uid',
        how='outer'
    )
    .merge(
        (
            points
            .filter(items=['uid', 'type', 'GrndTruth', 'strata_cls', 'validGT', 'StrataMatch', 'Map_static'])
        ),
        on='uid',
        how='left'
    )
)

# fix the column order for the output sheet and sort by point id
df = df[['uid', 'type', 'strata_cls', 'Map_static', 'GrndTruth', 'GT_ch_3x3', 'GT_static', 'GT_st_3x3', 'validGT', 'StrataMatch']]
df = df.sort_values('uid')

# write the merged per-point table to the existing workbook
# Append mode replaces the `writer.book = book` hack, which raises AttributeError on
# pandas >= 2.0. if_sheet_exists='replace' (pandas >= 1.3) makes re-running this cell
# overwrite "all_data" instead of failing or creating a duplicate sheet.
with pd.ExcelWriter(excel_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    df.to_excel(writer, sheet_name="all_data", index=False)

Summarize Results for Static

In [106]:
def get_pivot(df_, classes=None):
    """Build a fuzzy confusion matrix with totals and accuracy rows/columns.

    Rows where ``GT_st_3x3`` is True are counted as agreement: their counts are moved
    onto the diagonal (mapped class set equal to the ground-truth class). Rows where
    it is False keep their original mapped class.

    Parameters
    ----------
    df_ : pandas.DataFrame
        One row per (GT_static, Map_static, GT_st_3x3) combination with a ``count``
        column. ``GT_st_3x3`` must be boolean.
    classes : iterable, optional
        Class values to use for the matrix rows and columns, in order. Defaults to
        1..12. Classes in the data but not in this list are dropped; listed classes
        missing from the data are added as zeros.

    Returns
    -------
    pandas.DataFrame
        Index = mapped classes (``Map_static``) plus ``'frequency'`` and ``'PA'``;
        columns = ground-truth classes (``GT_static``) plus ``'frequency'`` and
        ``'UA'``. ``UA`` is diagonal / row total, ``PA`` is diagonal / column total,
        and the ``('PA', 'UA')`` cell is the overall accuracy (diagonal sum / grand
        total). Ratios with a zero denominator are NaN.
    """
    classes = list(range(1, 13)) if classes is None else list(classes)

    is_hit = df_['GT_st_3x3'].astype(bool)

    # fuzzy hits: total per ground-truth class, placed on the diagonal
    hits = (
        df_.loc[is_hit]
        .groupby('GT_static', as_index=False)['count']
        .sum()
    )
    hits['Map_static'] = hits['GT_static']

    # misses keep the class that was actually mapped
    misses = df_.loc[~is_hit, ['GT_static', 'Map_static', 'count']]

    # pd.concat replaces DataFrame.append (removed in pandas 2.0). Summing per cell
    # before reshaping means a miss that already sits on the diagonal is added to the
    # hits instead of making the pivot fail on duplicate entries.
    pivot = (
        pd.concat([misses, hits], ignore_index=True)
        .groupby(['Map_static', 'GT_static'])['count']
        .sum()
        .unstack('GT_static')
        .fillna(0)
    )

    # order rows/columns by `classes` and add missing rows/columns as zeros
    pivot = pivot.reindex(index=classes, columns=classes, fill_value=0)

    # totals: row sums first, so the 'frequency' row also carries the grand total
    pivot['frequency'] = pivot.sum(axis=1)
    pivot.loc['frequency'] = pivot.sum(axis=0)

    # per-class UA (by row) and PA (by column)
    nan = float('nan')
    diag_sum = 0
    for cls in classes:
        correct = pivot.loc[cls, cls]
        mapped_total = pivot.loc[cls, 'frequency']
        truth_total = pivot.loc['frequency', cls]
        pivot.loc[cls, 'UA'] = correct / mapped_total if mapped_total else nan
        pivot.loc['PA', cls] = correct / truth_total if truth_total else nan
        diag_sum += correct

    # overall accuracy goes in the corner cell
    grand_total = pivot.loc['frequency', 'frequency']
    pivot.loc['PA', 'UA'] = diag_sum / grand_total if grand_total else nan

    return pivot


In [None]:
def _count_static(points_df):
    """Count points per (GT_static, Map_static, GT_st_3x3) combination.

    Returns a DataFrame with those three columns plus ``count``, which is the input
    shape `get_pivot` expects. Rows with a missing value in any key are not counted
    (pandas groupby default).
    """
    keys = ['GT_static', 'Map_static', 'GT_st_3x3']
    return (
        points_df
        .filter(keys, axis=1)
        .groupby(keys)
        .size()
        .reset_index(name="count")
    )


# all info for static matrices
static_all = _count_static(df)
all_static_pivot = get_pivot(static_all)

# static info for non-change pixels (i.e. buffers and static)
static_noChange = _count_static(df.query("type != 'change'"))
static_noChange_pivot = get_pivot(static_noChange)

# static info from static points only
static_noBuffer = _count_static(df.query("type == 'static'"))
static_noBuffer_pivot = get_pivot(static_noBuffer)

# write confusion matrices
# Append mode replaces the `writer.book = book` hack, which raises AttributeError on
# pandas >= 2.0. if_sheet_exists='replace' (pandas >= 1.3) lets the cell be re-run
# without failing or duplicating sheets.
with pd.ExcelWriter(excel_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    all_static_pivot.to_excel(writer, sheet_name="st_confusion_all")
    static_noChange_pivot.to_excel(writer, sheet_name="st_confusion_bufStatic")
    static_noBuffer_pivot.to_excel(writer, sheet_name="st_confusion_staticOnly")