# Clean points
### Sarah M. McDonald, smcdonald@chesapeakebay.net
This notebook cleans the attributes in the AA point shapefiles provided by the CIC.

In [10]:
# imports
import pandas as pd
import geopandas as gpd

# paths
# Both folders are left empty here and must be filled in before running; while
# they are empty, the derived paths below start at "/".
input_folder = r"" # path to folder containing the data from CIC
output_folder = r"" # path to a folder to write the cleaned points
# shapefile of change points, read into `gdf` further down
change_point_path = f"{input_folder}/change_matrices/AA_Points_For_Confusion_Matrix/CBW_change.shp"
# crosswalk CSV; the `value`, `class` and `SMM_added` columns are used when it is read
lcc_crosswalk_path = f"{input_folder}/t1-t3_lc_change_values KEY.csv" # NOTE: assumes the static classes are included


Prepare crosswalk table

In [None]:
# load the crosswalk as delivered and keep it untouched
cw_raw = pd.read_csv(lcc_crosswalk_path)

# a change label reads "<T1> to <T3>"; a static label has no " to " and so
# yields no second part
lc_parts = cw_raw['class'].str.split(' to ', n=1, expand=True)

# build the cleaned crosswalk in one pass
cw = (
    cw_raw
    .assign(
        T1=lc_parts[0],
        # static classes: T3 is a copy of T1
        T3=lc_parts[1].fillna(lc_parts[0]),
        # validGT is False only where SMM_added equals 1
        validGT=cw_raw['SMM_added'] != 1,
    )
    # the source columns are no longer needed once split / flagged
    .drop(columns=['SMM_added', 'class'])
)

Read change points

In [23]:
# read in points
# Later cells rely on the Class, Classified, GrndTruth and geometry columns of
# this table.
gdf = gpd.read_file(change_point_path)

Clean up the `Class` column: derive the `state`, `type` (change, buffer, or static), and `strata_cls` (the LCC class used as the sampling stratum) columns.

In [24]:
# Normalise the raw Class labels and derive the state, type and strata_cls
# columns. Every column is written with direct assignment (no merge, no
# "fill only where missing") so the cell gives the same result when re-run.

# add DC prefix to make syntax similar to other states
# (labels whose first "_"-separated token is "class" carry no state code)
first_token = gdf['Class'].str.split('_', n=1).str[0]
needs_dc_prefix = first_token == 'class'
gdf.loc[needs_dc_prefix, 'Class'] = (
    gdf.loc[needs_dc_prefix, 'Class'].str.replace('class', 'DC_class', regex=False)
)

# create state column from the first token of the (now prefixed) label;
# no change in DC doesn't have DC prefix, so its first token is NCBuff
gdf['state'] = gdf['Class'].str.split('_').str[0].replace('NCBuff', 'DC')

# create point type - change sample, buffer sample, or static sample (no change no buffer)
# 'change' is the default; 'static' is applied last so it wins over 'buffer'
is_buffer = gdf['Class'].str.contains('buffer', regex=False)
is_static = gdf['Class'].str.contains('NCBuff', regex=False)
gdf['type'] = 'change'
gdf.loc[is_buffer, 'type'] = 'buffer'
gdf.loc[is_static, 'type'] = 'static'

# replace to ensure class_## syntax
gdf['Class'] = gdf['Class'].str.replace('_0_', '_', regex=False)

# retrieve item after class: the token that directly follows "class_"
# (labels without "class_" get a missing value)
after_class = gdf['Class'].str.split('class_', n=1).str[1]
gdf['strata_cls'] = after_class.str.split('_', n=1).str[0]

# "leftovers" is not a class value, so it is stored as missing
gdf.loc[gdf['strata_cls'] == 'leftovers', 'strata_cls'] = None


Merge the T1 and T3 class names onto the mapped (observed), ground-truthed, and strata class values.

In [25]:
def add_class_names(points, crosswalk, key, suffix, extra_cols=()):
    """Attach the crosswalk's T1/T3 class names for one change-value column.

    Parameters
    ----------
    points : DataFrame or GeoDataFrame
        Table holding the change values in column ``key``.
    crosswalk : DataFrame
        Lookup with columns ``value``, ``T1``, ``T3`` and any ``extra_cols``.
    key : str
        Column of ``points`` that is matched against ``crosswalk['value']``.
    suffix : str
        The names are added as ``T1_<suffix>`` and ``T3_<suffix>``.
    extra_cols : iterable of str, optional
        Further crosswalk columns to bring across under their own names.

    Returns
    -------
    A new table with the added columns. It is a left join, so rows whose key
    has no crosswalk entry are kept with missing class names.
    """
    # The join key is renamed to a private name and dropped afterwards, so
    # repeated calls never leave value / value_x / value_y columns behind.
    lookup = crosswalk[['value', 'T1', 'T3', *extra_cols]].rename(columns={
        'value': '_cw_value',
        'T1': f'T1_{suffix}',
        'T3': f'T3_{suffix}',
    })
    # NOTE(review): a crosswalk with repeated `value` entries would duplicate
    # points here - confirm the values are unique.
    return (
        points
        .merge(lookup, left_on=key, right_on='_cw_value', how='left')
        .drop(columns='_cw_value')
    )


# add mapped T1 and T3 class names
gdf = add_class_names(gdf, cw, key='Classified', suffix='mapped')

# add ground-truthed T1 and T3 class names, plus the validGT flag of that value
gdf = add_class_names(gdf, cw, key='GrndTruth', suffix='Truth', extra_cols=['validGT'])

# add strata class T1 and T3 class names
# Keep the part before any "." and store missing strata as 0. The column is
# assigned directly: `gdf.loc[:, col] = ...` keeps the existing object dtype
# in pandas >= 2.0, which would leave the key as objects rather than integers.
strata_token = gdf['strata_cls'].str.split('.', n=1).str[0]
gdf['strata_cls'] = strata_token.fillna(0).astype(int)
gdf = add_class_names(gdf, cw, key='strata_cls', suffix='strata')

# add column showing if class used to stratify points matches mapped
# Only change samples outside the "leftover" strata are compared; every other
# point keeps 'NA'.
is_compared = (gdf['type'] == 'change') & ~gdf['Class'].str.contains('leftover')
is_match = gdf['Classified'] == gdf['strata_cls']
gdf['StrataMatch'] = 'NA'
gdf.loc[is_compared & is_match, 'StrataMatch'] = 'True'
gdf.loc[is_compared & ~is_match, 'StrataMatch'] = 'False'

Sort the points, add a unique ID, and rename and reorder the final columns

In [26]:
# column layout of the cleaned output
final_columns = [
    'uid', 'state', 'type', 'strata_cls',
    'T1_strata', 'T3_strata',
    'T1_mapped', 'T3_mapped',
    'T1_Truth', 'T3_Truth',
    'StrataMatch', 'orig_strata', 'GrndTruth', 'validGT', 'origMap',
    'geometry',
]

gdf = (
    gdf
    # order rows by state, then type, then strata class
    .sort_values(by=['state', 'type', 'strata_cls'])
    # unique integer id, numbered from 1 in the sorted order
    .assign(uid=lambda pts: range(1, len(pts) + 1))
    # rename the source Class and Classified columns
    .rename(columns={'Class': 'orig_strata', 'Classified': 'origMap'})
    # keep only the final columns, in the final order
    [final_columns]
)

Write cleaned data to geopackage

In [None]:
# create gpkg path
# (the file is placed directly inside output_folder)
path = f"{output_folder}/lcc_aa_points_cleaned.gpkg"

# write results
# the cleaned points go to the layer named "AA_clean", using the GPKG driver
gdf.to_file(path, layer='AA_clean', driver="GPKG")