In [1]:
import pandas as pd
import numpy as np

### Area and accuracy estimation for unequal probability sampling (inclusion probabilities proportional to unit area within each stratum), from Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change" https://doi.org/10.1016/j.rse.2025.114714, Appendix A.1.2

#### Example of input data

In [2]:
#Read strata info table (tab-separated text file) with columns:
#"Stratum": stratum ID, 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used
#"Area": stratum area in km2 or any other area units, needs to be consistent with pixel size area units in data table;
strata = pd.read_csv('A.1.2.Strata_info.txt', sep = '\t')

In [3]:
#Preview the first rows of the strata table, with the index column hidden
strata.head().style.hide(axis="index")

Stratum,Area
1,595255.0128
2,332992.9026
3,946369.3351
4,486272.5356
5,669855.4746


In [4]:
#Read sample interpretation table (tab-separated text file) with columns:
#"Stratum": stratum ID, 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used
#"Pixarea": area of each sample unit (in units that are desired for area reporting);
#"Map"(for accuracy assessment only): proportion of target class from the map (0-1) for each sample unit;
#"Reference": proportion of target class from reference sample classification for each sample unit;
#allowed values are from -1 to 1 for area estimation, and from 0 to 1 for map accuracy assessment.
#(optional)"RefType": type labels, if the area of target class needs to be estimated separately for multiple sub-types;
#(optional)"Correct": proportion of sample unit (0-1), which is correctly mapped. 
#This column is necessary for maps with more than two classes. 
#For a map with two classes it is computed in the function "estimate_OA_two_classes" directly from "Map" and "Reference" columns

data = pd.read_csv('A.1.2.Sample_data.txt', sep ='\t')

In [5]:
#Attach the stratum area to every sample unit by merging the sample table with the strata info table
#(the merge uses the columns the two tables have in common), then switch to the
#column names expected by the estimation functions: "Area" -> "Ah", "Pixarea" -> "ai"
data = data.merge(strata).rename(columns = {'Area':'Ah','Pixarea':'ai'})

In [6]:
#Preview the first rows of the merged sample table, with the index column hidden
data.head().style.hide(axis="index")

Stratum,ai,Map,Reference,RefType,Correct,Ah
10,0.000638,1.0,1.0,Type1,1.0,161635.388
10,0.000626,1.0,1.0,Type1,1.0,161635.388
10,0.000769,1.0,1.0,Type1,1.0,161635.388
10,0.00067,1.0,1.0,Type1,1.0,161635.388
10,0.000649,1.0,1.0,Type1,1.0,161635.388


#### Functions to estimate class area and its standard error

In [7]:
def estimate_area(df: pd.DataFrame) -> float:
    """
    Estimate the area of the target class from sample reference values
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Unbiased estimator for sampling both with and without replacement.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for simple random or systematic sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting)
    "Reference" (0-1 - proportion of the sample unit identified as target class in the reference classification;
    values from -1 to 1 may be used to compute net change area of a target class:
    negative proportions mean net loss, positive proportions mean net gain)
    "Ah" (area of stratum h, in the same units as ai)
    ~~~
    Returns the estimated target class area in units of ai;
    a negative area in net change computations means overall net loss of the target class
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.26-A.28
    """
    #Per-unit quantities:
    #"yi" - reference area of the target class within sample unit i
    #"phi" - draw probability of the unit within its stratum (Equation A.28)
    units = pd.DataFrame({
        'Stratum': df['Stratum'],
        'yi': df['ai'] * df['Reference'],
        'phi': df['ai'] / df['Ah'],
    })
    units['expanded'] = units['yi'] / units['phi']

    #Equation A.27: stratum estimate is the mean of yi / phi over the nh units of the stratum
    per_stratum = units.groupby('Stratum').agg(nh=('yi', 'count'), total=('expanded', 'sum'))
    stratum_area = 1 / per_stratum['nh'] * per_stratum['total']

    #Equation A.26: total area is the sum of the stratum estimates
    return stratum_area.sum()


In [8]:
def estimate_area_SE(df: pd.DataFrame) -> float:
    """ 
    Function to estimate standard error (SE) of the target class area from sample reference values 
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Presented variance estimator is unbiased for sampling with replacement, and an approximation for sampling without replacement.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for simple random or systematic nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting)
    "Reference" (0-1 - proportion of the sample pixel/polygon identified as target class in reference classification;
    Reference data column could be defined as values in range from -1 to 1 to compute net change area of a target class,
    in this case negative proportions mean net loss, and positive proportions - net gain)
    "Ah" (area of stratum h, in the same units as ai)
    ~~~
    Returns estimated SE of the target class area in units of ai.
    SE is non-negative, even if the estimated target class area is negative.
    Returns NaN if any stratum has fewer than two sample units: the stratum variance (equation A.30)
    divides by nh * (nh - 1) and is undefined in that case, so no SE can be reported for the total.
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.27-A.30
    """
    #Create a copy of columns "Stratum","Ah", "ai" and a new column "yi", 
    #which is a reference area of the target class determined for sample unit i 
    df1 = pd.concat([df['Stratum'], df['Ah'], df['ai'],
                     pd.Series(df['ai'] * df['Reference'],name = 'yi')], axis = 1)
   
    #Equation A.28
    df1['phi'] = df1['ai'] / df1['Ah']

    #Equation A.27
    df1['yidivphi'] = df1['yi'] / df1['phi']
    ByStratum = df1.groupby(by = ['Stratum'])
    nh = ByStratum.yi.count()
    Yh = 1 / nh * ByStratum.yidivphi.sum()

    #Data preparation for Equation A.30: attach the stratum estimate Yh to every sample unit
    #(join key is stated explicitly so the merge cannot pick up other shared columns)
    Yhdf = Yh.to_frame(name='Yh').reset_index()
    df1 = df1.merge(Yhdf, on = 'Stratum')

    #Equation A.30
    df1['ForVar'] = (df1['yidivphi'] - df1['Yh'])**2
    ByStratum = df1.groupby(by = ['Stratum'])
    Varh = ByStratum.ForVar.sum() / ( nh * (nh - 1))

    #Equation A.29
    #skipna=False: a stratum with nh < 2 yields 0/0 = NaN above; the default NaN-skipping sum
    #would silently drop that stratum's variance and understate the SE
    Var = Varh.sum(skipna = False)
    SE = np.sqrt(Var)
    
    return SE

In [9]:
#Estimate target class area
estimate_area(data)

1223903.8326854412

In [10]:
#Estimate standard error of the target class area
estimate_area_SE(data)

31611.122385173083

In [11]:
#Estimate target class area (and its SE) for each unique type from the column RefType

functions = [estimate_area, estimate_area_SE]
names = ['area, km²','area SE, km²']

#Collect one record per type and build the table once, instead of re-concatenating a DataFrame on every iteration
records = []
for classtype in data['RefType'].unique():
    datacopy = data.copy()
    #A new reference column, where reference values are set to zero if class type is different from the current class type
    datacopy['Reference'] = np.where(datacopy['RefType'] == classtype, datacopy['Reference'], 0)

    record = {"Estimate": classtype}
    record.update({nm: fn(datacopy) for fn, nm in zip(functions, names)})
    records.append(record)

#Rows are listed in reverse order of first appearance of each type in the data table
results = pd.DataFrame(records[::-1], columns = ["Estimate"] + names)

results.style.hide(axis="index").format({name: '{:.4f}' for name in names})


Estimate,"area, km²","area SE, km²"
Type3,383052.9482,32929.1133
Type2,608160.8876,38476.2797
Type0,0.0,0.0
Type1,232689.9969,27599.9173


#### Functions to estimate overall map accuracy and its standard error

In [12]:
def estimate_OA_two_classes (df: pd.DataFrame) -> float:
    """
    Estimate overall accuracy (OA) of a map that has two classes (target class vs. no target class)
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Unbiased estimator for sampling both with and without replacement.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting, consistent with Ah)
    "Map" (0-1 - proportion of the sample unit identified as target class in the map)
    "Reference" (0-1 - proportion of the sample unit identified as target class in the reference classification)
    "Ah" (stratum area, in the units desired for area reporting, consistent with ai)
    ~~~
    Returns the estimated overall accuracy of the map, in percent of the total study area
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.25, A.31-A.33
    ~~~
    A ratio estimator (equation A.31) is used here instead of equation A.26, as recommended in Appendix A.2.2 of the paper.
    Estimating the area of agreement with equation A.26 and dividing it by the known total area of the study region
    is also correct, but may yield OA above 100% in rare cases when the area of agreement is very close to the total area.
    With the ratio estimator both the correctly classified area (numerator) and the total area of the study region
    (denominator) are estimated from the sample, so the estimated OA does not exceed 100%.
    ~~~
    Area of each sample unit (ai) is used as xi in equation A.33
    """
    #Proportion of each unit where map and reference agree, counted over both classes:
    #min("Map", "Reference") is agreement on the target class,
    #min(1-"Map", 1-"Reference") is agreement on the non-target class
    target_agreement = np.minimum(df['Map'], df['Reference'])
    background_agreement = np.minimum(1 - df['Map'], 1 - df['Reference'])

    #"yi" - area of the sample unit that is correctly classified
    units = df[['Stratum', 'Ah', 'ai']].copy()
    units['yi'] = (target_agreement + background_agreement) * df['ai']

    #Equation A.25: inclusion probability of each sampled unit,
    #where nh is the number of sample units in the unit's stratum
    units['nh'] = units.groupby('Stratum')['yi'].transform('count')
    incl_prob = units['nh'] * units['ai'] / units['Ah']

    #Equation A.32: estimated correctly classified area
    correct_area = (units['yi'] / incl_prob).sum()
    #Equation A.33: estimated total area
    total_area = (units['ai'] / incl_prob).sum()

    #Equation A.31, expressed in percent
    return correct_area / total_area * 100

In [13]:
#Estimate overall accuracy (in percent) of the two-class map from "Map" and "Reference" columns
estimate_OA_two_classes(data)

92.08916693742646

In [14]:
def estimate_OA_multiple_classes (df: pd.DataFrame) -> float:
    """
    Estimate overall accuracy (OA) of a map that has multiple classes
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Unbiased estimator for sampling both with and without replacement.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting, consistent with Ah)
    "Correct" (0-1 - proportion of the sample unit that is correctly mapped)
    "Ah" (stratum area, in the units desired for area reporting, consistent with ai)
    ~~~
    Returns the estimated overall accuracy of the map, in percent of the total study area
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.25, A.31-A.33
    ~~~
    A ratio estimator (equation A.31) is used here instead of equation A.26, as recommended in Appendix A.2.2 of the paper.
    Estimating the area of agreement with equation A.26 and dividing it by the known total area of the study region
    is also correct, but may yield OA above 100% in rare cases when the area of agreement is very close to the total area.
    With the ratio estimator both the correctly classified area (numerator) and the total area of the study region
    (denominator) are estimated from the sample, so the estimated OA does not exceed 100%.
    ~~~
    Area of each sample unit (ai) is used as xi in equation A.33
    """
    #"yi" - area of the sample unit that is correctly classified, computed as "Correct"*"ai"
    units = df[['Stratum', 'ai', 'Ah']].copy()
    units['yi'] = df['Correct'] * df['ai']

    #Equation A.25: inclusion probability of each sampled unit,
    #where nh is the number of sample units in the unit's stratum
    units['nh'] = units.groupby('Stratum')['yi'].transform('count')
    incl_prob = units['nh'] * units['ai'] / units['Ah']

    #Equation A.32: estimated correctly classified area
    correct_area = (units['yi'] / incl_prob).sum()
    #Equation A.33: estimated total area
    total_area = (units['ai'] / incl_prob).sum()

    #Equation A.31, expressed in percent
    return correct_area / total_area * 100

In [15]:
#Estimate overall accuracy (in percent) from the "Correct" column
estimate_OA_multiple_classes(data)

92.08916693742646

In [16]:
def estimate_SE_OA_two_classes (df: pd.DataFrame) -> float:
    """
    Estimate the standard error (SE) of overall accuracy of a map that has two classes (target class vs. no target class)
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Presented variance estimator is unbiased for sampling with replacement, and an approximation for sampling without replacement.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting, consistent with Ah)
    "Map" (0-1 - proportion of the sample unit identified as target class in the map)
    "Reference" (0-1 - proportion of the sample unit identified as target class in the reference classification)
    "Ah" (stratum area, in the units desired for area reporting, consistent with ai)
    ~~~
    Returns the estimated standard error of the overall accuracy of the map, in percent of the total study area
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.25, A.31-A.33, A.35
    ~~~
    Area of each sample unit (ai) is used as xi
    """
    #Proportion of each unit where map and reference agree, counted over both classes:
    #min("Map", "Reference") is agreement on the target class,
    #min(1-"Map", 1-"Reference") is agreement on the non-target class
    target_agreement = np.minimum(df['Map'], df['Reference'])
    background_agreement = np.minimum(1 - df['Map'], 1 - df['Reference'])

    #"yi" - area of the sample unit that is correctly classified
    units = df[['Stratum', 'Ah', 'ai']].copy()
    units['yi'] = (target_agreement + background_agreement) * df['ai']

    #Equation A.25: inclusion probability of each sampled unit,
    #where nh is the number of sample units in the unit's stratum
    units['nh'] = units.groupby('Stratum')['yi'].transform('count')
    incl_prob = units['nh'] * units['ai'] / units['Ah']

    #Equation A.32: estimated correctly classified area
    correct_area = (units['yi'] / incl_prob).sum()
    #Equation A.33: estimated total area
    total_area = (units['ai'] / incl_prob).sum()

    #Equation A.31: overall accuracy as a proportion
    ratio = correct_area / total_area

    #Equation A.35: variance of the ratio from the per-unit residuals yi - R * xi
    residual = units['yi'] - ratio * units['ai']
    variance = ((1 - incl_prob) * residual**2 / incl_prob**2).sum() / total_area**2

    #SE expressed in percent
    return np.sqrt(variance)*100

In [17]:
#Estimate standard error (in percent) of the overall accuracy of the two-class map
estimate_SE_OA_two_classes(data)

0.74436703027652

In [18]:
def estimate_SE_OA_multiple_classes (df: pd.DataFrame) -> float:
    """
    Estimate the standard error (SE) of overall accuracy of a map that has multiple classes
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Presented variance estimator is unbiased for sampling with replacement, and an approximation for sampling without replacement.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting, consistent with Ah)
    "Correct" (0-1 - proportion of the sample unit that is correctly mapped)
    "Ah" (stratum area, in the units desired for area reporting, consistent with ai)
    ~~~
    Returns the estimated standard error of the overall accuracy of the map, in percent of the total study area
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.25, A.31-A.33, A.35
    ~~~
    Area of each sample unit (ai) is used as xi
    """
    #"yi" - area of the sample unit that is correctly classified, computed as "Correct"*"ai"
    units = df[['Stratum', 'ai', 'Ah']].copy()
    units['yi'] = df['Correct'] * df['ai']

    #Equation A.25: inclusion probability of each sampled unit,
    #where nh is the number of sample units in the unit's stratum
    units['nh'] = units.groupby('Stratum')['yi'].transform('count')
    incl_prob = units['nh'] * units['ai'] / units['Ah']

    #Equation A.32: estimated correctly classified area
    correct_area = (units['yi'] / incl_prob).sum()
    #Equation A.33: estimated total area
    total_area = (units['ai'] / incl_prob).sum()

    #Equation A.31: overall accuracy as a proportion
    ratio = correct_area / total_area

    #Equation A.35: variance of the ratio from the per-unit residuals yi - R * xi
    residual = units['yi'] - ratio * units['ai']
    variance = ((1 - incl_prob) * residual**2 / incl_prob**2).sum() / total_area**2

    #SE expressed in percent
    return np.sqrt(variance)*100

In [19]:
#Estimate standard error (in percent) of the overall accuracy computed from the "Correct" column
estimate_SE_OA_multiple_classes(data)

0.74436703027652

#### Functions to estimate user's and producer's accuracy of target class and their standard errors

In [20]:
def estimate_UA (df: pd.DataFrame) -> float:
    """
    Estimate user's accuracy (UA) of the target class
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Unbiased estimator for sampling both with and without replacement.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting, consistent with Ah)
    "Map" (0-1 - proportion of the sample unit identified as target class in the map)
    "Reference" (0-1 - proportion of the sample unit identified as target class in the reference classification)
    "Ah" (stratum area, in the units desired for area reporting, consistent with ai)
    ~~~
    Returns the estimated user's accuracy of the target class, in percent
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.25, A.31-A.33
    """
    #"yi" - area of the sample unit correctly classified as target class: min("Map", "Reference")*"ai"
    #"xi" - area of the sample unit mapped as target class: "Map"*"ai"
    units = df[['Stratum', 'Ah', 'ai']].copy()
    units['yi'] = np.minimum(df['Map'], df['Reference']) * df['ai']
    units['xi'] = df['Map'] * df['ai']

    #Equation A.25: inclusion probability of each sampled unit,
    #where nh is the number of sample units in the unit's stratum
    units['nh'] = units.groupby('Stratum')['yi'].transform('count')
    incl_prob = units['nh'] * units['ai'] / units['Ah']

    #Equation A.32: estimated area correctly mapped as target class
    numerator = (units['yi'] / incl_prob).sum()
    #Equation A.33: estimated area mapped as target class
    denominator = (units['xi'] / incl_prob).sum()

    #Equation A.31, expressed in percent
    return numerator / denominator * 100

In [21]:
#Estimate user's accuracy (in percent) of the target class
estimate_UA(data)

80.6910655161679

In [22]:
def estimate_PA (df: pd.DataFrame) -> float:
    """
    Estimate producer's accuracy (PA) of the target class
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Unbiased estimator for sampling both with and without replacement.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting, consistent with Ah)
    "Map" (0-1 - proportion of the sample unit identified as target class in the map)
    "Reference" (0-1 - proportion of the sample unit identified as target class in the reference classification)
    "Ah" (stratum area, in the units desired for area reporting, consistent with ai)
    ~~~
    Returns the estimated producer's accuracy of the target class, in percent
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.25, A.31-A.33
    """
    #"yi" - area of the sample unit correctly classified as target class: min("Map", "Reference")*"ai"
    #"xi" - area of the sample unit identified as target class in the reference classification: "Reference"*"ai"
    units = df[['Stratum', 'Ah', 'ai']].copy()
    units['yi'] = np.minimum(df['Map'], df['Reference']) * df['ai']
    units['xi'] = df['Reference'] * df['ai']

    #Equation A.25: inclusion probability of each sampled unit,
    #where nh is the number of sample units in the unit's stratum
    units['nh'] = units.groupby('Stratum')['yi'].transform('count')
    incl_prob = units['nh'] * units['ai'] / units['Ah']

    #Equation A.32: estimated area correctly mapped as target class
    numerator = (units['yi'] / incl_prob).sum()
    #Equation A.33: estimated reference area of the target class
    denominator = (units['xi'] / incl_prob).sum()

    #Equation A.31, expressed in percent
    return numerator / denominator * 100

In [23]:
#Estimate producer's accuracy (in percent) of the target class
estimate_PA(data)

93.62678633355188

In [24]:
def estimate_UA_SE(df: pd.DataFrame) -> float:
    """
    Estimate the standard error (SE) of user's accuracy of the target class
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Presented variance estimator is unbiased for sampling with replacement, and an approximation for sampling without replacement.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting, consistent with Ah)
    "Map" (0-1 - proportion of the sample unit identified as target class in the map)
    "Reference" (0-1 - proportion of the sample unit identified as target class in the reference classification)
    "Ah" (stratum area, in the units desired for area reporting, consistent with ai)
    ~~~
    Returns the estimated SE of user's accuracy of the target class, in percent
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.25, A.31-A.33, A.35
    """
    #"yi" - area of the sample unit correctly classified as target class: min("Map", "Reference")*"ai"
    #"xi" - area of the sample unit mapped as target class: "Map"*"ai"
    units = df[['Stratum', 'Ah', 'ai']].copy()
    units['yi'] = np.minimum(df['Map'], df['Reference']) * df['ai']
    units['xi'] = df['Map'] * df['ai']

    #Equation A.25: inclusion probability of each sampled unit,
    #where nh is the number of sample units in the unit's stratum
    units['nh'] = units.groupby('Stratum')['yi'].transform('count')
    incl_prob = units['nh'] * units['ai'] / units['Ah']

    #Equation A.32: estimated area correctly mapped as target class
    numerator = (units['yi'] / incl_prob).sum()
    #Equation A.33: estimated area mapped as target class
    denominator = (units['xi'] / incl_prob).sum()

    #Equation A.31: user's accuracy as a proportion
    ratio = numerator / denominator

    #Equation A.35: variance of the ratio from the per-unit residuals yi - R * xi
    residual = units['yi'] - ratio * units['xi']
    variance = ((1 - incl_prob) * residual**2 / incl_prob**2).sum() / denominator**2

    #SE expressed in percent
    return np.sqrt(variance)*100

In [25]:
#Estimate standard error (in percent) of user's accuracy of the target class
estimate_UA_SE(data)

1.8753281165637898

In [26]:
def estimate_PA_SE(df: pd.DataFrame) -> float:
    """
    Estimate the standard error (SE) of producer's accuracy of the target class
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Presented variance estimator is unbiased for sampling with replacement, and an approximation for sampling without replacement.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting, consistent with Ah)
    "Map" (0-1 - proportion of the sample unit identified as target class in the map)
    "Reference" (0-1 - proportion of the sample unit identified as target class in the reference classification)
    "Ah" (stratum area, in the units desired for area reporting, consistent with ai)
    ~~~
    Returns the estimated SE of producer's accuracy of the target class, in percent
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.25, A.31-A.33, A.35
    """
    #"yi" - area of the sample unit correctly classified as target class: min("Map", "Reference")*"ai"
    #"xi" - area of the sample unit identified as target class in the reference classification: "Reference"*"ai"
    units = df[['Stratum', 'Ah', 'ai']].copy()
    units['yi'] = np.minimum(df['Map'], df['Reference']) * df['ai']
    units['xi'] = df['Reference'] * df['ai']

    #Equation A.25: inclusion probability of each sampled unit,
    #where nh is the number of sample units in the unit's stratum
    units['nh'] = units.groupby('Stratum')['yi'].transform('count')
    incl_prob = units['nh'] * units['ai'] / units['Ah']

    #Equation A.32: estimated area correctly mapped as target class
    numerator = (units['yi'] / incl_prob).sum()
    #Equation A.33: estimated reference area of the target class
    denominator = (units['xi'] / incl_prob).sum()

    #Equation A.31: producer's accuracy as a proportion
    ratio = numerator / denominator

    #Equation A.35: variance of the ratio from the per-unit residuals yi - R * xi
    residual = units['yi'] - ratio * units['xi']
    variance = ((1 - incl_prob) * residual**2 / incl_prob**2).sum() / denominator**2

    #SE expressed in percent
    return np.sqrt(variance)*100

In [27]:
#Estimate standard error (in percent) of producer's accuracy of the target class
estimate_PA_SE(data)

1.4329110878357287

#### Functions to estimate % of class1 from class2

In [28]:
def estimate_class_percent (df: pd.DataFrame) -> float:
    """
    Estimate the percent of class1 from class2, both estimated from the sample,
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    Unbiased estimator for sampling both with and without replacement.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting)
    "Class1" (0-1 - proportion of the sample unit identified as class 1 (percentage numerator))
    "Class2" (0-1 - proportion of the sample unit identified as class 2 (percentage denominator))
    "Ah" (stratum area, in the units desired for area reporting, consistent with ai)
    ~~~
    Returns the estimated percent of class1 from the area of class2
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.25, A.31-A.33
    """
    #"yi" - area of the sample unit identified as class 1 in the reference classification: "Class1"*"ai"
    #"xi" - area of the sample unit identified as class 2 in the reference classification: "Class2"*"ai"
    units = df[['Stratum', 'Ah', 'ai']].copy()
    units['yi'] = df['Class1'] * df['ai']
    units['xi'] = df['Class2'] * df['ai']

    #Equation A.25: inclusion probability of each sampled unit,
    #where nh is the number of sample units in the unit's stratum
    units['nh'] = units.groupby('Stratum')['yi'].transform('count')
    incl_prob = units['nh'] * units['ai'] / units['Ah']

    #Equation A.32: estimated area of class 1
    numerator = (units['yi'] / incl_prob).sum()
    #Equation A.33: estimated area of class 2
    denominator = (units['xi'] / incl_prob).sum()

    #Equation A.31, expressed in percent
    return numerator / denominator * 100

In [29]:
def estimate_class_percent_SE (df: pd.DataFrame) -> float:
    """
    Estimate the standard error (SE) of the percent of class1 from class2, both estimated from the sample,
    for unequal probability sampling, with inclusion probabilities proportional to unit area within each stratum.
    NOTE(review): the other SE functions in this notebook, which use the same equation A.35, describe the
    variance estimator as unbiased for sampling with replacement and an approximation for sampling without
    replacement; the earlier text here said "unbiased for both" - confirm against the paper.
    ~~~
    Input dataframe has one row per sample unit (pixel/polygon) and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and only stratum ID = 1 is used)
    "ai" (area of each sample unit, in the units desired for area reporting)
    "Class1" (0-1 - proportion of the sample unit identified as class 1 (percentage numerator))
    "Class2" (0-1 - proportion of the sample unit identified as class 2 (percentage denominator))
    "Ah" (stratum area, in the units desired for area reporting, consistent with ai)
    ~~~
    Returns the estimated SE of the percent of class1 from class2
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.2, equations A.25, A.31-A.33, A.35
    """
    #"yi" - area of the sample unit identified as class 1 in the reference classification: "Class1"*"ai"
    #"xi" - area of the sample unit identified as class 2 in the reference classification: "Class2"*"ai"
    units = df[['Stratum', 'Ah', 'ai']].copy()
    units['yi'] = df['Class1'] * df['ai']
    units['xi'] = df['Class2'] * df['ai']

    #Equation A.25: inclusion probability of each sampled unit,
    #where nh is the number of sample units in the unit's stratum
    units['nh'] = units.groupby('Stratum')['yi'].transform('count')
    incl_prob = units['nh'] * units['ai'] / units['Ah']

    #Equation A.32: estimated area of class 1
    numerator = (units['yi'] / incl_prob).sum()
    #Equation A.33: estimated area of class 2
    denominator = (units['xi'] / incl_prob).sum()

    #Equation A.31: share of class 1 in class 2 as a proportion
    ratio = numerator / denominator

    #Equation A.35: variance of the ratio from the per-unit residuals yi - R * xi
    residual = units['yi'] - ratio * units['xi']
    variance = ((1 - incl_prob) * residual**2 / incl_prob**2).sum() / denominator**2

    #SE expressed in percent
    return np.sqrt(variance)*100

In [30]:
#Example of use: estimate % of Types 0-3 from the overall area of the target class (Reference>0).

functions = [estimate_class_percent, estimate_class_percent_SE]
names = ['% from target class','SE']

#Collect one record per type and build the table once, instead of re-concatenating a DataFrame on every iteration
records = []
for classtype in data['RefType'].unique():
    datacopy = data.copy()
    #A new Class1 column, where reference values are set to zero if class type is different from the current class type
    datacopy['Class1'] = np.where(datacopy['RefType'] == classtype, datacopy['Reference'], 0)

    #Set Class2 equal to Reference (total area of target class)
    datacopy['Class2'] = datacopy['Reference']

    record = {"Estimate": classtype}
    record.update({nm: fn(datacopy) for fn, nm in zip(functions, names)})
    records.append(record)

#Rows are listed in reverse order of first appearance of each type in the data table
results = pd.DataFrame(records[::-1], columns = ["Estimate"] + names)

results.style.hide(axis="index").format({name: '{:.2f}' for name in names})


Estimate,% from target class,SE
Type3,31.3,2.61
Type2,49.69,2.8
Type0,0.0,0.0
Type1,19.01,2.19
