In [1]:
import pandas as pd
import numpy as np

### Area and accuracy estimation for equal probability sampling, from Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change" https://doi.org/10.1016/j.rse.2025.114714, Appendix A.1.1

#### Example of input data

In [2]:
#Read strata info table with columns:
#"Stratum": stratum ID, 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used
#"Area": stratum area in km2 or any other area units, needs to be consistend with pixel size area units in data table;
#"Count": total number of units (pixels, polygons) in each stratum
strata = pd.read_csv('A.1.1.Strata_info.txt', sep = '\t')

In [3]:
strata.head().style.hide(axis="index")

Stratum,Area,Count
1,595255.0128,788889906
2,332992.9026,781490883
3,946369.3351,1280971777
4,486272.5356,859622001
5,669855.4746,909836775


In [4]:
#Read sample interpretation table with columns:
#"Stratum": stratum ID, 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used
#"Pixarea": area of each sample unit (in units that are desired for area reporting);
#"Map"(for accuracy assessment only): proportion of target class from the map (0-1) for each sample unit;
#"Reference": proportion of target class from reference sample classification for each sample unit;
#allowed values are from -1 to 1 for area estimation, and from 0 to 1 for map accuracy assessment.
#(optional)"RefType": type labels, if the are of target class needs to be estimated separately for multiple sub-types;
#(optional)"Correct": proportion of sample unit (0-1), which is correctly mapped. 
#This column is necessary for maps with more than two classes. 
#For map with two classes it is computed in the function "estimate_OA_two_classes" directly from "Map" and "Reference" columns

data = pd.read_csv('A.1.1.Sample_data.txt', sep ='\t')
data.head().style.hide(axis="index")

Stratum,Pixarea,Map,Reference,RefType,Correct
10,0.00069,1.0,1.0,Type1,1.0
2,0.000426,0.0,0.1,Type1,0.9
1,0.000755,0.0,0.0,Type0,1.0
3,0.000739,0.0,0.0,Type0,1.0
8,0.000741,1.0,0.9,Type1,0.9


In [5]:
#Merge data table with sample info table
data = data.merge(strata).rename(columns = {'Area':'Ah', 'Count':'Nh', 'Pixarea':'ai'})
data.head().style.hide(axis="index")

Stratum,ai,Map,Reference,RefType,Correct,Ah,Nh
10,0.00069,1.0,1.0,Type1,1.0,161635.388,234157691
10,0.00069,1.0,1.0,Type1,1.0,161635.388,234157691
10,0.00069,1.0,1.0,Type1,1.0,161635.388,234157691
10,0.00069,1.0,1.0,Type1,1.0,161635.388,234157691
10,0.00069,1.0,1.0,Type1,1.0,161635.388,234157691


#### Functions to estimate land cover class area and its standard error

In [6]:
def estimate_area(df: pd.DataFrame) -> float:
    """ 
    Function to estimate target class area from sample refernce values for equal probability sampling.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting)
    "Reference" (0-1 - proportion of the sample pixel/polygon identified as target class in reference classification;
    Reference data column could be defined as values in range from -1 to 1 to compute net change area of a target class,
    in this case negative proportions mean net loss, and positive proportions - net gain)
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated target class area in units of ai, 
    negative area in net change computations means overall net loss of a target class
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equation A.7
    """
    #Create a copy of columns "Stratum","Nh" and a new column "yi", 
    #which is a reference area of the target class determined for sample unit i 
    df1 = pd.concat([df['Stratum'], df['Nh'],
                     pd.Series(df['ai'] * df['Reference'],name = 'yi')], axis = 1)
                     
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    # Equation A.7
    Nh =  ByStratum.Nh.median()
    yh = ByStratum.yi.mean()
    area = (Nh * yh).sum()

    return area

In [7]:
def estimate_area_SE(df: pd.DataFrame) -> float:
    """ 
    Function to estimate target class area from sample refernce values for equal probability sampling.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting)
    "Reference" (0-1 - proportion of the sample pixel/polygon identified as target class in reference classification;
    Reference data column could be defined as values in range from -1 to 1 to compute net change area of a target class,
    in this case negative proportions mean net loss, and positive proportions - net gain)
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated SE of the target class area in in units of ai,
    SE is always a positive number, even if the estimated target class area is negative
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equations A.6 and A.8
    """
    #Create a copy of columns "Stratum","Nh" and a new column "yi", 
    #which is a reference area of the target class determined for sample unit i 
    df1 = pd.concat([df['Stratum'], df['Nh'],
                     pd.Series(df['ai'] * df['Reference'],name = 'yi')], axis = 1)
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    #Equation A.6
    samplevar = ByStratum.yi.var(ddof=1)
   
    #Equation A.8
    Nh = ByStratum.Nh.median()
    N = Nh.sum()
    nh = ByStratum.yi.count()
    Forstrata = samplevar / nh
    StrataVar = Forstrata * (1 - nh / Nh) * Nh**2

    StrataVarSum = StrataVar.sum()
    SE = np.sqrt(StrataVarSum)
    
    return SE

In [8]:
#Estimate target class area
estimate_area(data)

1223902.897388578

In [9]:
#Estimate standard error of the target class area
estimate_area_SE(data)

31611.10237731219

In [10]:
#Estimate target class are for each unique type from the column RefType

functions = [estimate_area, estimate_area_SE]
names = ['area, km²','area SE, km²']
results = pd.DataFrame()
datacopy = pd.DataFrame()

for classtype in data['RefType'].unique():
    datacopy = data.copy()
    #A new reference column, where reference values are set to zero if class type is different from the current class type
    datacopy['Reference'] = np.where(datacopy['RefType'] == classtype, datacopy['Reference'], 0)
    
    values = {}
    values = {nm:[fn(datacopy)] for fn, nm in zip(functions,names)}
    values["Estimate"] = classtype
    values_pd = pd.DataFrame(values).set_index("Estimate")
    results = pd.concat([values_pd, results])

results = results.reset_index()

results.style.hide(axis="index").format({name: '{:.4f}' for name in names})


Estimate,"area, km²","area SE, km²"
Type3,383052.6402,32929.0838
Type2,608160.4333,38476.2488
Type0,0.0,0.0
Type1,232689.8239,27599.8941


#### Functions to estimate overall map accuracy and its standard error

In [11]:
def estimate_OA_two_classes (df: pd.DataFrame) -> float:
    """ 
    Function to estimate overall accuracy of the map that has two classes (target class vs. no target class)
    for equal probability sampling.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting, needs to be consistent with Ah)
    "Map" (0-1 - proportion of the sample pixel/polygon identified as target class in the map)
    "Reference" (0-1 - proportion of the sample pixel/polygon identified as target class in reference classification)
    "Ah" (stratum area, in units that are desired for area reporting, needs to be consistent with ai)
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated overall accuracy of the map expressed as percent of the total study area
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equation A.11
    ~~~
    This version of the code uses ratio estimator (equation A.11) instead of equation A.7 as recommended in Appendix A.2.2 of the paper.
    The approach for estimating Overall Accurqacy described in the paper (estimating the area of agreement using equation A.7 and 
    then dividing by a known total area of the study region) is correct, but might produce estimates of OA over 100% in rare cases when Overall Accuracy 
    is close to 100% (i.e., the area of agreement between the map and the reference sample is very close to the total area of the study region). 
    When using a ratio estimator (equation A.11), both the correctly classified area (numerator of the ratio) and the total area of the study region 
    (denominator of the ratio) are estimated from the sample, and thus the estimated Overall Accuracy will not exceed 100%.
    ~~~
    Area of each sample unit (ai) is used as xi in equation A.11
    """
    #Create a copy of columns "Stratum","Nh", "Ah", "ai" and a new column "yi", 
    #where the proportion of the sample unit that is correctly classified (as either of map classes) is first computed as: 
    #min ("Map", "Reference") + min (1-"Map", 1-"Reference"), and then mupliplied by the area of pixel ai to derive
    #the area of the sample unit that is correctly classified
    df1 = pd.concat([df['Stratum'], df['Nh'],df['Ah'], df['ai'],
                     pd.Series(((np.minimum(df['Map'],df['Reference']) + np.minimum((1 - df['Map']),(1 - df['Reference']))) * df['ai']),
                                name = 'yi')], axis = 1)
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    #Equation A.11
    Nh = ByStratum.Nh.median()
    Yest = (Nh * ByStratum.yi.mean()).sum()
    Xest = (Nh * ByStratum.ai.mean()).sum()
    OA = Yest / Xest * 100
    
    return OA

In [12]:
estimate_OA_two_classes(data)

92.08917027110765

In [13]:
def estimate_OA_multiple_classes (df: pd.DataFrame) -> float:
    """ 
    Function to estimate overall accuracy of the map that has multiple classes
    for equal probability sampling.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting, needs to be consistent with Ah)
    "Correct" - proportion of sample unit (0-1) that is correctly mapped
    "Ah" (stratum area, in units that are desired for area reporting, needs to be consistent with ai)
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated overall accuracy of the map expressed as percent of the total study area
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equation A.11
    ~~~
    This version of the code uses ratio estimator (equation A.11) instead of equation A.7 as recommended in Appendix A.2.2 of the paper.
    The approach for estimating Overall Accurqacy described in the paper (estimating the area of agreement using equation A.7 and 
    then dividing by a known total area of the study region) is correct, but might produce estimates of OA over 100% in rare cases when Overall Accuracy 
    is close to 100% (i.e., the area of agreement between the map and the reference sample is very close to the total area of the study region). 
    When using a ratio estimator (equation A.11), both the correctly classified area (numerator of the ratio) and the total area of the study region 
    (denominator of the ratio) are estimated from the sample, and thus the estimated Overall Accuracy will not exceed 100%.
    ~~~
    Area of each sample unit (ai) is used as xi in equation A.11
    """               
    #Create a copy of columns "Stratum","Nh", "Ah", "ai" and a new column "yi", 
    #which is the area of the sample unit that is correctly classified, computed as "Correct"*"ai"
    df1 = pd.concat([df['Stratum'], df['Nh'],df['Ah'],df['ai'], pd.Series(df['Correct'] * df['ai'], name = 'yi')], axis = 1)
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])

    #Equation A.11
    Nh = ByStratum.Nh.median()
    Yest = (Nh * ByStratum.yi.mean()).sum()
    Xest = (Nh * ByStratum.ai.mean()).sum()
    OA = Yest / Xest * 100
    
    return OA

In [14]:
estimate_OA_multiple_classes(data)

92.08917027110765

In [15]:
def estimate_SE_OA_two_classes (df: pd.DataFrame) -> float:
    """ 
    Function to estimate the standard error of overall accuracy of the map that has two classes 
    (target class vs. no target class) for equal probability sampling.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting, needs to be consistent with Ah)
    "Map" (0-1 - proportion of the sample pixel/polygon identified as target class in the map)
    "Reference" (0-1 - proportion of the sample pixel/polygon identified as target class in reference classification)
    "Ah" (stratum area, in units that are desired for area reporting, needs to be consistent with ai)
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated standard error of the overall accuracy of the map expressed as percent of the total study area
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equations A.11-A.14
    ~~~
    Area of each sample unit (ai) is used as xi
    """
    #Create a copy of columns "Stratum","Nh", "Ah", "ai" and a new column "yi", 
    #where the proportion of the sample unit that is correctly classified (as either of map classes) is first computed as: 
    #min ("Map", "Reference") + min (1-"Map", 1-"Reference"), and then mupliplied by the area of pixel ai to derive
    #the area of the sample unit that is correctly classified
    df1 = pd.concat([df['Stratum'], df['Nh'],df['Ah'], df['ai'],
                     pd.Series(((np.minimum(df['Map'],df['Reference']) + np.minimum((1 - df['Map']),(1 - df['Reference']))) * df['ai']),
                                name = 'yi')], axis = 1)

    df1['XY'] = df1['ai'] * df1['yi']
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    #Equation A.13
    Nh = ByStratum.Nh.median()
    Xest = (Nh * ByStratum.ai.mean()).sum()
    
    #Equation A.11
    Yest = (Nh * ByStratum.yi.mean()).sum()
    R = Yest / Xest
    
    #Equation A.14
    nh = ByStratum.yi.count()
    meanyh = ByStratum.yi.mean()
    meanxh = ByStratum.ai.mean()
    covarxy = (ByStratum.XY.sum() - nh * meanyh * meanxh) / (nh - 1)

    # Equation A.12
    vary = ByStratum.yi.var(ddof=1)
    varx = ByStratum.ai.var(ddof=1)
    StrataVar = Nh**2 * (1 - nh / Nh) * (vary + R**2 * varx - 2 * R * covarxy) / nh
    StrataVarSum = StrataVar.sum() / Xest / Xest
    SE = np.sqrt(StrataVarSum) * 100
     
    return SE

In [16]:
estimate_SE_OA_two_classes(data)

0.7080369844100617

In [17]:
def estimate_SE_OA_multiple_classes (df: pd.DataFrame) -> float:
    """ 
    Function to estimate the standard error of overall accuracy of the map that has multiple classes
    for equal probability sampling.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting, needs to be consistent with Ah)
    "Correct" - proportion of sample unit (0-1) that is correctly mapped
    "Ah" (stratum area, in units that are desired for area reporting, needs to be consistent with ai)
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated standard error of the overall accuracy of the map expressed as percent of the total study area
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equations A.11-A.14
    ~~~
    Area of each sample unit (ai) is used as xi
    """
    #Create a copy of columns "Stratum","Nh", "Ah", "ai" and a new column "yi", 
    #which is the area of the sample unit that is correctly classified, computed as "Correct"*"ai"
    df1 = pd.concat([df['Stratum'], df['Nh'],df['Ah'], df['ai'], pd.Series(df['Correct'] * df['ai'], name = 'yi')], axis = 1)
    
    df1['XY'] = df1['ai'] * df1['yi']
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    #Equation A.13
    Nh = ByStratum.Nh.median()
    Xest = (Nh * ByStratum.ai.mean()).sum()
    
    #Equation A.11
    Yest = (Nh * ByStratum.yi.mean()).sum()
    R = Yest / Xest
    
    #Equation A.14
    nh = ByStratum.yi.count()
    meanyh = ByStratum.yi.mean()
    meanxh = ByStratum.ai.mean()
    covarxy = (ByStratum.XY.sum() - nh * meanyh * meanxh) / (nh - 1)

    # Equation A.12
    vary = ByStratum.yi.var(ddof=1)
    varx = ByStratum.ai.var(ddof=1)
    StrataVar = Nh**2 * (1 - nh / Nh) * (vary + R**2 * varx - 2 * R * covarxy) / nh
    StrataVarSum = StrataVar.sum() / Xest / Xest
    SE = np.sqrt(StrataVarSum) * 100
     
    return SE

In [18]:
estimate_SE_OA_multiple_classes(data)

0.7080369844100617

#### Functions to estimate user's and producer's accuracy of target class and their standard errors

In [19]:
def estimate_UA (df: pd.DataFrame) -> float:
    """ 
    Function to estimate user's accuracy of target class
    for equal probability sampling.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting)
    "Map" (0-1 - proportion of the sample pixel/polygon identified as target class in the map)
    "Reference" (0-1 - proportion of the sample pixel/polygon identified as target class in reference classification)
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated user's accuracy of target class expressed as percentage
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equation A.11
    """
    #Create a copy of columns "Nh", and new columns
    #"yi" - area of sample unit that is correctly classfied as target class, computed as min ("Map", "Reference")*"ai"
    #"xi" - area of a sample unit that is mapped as target class, computed as "Map"*"ai"
    df1 = pd.concat([df['Stratum'],df['Nh'],
                     pd.Series((np.minimum(df['Map'],df['Reference']) * df['ai']), name = 'yi'),
                     pd.Series(df['Map'] * df['ai'], name = 'xi')],axis = 1)
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    #Equation A.11
    Nh = ByStratum.Nh.median()
    Yest = (Nh * ByStratum.yi.mean()).sum()
    Xest = (Nh * ByStratum.xi.mean()).sum()
    UA = Yest / Xest * 100

    return UA

In [20]:
estimate_UA(data)

80.6910638941078

In [21]:
def estimate_PA (df: pd.DataFrame) -> float:
    """ 
    Function to estimate producer's accuracy of target class
    for equal probability sampling.
    ~~~ 
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting)
    "Map" (0-1 - proportion of the sample pixel/polygon identified as target class in the map)
    "Reference" (0-1 - proportion of the sample pixel/polygon identified as target class in reference classification)
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated producer's accuracy of target class expressed as percentage
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equation A.11
    """
    #Create a copy of columns "Nh", and new columns
    #"yi" - area of sample unit that is correctly classfied as target class, computed as min ("Map", "Reference")*"ai"
    #"xi" - area of a sample unit that is identified as target class in reference classification, computed as "Reference"*"ai"
    df1 = pd.concat([df['Stratum'],df['Nh'],
                     pd.Series((np.minimum(df['Map'],df['Reference']) * df['ai']), name = 'yi'),
                     pd.Series(df['Reference'] * df['ai'], name = 'xi')],axis = 1)
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    #Equation A.11
    Nh = ByStratum.Nh.median()
    Yest = (Nh * ByStratum.yi.mean()).sum()
    Xest = (Nh * ByStratum.xi.mean()).sum()
    PA = Yest / Xest * 100

    return PA

In [22]:
estimate_PA(data)

93.62678076530403

In [23]:
def estimate_UA_SE(df: pd.DataFrame) -> float:
    """ 
    Function to estimate the standard error (SE) of user's accuracy of target class
    for equal probability sampling.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting)
    "Map" (0-1 - proportion of the sample pixel/polygon identified as target class in the map)
    "Reference" (0-1 - proportion of the sample pixel/polygon identified as target class in reference classification)
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated SE of user's accuracy of target class expressed as percentage
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equations A.11-A.14
    """
    #Create a copy of columns "Nh", and new columns
    #"yi" - area of sample unit that is correctly classfied as target class, computed as min ("Map", "Reference")*"ai"
    #"xi" - area of a sample unit that is mapped as target class, computed as "Map"*"ai"
    df1 = pd.concat([df['Stratum'],df['Nh'],
                     pd.Series((np.minimum(df['Map'],df['Reference']) * df['ai']), name = 'yi'),
                     pd.Series(df['Map'] * df['ai'], name = 'xi')],axis = 1)
    
    df1['XY'] = df1['xi'] * df1['yi']
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    #Equation A.13
    Nh = ByStratum.Nh.median()
    Xest = (Nh * ByStratum.xi.mean()).sum()
    
    #Equation A.11
    Yest = (Nh * ByStratum.yi.mean()).sum()
    R = Yest / Xest
    
    #Equation A.14
    nh = ByStratum.yi.count()
    meanyh = ByStratum.yi.mean()
    meanxh = ByStratum.xi.mean()
    covarxy = (ByStratum.XY.sum() - nh * meanyh * meanxh) / (nh - 1)

    # Equation A.12
    vary = ByStratum.yi.var(ddof=1)
    varx = ByStratum.xi.var(ddof=1)
    StrataVar = Nh**2 * (1 - nh / Nh) * (vary + R**2 * varx - 2 * R * covarxy) / nh
    StrataVarSum = StrataVar.sum() / Xest / Xest
    SE = np.sqrt(StrataVarSum) * 100
     
    return SE

In [24]:
estimate_UA_SE(data)

1.8257849453559096

In [25]:
def estimate_PA_SE(df: pd.DataFrame) -> float:
    """ 
    Function to estimate the standard error (SE) of producer's accuracy of target class
    for equal probability sampling.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting)
    "Map" (0-1 - proportion of the sample pixel/polygon identified as target class in the map)
    "Reference" (0-1 - proportion of the sample pixel/polygon identified as target class in reference classification)
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated SE of producer's accuracy of target class expressed as percentage
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equations A.11-A.14
    """
    #Create a copy of columns "Nh", and new columns
    #"yi" - area of sample unit that is correctly classfied as target class, computed as min ("Map", "Reference")*"ai"
    #"xi" - area of a sample unit that is identified as target class in reference classification, computed as "Reference"*"ai"
    df1 = pd.concat([df['Stratum'],df['Nh'],
                     pd.Series((np.minimum(df['Map'],df['Reference']) * df['ai']), name = 'yi'),
                     pd.Series(df['Reference'] * df['ai'], name = 'xi')],axis = 1)
       
    df1['XY'] = df1['xi'] * df1['yi']
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    #Equation A.13
    Nh = ByStratum.Nh.median()
    Xest = (Nh * ByStratum.xi.mean()).sum()
    
    #Equation A.11
    Yest = (Nh * ByStratum.yi.mean()).sum()
    R = Yest / Xest
    
    #Equation A.14
    nh = ByStratum.yi.count()
    meanyh = ByStratum.yi.mean()
    meanxh = ByStratum.xi.mean()
    covarxy = (ByStratum.XY.sum() - nh * meanyh * meanxh) / (nh - 1)

    # Equation A.12
    vary = ByStratum.yi.var(ddof=1)
    varx = ByStratum.xi.var(ddof=1)
    StrataVar = Nh**2 * (1 - nh / Nh) * (vary + R**2 * varx - 2 * R * covarxy) / nh
    StrataVarSum = StrataVar.sum() / Xest / Xest
    SE = np.sqrt(StrataVarSum) * 100
     
    return SE

In [26]:
estimate_PA_SE(data)

1.3788953825926273

#### Functions to estimate % of class1 from class2

In [27]:
def estimate_class_percent (df: pd.DataFrame) -> float:
    """ 
    Function to estimate percent of class1 from class2, both estimated from the sample
    for equal probability sampling.
    ~~~ 
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting)
    "Class1" (0-1 - proportion of the sample pixel/polygon identified as class 1 (percentage numerator))
    "Class2" (0-1 - proportion of the sample pixel/polygon identified as class 2 (percentage denominator))
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated percent of class 1 from the area of class 2
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equation A.11
    """
    #Create a copy of columns "Nh", and new columns
    #"yi" - area of sample unit that is identified as class 1 in the reference classification, computed as "Class1"*"ai"
    #"xi" - rea of sample unit that is identified as class 2 in the reference classification, computed as "Class2"*"ai"
    df1 = pd.concat([df['Stratum'],df['Nh'],
                     pd.Series((df['Class1'] * df['ai']), name = 'yi'),
                     pd.Series(df['Class2'] * df['ai'], name = 'xi')],axis = 1)
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    #Equation A.11
    Nh = ByStratum.Nh.median()
    Yest = (Nh * ByStratum.yi.mean()).sum()
    Xest = (Nh * ByStratum.xi.mean()).sum()
    PERC = Yest / Xest * 100

    return PERC

In [28]:
def estimate_class_percent_SE(df: pd.DataFrame) -> float:
    """ 
    Function to estimate the standard error (SE) of percent of class1 from class2, both estimated from the sample
    for equal probability sampling.
    ~~~
    Input dataframe with number of lines equal the number of sample pixels/polygons,
    and the following columns:
    "Stratum" (strata IDs 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used)
    "ai" (area of each sample unit, in units that are desired for area reporting)  
    "Class1" (0-1 - proportion of the sample pixel/polygon identified as class 1 (percentage numerator))
    "Class2" (0-1 - proportion of the sample pixel/polygon identified as class 2 (percentage denominator))
    "Nh" (number of units in each stratum h)
    ~~~
    Returns estimated SE of percent of class 1 from class 2
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix A.1.1, equations A.11-A.14
    """
    #Create a copy of columns "Nh", and new columns
    #"yi" - area of sample unit that is identified as class 1 in the reference classification, computed as "Class1"*"ai"
    #"xi" - rea of sample unit that is identified as class 2 in the reference classification, computed as "Class2"*"ai"
    df1 = pd.concat([df['Stratum'],df['Nh'],
                     pd.Series((df['Class1'] * df['ai']), name = 'yi'),
                     pd.Series(df['Class2'] * df['ai'], name = 'xi')],axis = 1)
    
    
    df1['XY'] = df1['xi'] * df1['yi']
    
    #Group copy of input dataset by stratum
    ByStratum = df1.groupby(by = ['Stratum'])
    
    #Equation A.13
    Nh = ByStratum.Nh.median()
    Xest = (Nh * ByStratum.xi.mean()).sum()
    
    #Equation A.11
    Yest = (Nh * ByStratum.yi.mean()).sum()
    R = Yest / Xest
    
    #Equation A.14
    nh = ByStratum.yi.count()
    meanyh = ByStratum.yi.mean()
    meanxh = ByStratum.xi.mean()
    covarxy = (ByStratum.XY.sum() - nh * meanyh * meanxh) / (nh - 1)

    # Equation A.12
    vary = ByStratum.yi.var(ddof=1)
    varx = ByStratum.xi.var(ddof=1)
    StrataVar = Nh**2 * (1 - nh / Nh) * (vary + R**2 * varx - 2 * R * covarxy) / nh
    StrataVarSum = StrataVar.sum() / Xest / Xest
    SE = np.sqrt(StrataVarSum) * 100
     
    return SE

In [29]:
#Example of use: estimate % of Types 0-3 from the overall area of the target class (Reference>0).

functions = [estimate_class_percent, estimate_class_percent_SE]
names = ['% from target class','SE']
results=pd.DataFrame()
datacopy=pd.DataFrame()

for classtype in data['RefType'].unique():
    datacopy = data.copy()
    #A new Class1 column, where reference values are set to zero if class type is different from the current class type
    datacopy['Class1']=np.where(datacopy['RefType'] == classtype, datacopy['Reference'], 0)
    
    #Set Class2 equal to Reference (total area of target class)
    datacopy['Class2']=datacopy['Reference']
    
    values={}
    values = {nm:[fn(datacopy)] for fn, nm in zip(functions,names)}
    values["Estimate"]=classtype
    values_pd = pd.DataFrame(values).set_index("Estimate")
    results = pd.concat([values_pd, results])

results=results.reset_index()

results.style.hide(axis="index").format({name: '{:.2f}' for name in names})


Estimate,% from target class,SE
Type3,31.3,2.61
Type2,49.69,2.81
Type0,0.0,0.0
Type1,19.01,2.2
