In [1]:
import pandas as pd
import numpy as np

### Area and accuracy estimation for equal probability sampling, from Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change" [https://doi.org/10.1016/j.rse.2025.114714](https://doi.org/10.1016/j.rse.2025.114714), Appendix C

#### Example of input data

In [2]:
#Read the strata info table (tab-separated text file) with columns:
#"Stratum": stratum ID, 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used
#"Area": stratum area in km2 or any other area units (estimated areas are reported in the same units)
strata = pd.read_csv('C.Strata_info.txt', sep = '\t')

In [3]:
#Preview the first rows of the strata table, hiding the index column
strata.head().style.hide(axis="index")

Stratum,Area
1,595255.0128
2,332992.9026
3,946369.3351
4,486272.5356
5,669855.4746


In [4]:
#Read the sample interpretation table (tab-separated text file) with columns:
#"Stratum": stratum ID, 1 - nstrata; for non-stratified sampling nstrata == 1 and thus only stratum ID = 1 should be used
#"Reference": reference sample classification of each sample point:
#1 / 0 - the sample point is / is not the target class (area estimation);
#-1, 0 and 1 are allowed for change area estimation (-1 indicates loss, 1 - gain, 0 - no change).
#(optional) "RefType": type labels, if the area of the target class needs to be estimated separately for multiple sub-types

data = pd.read_csv('C.Sample_data.txt', sep ='\t')
data.head().style.hide(axis="index")

Stratum,Reference,RefType
10,1,Type1
2,0,Type1
1,0,Type0
3,0,Type0
8,1,Type1


In [5]:
#Merge the sample data table with the strata info table, so that every sample point
#carries the area of its stratum in column "Ah" (renamed from "Area").
#The join key is given explicitly, and validate='many_to_one' raises pandas.errors.MergeError
#if a stratum ID is listed more than once in the strata table; such duplicates would otherwise
#silently duplicate sample points and bias the area and SE estimates.
data = data.merge(strata, on = 'Stratum', validate = 'many_to_one').rename(columns = {'Area':'Ah'})
data.head().style.hide(axis="index")

Stratum,Reference,RefType,Ah
10,1,Type1,161635.388
10,1,Type1,161635.388
10,1,Type1,161635.388
10,1,Type1,161635.388
10,1,Type1,161635.388


#### Functions to estimate land cover class area and its standard error

In [6]:
def estimate_area(df: pd.DataFrame) -> float:
    """
    Estimate the area of the target class from sample reference values.
    ~~~
    Input: dataframe with one row per sample point and the following columns:
    "Stratum" - stratum ID (1 - nstrata; for simple random or systematic sampling
    nstrata == 1 and thus only stratum ID = 1 should be used);
    "Reference" - 1 or 0 (yes/no: sample point identified as target class in the
    reference classification); the values -1, 0 and 1 may be used instead to compute
    the net change area of a target class (-1 - loss, 0 - no change, 1 - gain);
    "Ah" - stratum area, in the units desired for area reporting.
    ~~~
    Returns the estimated target class area in the units of Ah:
    the sum over strata of (stratum area * mean Reference value within the stratum).
    A negative area in net change computations means overall net loss of the target class.
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix C, equation C.1
    """
    strata_groups = df.groupby(by=['Stratum'])

    # "Ah" is repeated on every row of a stratum; the median collapses it to one value per stratum
    stratum_area = strata_groups.Ah.median()
    # Mean reference value per stratum (share of the target class for 0/1 data)
    mean_reference = strata_groups.Reference.mean()

    # Equation C.1: area-weighted sum of the per-stratum means
    return stratum_area.mul(mean_reference).sum()


In [7]:
def estimate_area_SE(df: pd.DataFrame) -> float:
    """
    Estimate the standard error (SE) of the target class area from sample reference values.
    ~~~
    Input: dataframe with one row per sample point and the following columns:
    "Stratum" - stratum ID (1 - nstrata; for simple random or systematic sampling
    nstrata == 1 and thus only stratum ID = 1 should be used);
    "Reference" - 1 or 0 (yes/no: sample point identified as target class in the
    reference classification); the values -1, 0 and 1 may be used instead to compute
    the net change area of a target class (-1 - loss, 0 - no change, 1 - gain);
    "Ah" - stratum area, in the units desired for area reporting.
    ~~~
    Returns the estimated SE of the target class area in the units of Ah:
    sqrt( sum over strata of Ah^2 * s2h / nh ), where s2h is the sample variance
    (ddof = 1) of Reference within the stratum and nh is the number of sample points.
    SE is always a non-negative number, even if the estimated target class area is negative.
    ~~~
    Warns (RuntimeWarning) when a stratum has fewer than two non-missing Reference values:
    its sample variance is undefined (NaN) and it contributes zero to the returned SE,
    so the SE is likely underestimated.
    ~~~
    From Tyukavina et al. (2025) "Practical global sampling methods for estimating area and map accuracy of land cover and change"
    Appendix C, equation C.2
    """
    # Local import: the notebook's import cell only brings in pandas and numpy
    import warnings

    #Group input dataset by stratum
    ByStratum = df.groupby(by = ['Stratum'])

    #Per-stratum sample size, area and sample variance of the reference values
    nh = ByStratum.Reference.count()
    Ah = ByStratum.Ah.median()
    s2h = ByStratum.Reference.var(ddof=1)

    # The variance is NaN for strata with fewer than two values, and .sum() below skips NaN,
    # so these strata would be dropped from the SE without any notice. Make it visible.
    undefined = s2h.isna()
    if undefined.any():
        warnings.warn(
            "Sample variance is undefined for strata with fewer than two non-missing "
            f"Reference values: {s2h.index[undefined].tolist()}; these strata contribute "
            "zero to the estimated SE, which is therefore likely underestimated.",
            RuntimeWarning,
            stacklevel=2,
        )

    #Equation C.2
    StrataVar = Ah**2 * (s2h / nh)
    SE = np.sqrt(StrataVar.sum())

    return SE

In [8]:
#Estimate target class area (reported in the units of the stratum areas "Ah")
estimate_area(data)

1221645.1743225136

In [9]:
#Estimate standard error of the target class area (reported in the units of the stratum areas "Ah")
estimate_area_SE(data)

31740.600732646624