# 1. Import necessary libraries for data handling and visualization.

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.ticker as mticker

# 2. The eSBAE function:

In [57]:
def calculate_areas(db_total, strata_column, categories_column, total_area, z_score):
    """Estimate the area of each category from a stratified sample.

    Rows of ``db_total`` whose ``categories_column`` is not missing are treated
    as the interpreted sample; all rows (interpreted or not) define the size of
    each stratum.

    For every category and every stratum:

    * stratum area  = (rows in stratum / all rows) * ``total_area``
    * category area = (share of the interpreted rows of the stratum that carry
      the category) * stratum area
    * standard error = sd / sqrt(n) * stratum area, where ``sd`` is the
      standard deviation of the 0/1 indicator computed with ``np.var``
      (default ``ddof=0``) and ``n`` is the number of interpreted rows in the
      stratum.

    Parameters
    ----------
    db_total : pandas.DataFrame
        Full table of points; it is not modified.
    strata_column : str
        Name of the column holding the stratum of each row.
    categories_column : str
        Name of the column holding the interpreted category (missing for
        rows that were not interpreted).
    total_area : number
        Area represented by all rows of ``db_total``.
    z_score : number
        Multiplier applied to standard errors to obtain the reported margins.

    Returns
    -------
    pandas.DataFrame
        One row per category. Columns ``area_stratum_<s>`` and
        ``ci_stratum_<s>`` for each stratum found among the interpreted rows,
        followed by ``area_total``, ``MOE`` (z_score times the root of the
        summed squared stratum errors) and ``MOE_perc`` (MOE as a percentage
        of ``area_total``; NaN when ``area_total`` is zero).
    """
    # Work on standalone Series instead of adding indicator columns to a slice
    # of the input: this avoids SettingWithCopyWarning and cannot overwrite an
    # existing column when a category label equals a column name.
    is_interpreted = ~db_total[categories_column].isna()
    labels = db_total.loc[is_interpreted, categories_column]
    sample_strata = db_total.loc[is_interpreted, strata_column]

    # get all attributes
    categories = labels.unique()

    # get strata (only those present among the interpreted rows)
    strata = sample_strata.unique()
    print(categories)

    # Stratum size and membership do not depend on the category, so compute
    # them once instead of once per category.
    n_rows = len(db_total)
    stratum_setup = []
    for stratum in strata:
        if str(stratum) == 'nan':
            continue
        # area proportion of the stratum on the full dataset, scaled to area
        rows_in_stratum = int((db_total[strata_column] == stratum).sum())
        stratum_area = rows_in_stratum / n_rows * total_area
        # positions of the interpreted rows that fall in this stratum
        in_stratum = (sample_strata == stratum).to_numpy(dtype=bool, na_value=False)
        stratum_setup.append((stratum, stratum_area, in_stratum, int(in_stratum.sum())))

    results = {}
    # create stats for each entry
    for category in categories:

        if str(category) == 'nan':
            continue

        print(f' Calculating stats for {category}')
        # binary indicator: 1 where the interpreted row carries this category
        indicator = (labels == category).to_numpy(dtype=bool, na_value=False).astype(int)
        print(f'There are {indicator.sum()} entries of {category} in {categories_column}.')

        # running totals for this category
        categories_area, se_total = 0, 0
        stats = {}
        for stratum, stratum_area, in_stratum, n in stratum_setup:
            values = indicator[in_stratum]

            # proportion of the category within the stratum, from interpreted data
            proportion_category = float(values.sum()) / n

            # get area from proportion and full stratum area
            category_stratum_area = proportion_category * stratum_area

            # error from interpreted data, scaled to the full stratum area
            sd = np.sqrt(np.var(values))
            se = sd / np.sqrt(n) * stratum_area

            # add for totals (errors are combined as a sum of squares)
            categories_area += category_stratum_area
            se_total += se**2

            # add to dictionary
            stats[f'area_stratum_{stratum}'] = category_stratum_area
            stats[f'ci_stratum_{stratum}'] = z_score * se

        stats['area_total'] = categories_area
        stats['MOE'] = z_score * np.sqrt(se_total)
        # A zero total area has no meaningful relative error.
        stats['MOE_perc'] = stats['MOE'] / categories_area * 100 if categories_area else np.nan
        results[category] = stats

    return pd.DataFrame.from_dict(results, orient='index')

# 3. Load and prepare the interpreted data from the CEO validation campaigns
#### CIV: ERP 4000 points, 1st validation 835 points, 2nd validation 3467 points

In [14]:
# Load the three interpreted point tables; all three files are semicolon-delimited.
pts835 = pd.read_csv('/home/sepal-user/eSBAE_CIV/data/835_final_v2.csv', delimiter=';')
pts3k = pd.read_csv('/home/sepal-user/eSBAE_CIV/data/3K_final_v2.csv', delimiter=';')
erp4k = pd.read_csv('/home/sepal-user/eSBAE_CIV/data/erp4K_final.csv', delimiter=';')

## 3.1 Different data checks and harmonisations 

In [None]:
#pts835.columns.to_list() 

#### land use IPCC categories 2015 (level 1) = 'ocs_n1_2015'
#### land use national classes 2015 (level 2) = 'ocs_n2_2015'
#### land use IPCC categories 2020 (level 1) = 'ocs_n1_2020'
#### land use national classes 2020 (level 2) = 'ocs_n2_2020'

### Columns of interest => 'point_id', 'ocs_n1_2015', 'ocs_n2_2015', 'ocs_n1_2020', 'ocs_n2_2020', 'kmeans', 'source'

In [None]:
# verify and count possible values in different columns

#count_values_pts835 = pts835['ocs_n1_2015'].value_counts()
#count_values_pts835 = pts835['ocs_n1_2020'].value_counts()
#count_values_pts835 = pts835['ocs_n2_2015'].value_counts()
#count_values_pts835 = pts835['ocs_n2_2020'].value_counts()
#count_values_pts835 = pts835['kmeans'].value_counts()
#count_values_pts835 = pts835['source'].value_counts()
#print(count_values_pts835)

In [None]:
#pts3k.columns.to_list()

#### land use IPCC categories 2015 (level 1) = 'ocs_n1_2015'
#### land use national classes 2015 (level 2) = 'ocs_n2_2015'
#### land use IPCC categories 2020 (level 1) = 'ocs_n1_2020'
#### land use national classes 2020 (level 2) = 'ocs_n2_2020'

### Columns of interest => 'point_id', 'ocs_n1_2015', 'ocs_n2_2015', 'ocs_n1_2020', 'ocs_n2_2020', 'kmeans', 'source'

In [None]:
# verify and count possible values in different columns

#count_values_pts3k = pts3k['ocs_n1_2015'].value_counts()
#count_values_pts3k = pts3k['ocs_n1_2020'].value_counts()
#count_values_pts3k = pts3k['ocs_n2_2015'].value_counts()
#count_values_pts3k = pts3k['ocs_n2_2020'].value_counts()
#count_values_pts3k = pts3k['kmeans'].value_counts()
#count_values_pts3k = pts3k['source'].value_counts()
#print(count_values_pts3k)

In [None]:
#erp4k.columns 
#erp4k.columns.to_list()
#print(erp4k.head())


In [15]:
# Tag every ERP record with its origin so it stays identifiable after merging.
erp4k['source'] = 'CEOERP'

# Harmonise the ERP column names with the other two tables:
#   INT1_n1_<year> -> ocs_n1_<year>   land use, IPCC categories (level 1)
#   INT1_n2_<year> -> ocs_n2_<year>   land use, national classes (level 2)
#   LON / LAT      -> lon / lat
erp4k.rename(
    columns={
        'INT1_n1_2015': 'ocs_n1_2015',
        'INT1_n2_2015': 'ocs_n2_2015',
        'INT1_n1_2020': 'ocs_n1_2020',
        'INT1_n2_2020': 'ocs_n2_2020',
        'LON': 'lon',
        'LAT': 'lat',
    },
    inplace=True,
)
erp4k.columns

Index(['point_id', 'PLOTID', 'lon', 'lat', 'chg_prob', 'kmeans', 'str_dal_ne',
       'ID', 'phyto', 'def_year_1520', 'deg_year_1520', 'str_dal_FA',
       'INT1_2000', 'INT1_n1_2000', 'INT1_n2_2000', 'INT1_2010',
       'INT1_n1_2010', 'INT1_n2_2010', 'INT1_2015', 'ocs_n1_2015',
       'ocs_n2_2015', 'INT1_2020', 'ocs_n1_2020', 'ocs_n2_2020', 'INT1_2021',
       'INT1_n1_2021', 'INT1_n2_2021', 'CHG_00_10', 'CHG_10_15', 'CHG_15_20',
       'CHG_20_21', 'Transition_00_10', 'Transition_10_15', 'Transition_15_20',
       'Transition_20_21', 'Gain_Cohort', 'IC', 'COMMENT', 'Wetlands', 'CNC',
       'chg_yr_1520', 'chg_1520', 'gain_1520', 'def_year2015', 'deg_year2015',
       'def_year2016', 'deg_year2016', 'def_year2017', 'deg_year2017',
       'def_year2018', 'deg_year2018', 'def_year2019', 'deg_year2019',
       'def_year2020', 'deg_year2020', 'gain_year2015', 'gain_year2016',
       'gain_year2017', 'gain_year2018', 'gain_year2019', 'gain_year2020',
       'source'],
      dtype='objec

In [None]:
#duplicated = erp4k['point_id'].duplicated().any() 
#if duplicated:
#    print ("problem")
#else:
#    print ("all good")

In [None]:
# verify and count possible values in different columns

#count_values_erp4k = erp4k['ocs_n1_2015'].value_counts()
#count_values_erp4k = erp4k['ocs_n1_2020'].value_counts()
#count_values_erp4k = erp4k['ocs_n2_2020'].value_counts()
#count_values_erp4k = erp4k['ocs_n2_2020'].value_counts()
#count_values_erp4k = erp4k['kmeans'].value_counts()
#count_values_erp4k = erp4k['source'].value_counts()
#print(count_values_erp4k)

In [None]:
#### transition matrix level 1 - 4k pts ERP

#tmatrix = pd.pivot_table(erp4k,values='source',index=['ocs_n1_2015'],columns=['ocs_n1_2020'],aggfunc="count")
#tmatrix

In [59]:
#### transition matrix level 2 - 4k pts ERP

#tmatrix = pd.pivot_table(erp4k,values='source',index=['ocs_n2_2015'],columns=['ocs_n2_2020'],aggfunc="count")
#tmatrix

# folder path to export data + file name
#file_export = '/home/sepal-user/eSBAE_CIV/data/tmatrixERP.xlsx'
#tmatrix.to_excel(file_export, index=False)

ocs_n2_2020,Autres cultures,Autres terres,Cacao,Cultures perennes,Etablissement humain,Foret dense,Foret secondaire,Plantation agroforestiere,Plantation forestiï¿½re ou reboisement,Terres gramineennes,Terres humides
ocs_n2_2015,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Autres cultures,419.0,,40.0,44.0,1.0,,,1.0,1.0,15.0,1.0
Autres terres,,6.0,,,,,,,,,
Cacao,25.0,,1456.0,29.0,2.0,,,9.0,,37.0,
Cultures perennes,3.0,,3.0,445.0,,,,,,1.0,
Etablissement humain,,,1.0,,58.0,,,,,,
Foret dense,3.0,,2.0,,,379.0,8.0,3.0,,4.0,
Foret secondaire,11.0,,31.0,1.0,1.0,,124.0,8.0,,12.0,
Plantation agroforestiere,13.0,,12.0,2.0,,,,254.0,,6.0,1.0
Terres gramineennes,30.0,,31.0,36.0,2.0,,3.0,,,321.0,
Terres humides,3.0,,1.0,,,,,,,,100.0


In [None]:
#### transition matrix level 2 - 4k pts ERP

#tmatrix = pd.pivot_table(erp4k,values='source',index=['ocs_n1_2020'],columns=['ocs_n2_2020'],aggfunc="count")
#tmatrix

## 3.2. Merge all CEO files into a new dataframe = 8300 points

In [None]:
# Inspect the column dtypes before merging (the same check for the other two tables is kept commented out).
erp4k.dtypes
#pts835.dtypes
#pts3k.dtypes

In [16]:
# Drop the row(s) whose point_id is the placeholder 'z'.
# .copy() makes erp4k_clean an independent frame, so assigning to its columns
# afterwards does not raise SettingWithCopyWarning.
erp4k_clean = erp4k[erp4k['point_id'] != 'z'].copy()
#len(erp4k) # = 3999
#len(erp4k_clean)  #=3998

In [17]:
# Cast point_id to int64 now that the non-numeric 'z' entry has been filtered out.
erp4k_clean['point_id'] = erp4k_clean['point_id'].astype('int64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  erp4k_clean['point_id'] = erp4k_clean['point_id'].astype('int64')


In [18]:
# Columns kept from every CEO table when the tables are combined.
harm_cols = [
    'point_id',
    'ocs_n1_2015',
    'ocs_n2_2015',
    'ocs_n1_2020',
    'ocs_n2_2020',
    'kmeans',
    'source',
]

In [None]:
### Concatener les deux tables
#merged_NERP = pd.concat([pts835[harm_cols],pts3k[harm_cols]], axis=0, ignore_index=True)

In [19]:
# Stack the three CEO tables (835 pts, 3K pts, ERP 4K pts), restricted to the
# harmonised columns, into one frame with a fresh 0..n-1 index.
ceo_tables = [pts835[harm_cols], pts3k[harm_cols], erp4k_clean[harm_cols]]
dfconcatCEOfull_db = pd.concat(ceo_tables, axis=0, ignore_index=True)
len(dfconcatCEOfull_db)  # = 8300

8300

In [20]:
# Confirm that only the harmonised columns remain.
dfconcatCEOfull_db.columns.tolist()

['point_id',
 'ocs_n1_2015',
 'ocs_n2_2015',
 'ocs_n1_2020',
 'ocs_n2_2020',
 'kmeans',
 'source']

In [21]:
# Report whether any point_id occurs more than once in the combined table.
duplicado = dfconcatCEOfull_db['point_id'].duplicated().any()
print("problem" if duplicado else "all good")

problem


In [22]:
# List every row whose point_id is shared with another row (all occurrences kept).
shares_point_id = dfconcatCEOfull_db['point_id'].duplicated(keep=False)
doublonsCEO = dfconcatCEOfull_db[shares_point_id]
print(doublonsCEO)

      point_id          ocs_n1_2015                   ocs_n2_2015  \
136     155643     Terres cultivees                      Anacarde   
427      72898   Terres forestieres                   Foret dense   
544     200349  Terres gramineennes                Savane arboree   
624     275438     Terres cultivees          Amenagement agricole   
825     289002     Terres cultivees             Jachere ou fourre   
841        230   Terres forestieres  Foret secondaire ou degradee   
900       2682     Terres cultivees             Jachere ou fourre   
1636     72898   Terres forestieres                   Foret dense   
2389    155643     Terres cultivees                      Anacarde   
2792    200349  Terres gramineennes                Savane arboree   
3603    275438  Terres gramineennes              Savane arbustive   
3792    289002     Terres cultivees                         Hevea   
5515       230   Terres forestieres              Foret secondaire   
5613      2682     Terres cultivee

In [23]:
# Mark the 'CEO3467' copy of each listed point_id as a duplicate ('yes');
# every other row is marked 'no'.
# NOTE(review): the duplicate listing also shows point_ids 230 and 2682 twice;
# they are not in this list, so both copies stay 'no' — confirm this is intended.
doublon_ids = [155643, 72898, 200349, 275438, 289002]
is_doublon = dfconcatCEOfull_db['point_id'].isin(doublon_ids) & (dfconcatCEOfull_db['source'] == 'CEO3467')
dfconcatCEOfull_db['doublon'] = np.where(is_doublon, 'yes', 'no')

count_values_doublon = dfconcatCEOfull_db['doublon'].value_counts()
print(count_values_doublon)

doublon
no     8295
yes       5
Name: count, dtype: int64


In [38]:
# Keep only the rows not flagged as duplicates.
# .copy() makes the result an independent frame, so adding columns to it
# afterwards does not raise SettingWithCopyWarning.
dfconcatCEOfull = dfconcatCEOfull_db[dfconcatCEOfull_db['doublon'] == 'no'].copy()
len(dfconcatCEOfull)

8295

In [62]:
# Build the target column 'redd_n1' (the "column of interest"): the classes /
# REDD activities for which area estimates are wanted.
# The rules below are applied in order and each later rule overwrites the
# earlier ones for the rows it matches; rows matched by no rule keep 'problem'.

dfconcatCEOfull['redd_n1'] = 'problem'

# Level 1 (IPCC categories): compare forest / non-forest between 2015 and 2020.
# forest -> non-forest: 'Def'
dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n1_2015'] == 'Terres forestieres') & (dfconcatCEOfull['ocs_n1_2020'] != 'Terres forestieres'), 'Def', dfconcatCEOfull['redd_n1'])
# non-forest -> forest: 'Gain'
dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n1_2015'] != 'Terres forestieres') & (dfconcatCEOfull['ocs_n1_2020'] == 'Terres forestieres'), 'Gain', dfconcatCEOfull['redd_n1'])
# forest in both years: 'SF'
dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n1_2015'] == 'Terres forestieres') & (dfconcatCEOfull['ocs_n1_2020'] == 'Terres forestieres'), 'SF', dfconcatCEOfull['redd_n1'])
# non-forest in both years: 'SNF'
dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n1_2015'] != 'Terres forestieres') & (dfconcatCEOfull['ocs_n1_2020'] != 'Terres forestieres'), 'SNF', dfconcatCEOfull['redd_n1'])

# New degradation from level 2 (national classes - NERP label)
dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n1_2015'] == 'Terres forestieres') & (dfconcatCEOfull['ocs_n2_2020'] == 'Foret secondaire ou degradee'), 'Deg', dfconcatCEOfull['redd_n1'])
# New degradation from level 2 (national classes - ERP label)
dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n1_2015'] == 'Terres forestieres') & (dfconcatCEOfull['ocs_n2_2020'] == 'Foret secondaire'), 'Deg', dfconcatCEOfull['redd_n1'])
# Degradation: natural forest -> agroforest (01/12/23)
dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n2_2015'] == 'Foret secondaire') & (dfconcatCEOfull['ocs_n2_2020'] == 'Plantation agroforestiere'), 'Deg', dfconcatCEOfull['redd_n1'])
dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n2_2015'] == 'Foret dense') & (dfconcatCEOfull['ocs_n2_2020'] == 'Plantation agroforestiere'), 'Deg', dfconcatCEOfull['redd_n1'])

# Stable area of degraded forest (the calculations will assume that losses and
# gains balance out, so net emissions = 0). These two rules run last, so forest
# that is secondary/degraded in both years ends up 'SDegF' rather than 'Deg'.
dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n2_2015'] == 'Foret secondaire') & (dfconcatCEOfull['ocs_n2_2020'] == 'Foret secondaire'), 'SDegF', dfconcatCEOfull['redd_n1'])
dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n2_2015'] == 'Foret secondaire ou degradee') & (dfconcatCEOfull['ocs_n2_2020'] == 'Foret secondaire ou degradee'), 'SDegF', dfconcatCEOfull['redd_n1'])

# Show how many points fall into each class.
count_values_dfconcatCEOfull = dfconcatCEOfull['redd_n1'].value_counts()
print(count_values_dfconcatCEOfull)

redd_n1
SNF      6219
SF       1532
SDegF     301
Def       203
Deg        21
Gain       19
Name: count, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfconcatCEOfull['redd_n1'] = 'problem'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfconcatCEOfull['redd_n1'] = np.where((dfconcatCEOfull['ocs_n1_2015'] == 'Terres forestieres') & (dfconcatCEOfull['ocs_n1_2020'] != 'Terres forestieres'), 'Def', dfconcatCEOfull['redd_n1'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning

In [None]:
#df_problem = dfconcatCEOfull[dfconcatCEOfull['redd_n1'] == 'Deg']
#df_problem

In [None]:
# folder path to export data + file name
#file_export = '/home/sepal-user/eSBAE_CIV/data/merged_NERP.xlsx'
#merged_NERP.to_excel(file_export, index=False)

#file_export = '/home/sepal-user/eSBAE_CIV/data/dfconcatCEOfull.xlsx'
#dfconcatCEOfull.to_excel(file_export, index=False)

In [63]:
#### Cross-tabulation: number of points per redd_n1 class (rows) and 2020 level-1 land use (columns)
tmatrix = pd.pivot_table(dfconcatCEOfull,values='source',index=['redd_n1'],columns=['ocs_n1_2020'],aggfunc="count")
tmatrix

ocs_n1_2020,Autres terres,Etablissement humain,Terres cultivees,Terres forestieres,Terres gramineennes,Terres humides
redd_n1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Def,,4.0,177.0,,21.0,1.0
Deg,,,,21.0,,
Gain,,,,19.0,,
SDegF,,,,301.0,,
SF,,,,1532.0,,
SNF,20.0,98.0,4844.0,,1106.0,151.0


In [None]:
#### transition matrix level 2

#tmatrix = pd.pivot_table(dfconcatCEOfull,values='source',index=['ocs_n2_2015'],columns=['ocs_n2_2020'],aggfunc="count")
#tmatrix

In [41]:
# Target column holding the classes / REDD activities for which area
# estimations are wanted.
column_of_interest = 'redd_n1'

# 4. Load national grid
##### the country 1km grid csv or gpkg file with all the points and columns including strata or kmeans, chg_prob etc.. 
##### For Ivory Coast we have 325631 points 

In [None]:
#df320k = gpd.read_file('/home/sepal-user/eSBAE_CIV/data/grid/cote_ivoire_all_classified_20231114.gpkg')
#df320k.to_parquet('/home/sepal-user/module_results/esbae/Cote_Ivoire_MRV/cote_ivoire_all_classified_20231114.parquet')
#df320k = gpd.read_file('/home/sepal-user/eSBAE_CIV/data/grid/320K.csv') ### USE THIS ONE IF NEED TO REPRODUCE

#db_light = df320k[['point_id','images','lon','lat',
#                   'cnc_1520','cnc_2022', 'CNC_ceo', 'FNF_2015', 'FNF_2022', 'database', 
#                   'fnf_prob_2015', 'fnf_prob_2022', 'fnf_max_prob', 'chg_prob',
#                   'simple_combined', 'multiplied', 'kmeans_chgprob', 'kmeans_simcom','kmeans_multi']]
#db_light.to_csv('/home/sepal-user/eSBAE_CIV/data/grid/bdd_320K_noTS.csv',index=False)

In [30]:
# Load the light version of the national grid (no time-series columns) from CSV.
# NOTE(review): the columns presumably come back as text from gpd.read_file on a
# CSV, which is why point_id is cast to int64 before merging — confirm.
df320k = gpd.read_file('/home/sepal-user/eSBAE_CIV/data/grid/bdd_320K_noTS.csv')

# Sanity check: row count and available columns.
print(len(df320k))
print(df320k.columns)

325631
Index(['point_id', 'images', 'lon', 'lat', 'cnc_1520', 'cnc_2022', 'CNC_ceo',
       'FNF_2015', 'FNF_2022', 'database', 'fnf_prob_2015', 'fnf_prob_2022',
       'fnf_max_prob', 'chg_prob', 'simple_combined', 'multiplied',
       'kmeans_chgprob', 'kmeans_simcom', 'kmeans_multi', 'geometry'],
      dtype='object')


In [31]:
## Make point_id an int64 so it can be used as the merge key with the other tables.
#df320k.dtypes
df320k['point_id'] = df320k['point_id'].astype('int64')

In [None]:
#df320k.head()

In [None]:
# List all columns 
#df320k.columns.tolist()

##### Import grid with ERP/NERP info and phytogeographic zones = 325,631 pts
###### ERP grid = 46,421 pts
###### NERP grid = 279,210 pts

In [32]:
# Import the grid carrying the ERP/NERP zone information and the two
# phytogeographic zonings (columns 'ERPOBJECTI', 'phytoiffnz' and 'phytoZone').
df320k_erp_phytos = gpd.read_file('/home/sepal-user/eSBAE_CIV/data/grid/GRID_CIV1km_ERP_phytoiffn_phytoCSV.csv') ##csv grid with ERP/NERP + 2 phyto zones
len(df320k_erp_phytos)

325631

In [33]:
# Make point_id an int64 so it matches the key type used by the other tables.
#df320k_erp_phytos.dtypes
df320k_erp_phytos['point_id'] = df320k_erp_phytos['point_id'].astype('int64')
#df320k_erp_phytos.dtypes

In [34]:
# List the columns available in the ERP / phyto-zone grid.
print(df320k_erp_phytos.columns)

Index(['fid', 'point_id', 'LON', 'PLOTID', 'LAT', 'ERPOBJECTI', 'phytoiffnz',
       'phytoZone', 'geometry'],
      dtype='object')


In [64]:
# Attach the ERP flag, both phyto zonings and the coordinates to the grid's
# stratum column; the left join keeps every row of df320k.
zone_cols = ['point_id', 'ERPOBJECTI', 'phytoiffnz', 'phytoZone', 'LON', 'LAT']
df320k_zones = df320k[['point_id', 'kmeans_multi']].merge(
    df320k_erp_phytos[zone_cols],
    how='left',
    on='point_id',
)
len(df320k_zones)

325631

In [36]:
# List the columns of the merged grid.
print(df320k_zones.columns)

Index(['point_id', 'kmeans_multi', 'ERPOBJECTI', 'phytoiffnz', 'phytoZone',
       'LON', 'LAT'],
      dtype='object')


In [None]:
#verif = pd.pivot_table(df320k_zones,values='point_id',index=['ERPOBJECTI'],columns=['phytoZone'],aggfunc="count")
#verif

## 5. FULL dataframe with national GRID (light version) + 8k interpreted points

In [65]:
# Number of interpreted points per redd_n1 class before joining them to the grid.
count_values = dfconcatCEOfull['redd_n1'].value_counts()
print(count_values)

redd_n1
SNF      6219
SF       1532
SDegF     301
Def       203
Deg        21
Gain       19
Name: count, dtype: int64


In [66]:
# Report whether any point_id occurs more than once in the grid.
duplicados = df320k_zones['point_id'].duplicated().any()
print("problem" if duplicados else "all good")

all good


In [None]:
#duplicado = dfconcatCEOfull['point_id'].duplicated().any() 
#if duplicado:
#    print ("problem")
#else:
#    print ("all good")
#### correction above

In [67]:
# Left-join the interpreted classes onto the grid: every grid row is kept and
# redd_n1 stays missing for points that were not interpreted.
# NOTE(review): the displayed length (325633) is two more than the grid
# (325631). A left join only adds rows when the right-hand key repeats, and the
# earlier duplicate listing shows point_ids 230 and 2682 twice without either
# being flagged as 'doublon' — likely the cause; confirm and de-duplicate.
df320k_full = df320k_zones[['point_id', 'kmeans_multi', 'LON', 'LAT', 'ERPOBJECTI', 'phytoiffnz', 'phytoZone']].merge(dfconcatCEOfull[['point_id', 'redd_n1']], how='left', on='point_id')
len(df320k_full)

325633

In [81]:
# Number of grid points per redd_n1 class after the join (missing values are not counted).
count_values = df320k_full['redd_n1'].value_counts()
print(count_values)

redd_n1
SNF      6219
SF       1532
SDegF     301
Def       203
Deg        21
Gain       19
Name: count, dtype: int64


In [None]:
###### préparer catégories par zone phyto (zone iffn et autre)

In [None]:
### DA2 category-based models (27/11/2023)
#tmatrix = pd.pivot_table(df_scDA3,values='point_id',index=['redd_n1'],columns=['phytoZone'],aggfunc="count")
#tmatrix

In [82]:
# Number of grid points per IFFN phytogeographic sector.
count_values_phyto_iffn = df320k_full['phytoiffnz'].value_counts()
print(count_values_phyto_iffn)

phytoiffnz
Secteur Mesophile       131577
Secteur SubSoudanais     96018
Secteur Ombrophile       56511
Secteur Soudanais        32396
                          6529
Secteur Montagne          2602
Name: count, dtype: int64


In [83]:
# Short code for each IFFN phytogeographic sector; any value not listed below
# (including blank entries) becomes 'nodata'.
iffn_sector_codes = {
    'Secteur Mesophile': 'meso',
    'Secteur SubSoudanais': 'subs',
    'Secteur Ombrophile': 'omb',
    'Secteur Soudanais': 'soud',
    'Secteur Montagne': 'mont',
}
df320k_full['phyto_code_if'] = df320k_full['phytoiffnz'].map(iffn_sector_codes).fillna('nodata')

In [84]:
# Number of grid points per IFFN sector code.
count_values = df320k_full['phyto_code_if'].value_counts()
print(count_values)

phyto_code_if
meso      131577
subs       96018
omb        56511
soud       32396
nodata      6529
mont        2602
Name: count, dtype: int64


In [85]:
## Categories per IFFN phytogeographic sector: '<redd_n1>_<phyto_code_if>'
df320k_full['redd_pythoif'] = df320k_full['redd_n1'] + '_' + df320k_full['phyto_code_if']

In [86]:
# Number of interpreted points per class and IFFN sector.
count_values = df320k_full['redd_pythoif'].value_counts()
print(count_values)

redd_pythoif
SNF_omb         2569
SNF_meso        2280
SNF_subs         961
SF_omb           563
SF_subs          530
SF_meso          321
SNF_soud         303
SDegF_omb        135
SDegF_meso       124
SF_soud           79
Def_meso          71
SNF_nodata        70
Def_omb           67
Def_subs          51
SNF_mont          36
SF_nodata         31
SDegF_subs        19
Deg_omb           15
SDegF_mont        11
Gain_omb          11
Def_soud          10
SDegF_nodata       9
Gain_meso          8
SF_mont            8
Deg_meso           5
SDegF_soud         3
Def_mont           3
Deg_subs           1
Def_nodata         1
Name: count, dtype: int64


In [87]:
# Number of grid points per sector in the second zoning ('phytoZone').
count_values_phyto2 = df320k_full['phytoZone'].value_counts()
print(count_values_phyto2)

phytoZone
Secteur Soudanais        128809
Secteur mésophile         96599
Secteur Ombrophile        57055
Secteur Sub Soudanais     37819
                           5351
Name: count, dtype: int64


In [88]:
# Short code for each 'phytoZone' sector; any value not listed below
# (including blank entries) becomes 'nodata'.
phyto_zone_codes = {
    'Secteur Soudanais': 'soud',
    'Secteur mésophile': 'meso',
    'Secteur Ombrophile': 'omb',
    'Secteur Sub Soudanais': 'subs',
}
df320k_full['phyto_code'] = df320k_full['phytoZone'].map(phyto_zone_codes).fillna('nodata')


In [89]:
# Number of grid points per 'phytoZone' sector code.
count_values = df320k_full['phyto_code'].value_counts()
print(count_values)

phyto_code
soud      128809
meso       96599
omb        57055
subs       37819
nodata      5351
Name: count, dtype: int64


In [90]:
## Categories per 'phytoZone' sector: '<redd_n1>_<phyto_code>'
df320k_full['redd_pytho'] = df320k_full['redd_n1'] + '_' + df320k_full['phyto_code'] 

In [91]:
# Number of interpreted points per class and 'phytoZone' sector.
count_values2 = df320k_full['redd_pytho'].value_counts()
print(count_values2)

redd_pytho
SNF_omb         2571
SNF_meso        1947
SNF_soud        1268
SF_soud          612
SF_omb           566
SNF_subs         374
SF_meso          239
SDegF_omb        135
SDegF_meso       117
SF_subs           91
Def_meso          68
Def_omb           67
Def_soud          61
SNF_nodata        59
SF_nodata         24
SDegF_soud        22
SDegF_subs        18
Deg_omb           15
Gain_omb          11
SDegF_nodata       9
Gain_meso          6
Def_subs           6
Deg_meso           4
Gain_subs          2
Deg_subs           1
Deg_soud           1
Def_nodata         1
Name: count, dtype: int64


In [92]:
# Scenario DA 1: keep only the grid rows whose ERPOBJECTI is not the string '1'.
# NOTE(review): the resulting length (279210) equals the NERP grid size quoted
# in section 4, so this presumably selects the NERP zone only — confirm.
df_scDA1 = df320k_full[df320k_full['ERPOBJECTI'] != '1']
len(df_scDA1)

279210

In [93]:
# Number of interpreted points per redd_n1 class within scenario DA1.
count_values = df_scDA1['redd_n1'].value_counts()
print(count_values)

redd_n1
SNF      3096
SF        898
SDegF     176
Def       100
Gain        4
Deg         2
Name: count, dtype: int64


In [None]:
#df320k_full.to_csv('/home/sepal-user/eSBAE_CIV/data/df320k_full.csv',index=False)

In [None]:
# Scenario DA 3 : UTILISER df320k_full
#df_sc_DA3 = df320k_full
#len(df_scDA3)

##### Perform the area calculation using the stratum column; in this notebook that column is `kmeans_multi`. Use the merged dataframe (national grid points + CEO-validated points).

## 6. Run the eSBAE function for the different scenarios

In [80]:
# Scenario DA1: area estimates per redd_n1 class, stratified by kmeans_multi.
# total_area is the number of rows, so areas are expressed in grid points.
# NOTE(review): this cell was labelled "NERF = ERP + NERP", but df_scDA1 keeps
# only rows with ERPOBJECTI != '1' — confirm which zone(s) DA1 should cover.
calculate_areas(db_total=df_scDA1, strata_column='kmeans_multi', categories_column='redd_n1', total_area=len(df_scDA1), z_score=1.645)

['SNF' 'SF' 'SDegF' 'Def' 'Deg' 'Gain']
 Calculating stats for SNF
There are 3096 entries of SNF in redd_n1.
 Calculating stats for SF
There are 898 entries of SF in redd_n1.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)


 Calculating stats for SDegF
There are 176 entries of SDegF in redd_n1.
 Calculating stats for Def
There are 100 entries of Def in redd_n1.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)


 Calculating stats for Deg
There are 2 entries of Deg in redd_n1.
 Calculating stats for Gain
There are 4 entries of Gain in redd_n1.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)


Unnamed: 0,area_stratum_0,ci_stratum_0,area_stratum_2,ci_stratum_2,area_stratum_1,ci_stratum_1,area_total,MOE,MOE_perc
SNF,57362.060818,1407.447738,148446.064286,2426.288709,15022.431034,756.949867,220830.556138,2905.298459,1.315623
SF,20926.606181,1323.334425,17379.051429,2256.532155,7777.793103,686.960893,46083.450713,2704.637986,5.869001
SDegF,2357.344965,506.281514,1206.878571,625.565603,3418.465517,503.75174,6982.689054,949.43094,13.596924
Def,2274.631107,497.575172,1568.942143,712.484413,1003.586207,286.42306,4847.159456,915.015442,18.877354
Deg,0.0,0.0,120.687857,198.460608,31.362069,51.560945,152.049926,205.049126,134.856446
Gain,41.356929,68.015189,241.375714,280.565356,31.362069,51.560945,314.094712,293.260151,93.36679


In [None]:
# Scenario DA1, classes split by IFFN phytogeographic sector (column 'redd_pythoif').
calculate_areas(db_total=df_scDA1, strata_column='kmeans_multi', categories_column='redd_pythoif', total_area=len(df_scDA1), z_score=1.645)

In [None]:
# Scenario DA1, classes split by 'phytoZone' sector (column 'redd_pytho').
calculate_areas(db_total=df_scDA1, strata_column='kmeans_multi', categories_column='redd_pytho', total_area=len(df_scDA1), z_score=1.645)

In [None]:
# Scenario DA3: NERF = ERP + NERP (whole merged grid), classes from redd_n1.
# NOTE(review): len(df320k_full) was displayed as 325633 while the grid has
# 325631 points, so total_area and the stratum shares include two extra rows.
calculate_areas(db_total=df320k_full, strata_column='kmeans_multi', categories_column='redd_n1', total_area=len(df320k_full), z_score=1.645)

In [None]:
# Scenario DA3: NERF = ERP + NERP, broken down by the 'phyto IFFN' categories
# (column 'redd_pythoif').
# total_area is the number of rows, so the estimated areas are expressed in
# "number of points" units; z_score=1.645 is the two-sided 90% normal quantile.
calculate_areas(
    db_total=df320k_full,
    total_area=len(df320k_full),
    strata_column='kmeans_multi',
    categories_column='redd_pythoif',
    z_score=1.645,
)

### 7.  Résultats 2020-2022

In [7]:
# Load the semicolon-separated '835' table for 2022 and rename its
# 'oce_n1_2022' column to 'ocs_n1_2022', the name the rest of the notebook
# selects on. The rename is chained so the cell never mutates a frame in place.
pts835_22 = pd.read_csv(
    '/home/sepal-user/eSBAE_CIV/data/835_final_v3_2022.csv',
    sep=';',
).rename(columns={'oce_n1_2022': 'ocs_n1_2022'})
list(pts835_22.columns)

['source',
 'point_id',
 'fid',
 'index',
 'images',
 'mon_images',
 'bfast_chan',
 'bfast_magn',
 'bfast_mean',
 'cusum_chan',
 'cusum_conf',
 'cusum_magn',
 'red_mean',
 'red_sd',
 'red_min',
 'red_max',
 'nir_mean',
 'nir_sd',
 'nir_min',
 'nir_max',
 'swir1_mean',
 'swir1_sd',
 'swir1_min',
 'swir1_max',
 'swir2_mean',
 'swir2_sd',
 'swir2_min',
 'swir2_max',
 'ndfi_mean',
 'ndfi_sd',
 'ndfi_min',
 'ndfi_max',
 'brightness',
 'brightne_1',
 'brightne_2',
 'brightne_3',
 'greenness_',
 'greennes_1',
 'greennes_2',
 'greennes_3',
 'wetness_me',
 'wetness_sd',
 'wetness_mi',
 'wetness_ma',
 'bs_slope_m',
 'bs_slope_s',
 'bs_slope_1',
 'bs_slope_2',
 'ewma_jrc_d',
 'ewma_jrc_c',
 'ewma_jrc_m',
 'mosum_jrc_',
 'mosum_jr_1',
 'mosum_jr_2',
 'cusum_jrc_',
 'cusum_jr_1',
 'cusum_jr_2',
 'ccdc_chang',
 'ccdc_magni',
 'aspect',
 'dw_class_m',
 'dw_tree_pr',
 'dw_tree__1',
 'dw_tree__2',
 'dw_tree__3',
 'elevation',
 'esa_lc20',
 'esa_lc21',
 'esri_lc17',
 'esri_lc18',
 'esri_lc19',
 'esri_lc

In [6]:
# Load the semicolon-separated '3K' table for 2022 and list its column names.
pts3k_22 = pd.read_csv(
    '/home/sepal-user/eSBAE_CIV/data/3K_final_v3_2022.csv',
    sep=';',
)
list(pts3k_22.columns)

['source',
 'point_id',
 'fid',
 'index',
 'images',
 'mon_images',
 'bfast_chan',
 'bfast_magn',
 'bfast_mean',
 'cusum_chan',
 'cusum_conf',
 'cusum_magn',
 'red_mean',
 'red_sd',
 'red_min',
 'red_max',
 'nir_mean',
 'nir_sd',
 'nir_min',
 'nir_max',
 'swir1_mean',
 'swir1_sd',
 'swir1_min',
 'swir1_max',
 'swir2_mean',
 'swir2_sd',
 'swir2_min',
 'swir2_max',
 'ndfi_mean',
 'ndfi_sd',
 'ndfi_min',
 'ndfi_max',
 'brightness',
 'brightne_1',
 'brightne_2',
 'brightne_3',
 'greenness_',
 'greennes_1',
 'greennes_2',
 'greennes_3',
 'wetness_me',
 'wetness_sd',
 'wetness_mi',
 'wetness_ma',
 'bs_slope_m',
 'bs_slope_s',
 'bs_slope_1',
 'bs_slope_2',
 'ewma_jrc_d',
 'ewma_jrc_c',
 'ewma_jrc_m',
 'mosum_jrc_',
 'mosum_jr_1',
 'mosum_jr_2',
 'cusum_jrc_',
 'cusum_jr_1',
 'cusum_jr_2',
 'ccdc_chang',
 'ccdc_magni',
 'aspect',
 'dw_class_m',
 'dw_tree_pr',
 'dw_tree__1',
 'dw_tree__2',
 'dw_tree__3',
 'elevation',
 'esa_lc20',
 'esa_lc21',
 'esri_lc17',
 'esri_lc18',
 'esri_lc19',
 'esri_lc

In [8]:
# Essential CEO columns kept from both tables: the point identifier, the
# level-1 (n1) and level-2 (n2) class labels for 2020 and 2022, and the source tag.
harm_cols22 = [
    'point_id',
    'ocs_n1_2020',
    'ocs_n2_2020',
    'ocs_n1_2022',
    'ocs_n2_2022',
    'source',
]

In [10]:
# Stack the two CEO tables, restricted to the shared essential columns, into a
# single frame with a fresh 0..n-1 index.
dfconcatCEO22 = pd.concat(
    [ceo_table[harm_cols22] for ceo_table in (pts835_22, pts3k_22)],
    ignore_index=True,
)
len(dfconcatCEO22)  # 4302 when last run


4302

In [11]:
# Sanity check: does any point_id occur more than once in the combined table?
duplicado22 = dfconcatCEO22.duplicated(subset='point_id').any()
print("problem" if duplicado22 else "all good")

problem


In [12]:
# List every row whose point_id is shared with at least one other row
# (keep=False marks all members of each duplicate group, not just the repeats).
shared_point_id = dfconcatCEO22['point_id'].duplicated(keep=False)
doublonsCEO22 = dfconcatCEO22.loc[shared_point_id]
print(doublonsCEO22)

      point_id          ocs_n1_2020           ocs_n2_2020         ocs_n1_2022  \
136     155643     Terres cultivees              Anacarde    Terres cultivees   
427      72898     Terres cultivees  Amenagement agricole    Terres cultivees   
544     200349  Terres gramineennes        Savane arboree  Terre gramineennes   
624     275438     Terres cultivees  Amenagement agricole    Terres cultivees   
825     289002     Terres cultivees     Jachere ou fourre    Terres cultivees   
1636     72898   Terres forestieres           Foret dense  Terres forestieres   
2389    155643     Terres cultivees              Anacarde    Terres cultivees   
2792    200349  Terres gramineennes        Savane arboree  Terre gramineennes   
3603    275438  Terres gramineennes      Savane arbustive  Terre gramineennes   
3792    289002     Terres cultivees                 Hevea    Terres cultivees   

               ocs_n2_2022   source  
136               Anacarde   CEO835  
427   Amenagement agricole   CEO

In [25]:
# Flag the redundant copy of each point_id that occurs in both CEO tables.
# For the ids listed below (the duplicate groups printed by the previous cell),
# the row whose source is 'CEO3467' is marked 'yes' so it can be dropped;
# every other row is marked 'no'.
# The ids live in one list instead of one copy-pasted statement per id, so
# adding or removing a duplicate is a one-token change.
duplicated_point_ids = [155643, 72898, 200349, 275438, 289002]
is_redundant_copy = (
    dfconcatCEO22['point_id'].isin(duplicated_point_ids)
    & (dfconcatCEO22['source'] == 'CEO3467')
)
dfconcatCEO22['doublon'] = np.where(is_redundant_copy, 'yes', 'no')

# Expect exactly one 'yes' per id in duplicated_point_ids.
count_values_doublon22 = dfconcatCEO22['doublon'].value_counts()
print(count_values_doublon22)

doublon
no     4297
yes       5
Name: count, dtype: int64


In [26]:
# Keep only the rows that were not flagged as redundant duplicates.
# The explicit .copy() makes the result an independent DataFrame rather than a
# slice of dfconcatCEO22, so later cells can add columns to it without pandas
# emitting SettingWithCopyWarning.
dfconcatCEO22_clean = dfconcatCEO22[dfconcatCEO22['doublon'] == 'no'].copy()
len(dfconcatCEO22_clean)


4297

In [94]:
#dfconcatCEO22_clean.to_csv('/home/sepal-user/eSBAE_CIV/data/dfconcatCEO22_clean.csv',index=False)

In [27]:
# Level-1 transition matrix: rows are the 2020 class, columns the 2022 class,
# and each cell counts the non-null 'source' entries for that (2020, 2022) pair.
# Pairs that never occur are left as NaN.
tmatrix2022 = dfconcatCEO22_clean.pivot_table(
    index=['ocs_n1_2020'],
    columns=['ocs_n1_2022'],
    values='source',
    aggfunc="count",
)
tmatrix2022

ocs_n1_2022,Autres terres,Etablissement humain,Terre gramineennes,Terres cultivees,Terres forestieres,Terres humides
ocs_n1_2020,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Autres terres,14.0,,,,,
Etablissement humain,,38.0,,,,
Terres cultivees,,6.0,,2373.0,1.0,
Terres forestieres,,1.0,,29.0,1053.0,
Terres gramineennes,,1.0,698.0,33.0,,
Terres humides,,,,,,50.0


In [28]:
# Level-2 transition matrix: rows are the 2020 level-2 class, columns the 2022
# level-2 class, and each cell counts the non-null 'source' entries for that
# pair. Pairs that never occur are left as NaN.
tmatrix2022n2 = dfconcatCEO22_clean.pivot_table(
    index=['ocs_n2_2020'],
    columns=['ocs_n2_2022'],
    values='source',
    aggfunc="count",
)
tmatrix2022n2


ocs_n2_2022,Affleurement rocheux,Amenagement agricole,Anacarde,Cacao,Cocoteraie,Cours et voie eau,Foret dense,Foret gelerie,Foret secondaire ou degradee,Foret sur sol hydromorphe,...,Palmier,Plan eau,Plantation agroforestiere,Plantation forestiere ou reboisement,Plantation fruitiere,Savane arboree,Savane arbustive,Savane herbeuse,Sol nu,Zones marecageuses
ocs_n2_2020,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Affleurement rocheux,9.0,,,,,,,,,,...,,,,,,,,,,
Amenagement agricole,,976.0,9.0,4.0,,,,1.0,,,...,1.0,,,,2.0,,,,,
Anacarde,,2.0,260.0,,,,,,,,...,,,,,,,,,,
Cacao,,3.0,,363.0,,,,,,,...,,,,,,,,,,
Cocoteraie,,,,,2.0,,,,,,...,,,,,,,,,,
Cours et voie eau,,,,,,18.0,,,,,...,,,,,,,,,,
Foret dense,,21.0,1.0,,,,640.0,,2.0,,...,,,,,,,,,,
Foret gelerie,,1.0,,,,,1.0,142.0,1.0,,...,1.0,,,,,,,,,
Foret secondaire ou degradee,,4.0,,,,,,,174.0,,...,,,,,,,,,,
Foret sur sol hydromorphe,,1.0,,,,,,,,57.0,...,,,,,,,,,,


In [29]:
# Build the target ("column of interest") 'redd_22': the REDD+ activity class of
# each point for 2020-2022, i.e. the classes for which area estimates are wanted.
#
# The rules are evaluated with np.select, which keeps the FIRST matching rule,
# so they are listed from most specific to most general. This reproduces the
# "later assignment overrides earlier one" order of a sequential np.where chain.
# The column is attached with .assign(), which returns a new DataFrame: nothing
# is written into a slice (no SettingWithCopyWarning) and re-running the cell
# always yields the same result.
forest_2020 = dfconcatCEO22_clean['ocs_n1_2020'] == 'Terres forestieres'
forest_2022 = dfconcatCEO22_clean['ocs_n1_2022'] == 'Terres forestieres'
class_n2_2020 = dfconcatCEO22_clean['ocs_n2_2020']
class_n2_2022 = dfconcatCEO22_clean['ocs_n2_2022']

redd_rules = [
    # Stable area of degraded forest (the calculations will assume that losses
    # and gains balance out, so net emissions = 0).
    ((class_n2_2020 == 'Foret secondaire ou degradee') & (class_n2_2022 == 'Foret secondaire ou degradee'), 'SDegF'),
    ((class_n2_2020 == 'Foret secondaire') & (class_n2_2022 == 'Foret secondaire'), 'SDegF'),
    # New degradation from level 2 (national classes - ERP).
    (forest_2020 & (class_n2_2022 == 'Foret secondaire'), 'Deg'),
    # New degradation from level 2 (national classes - NERP).
    (forest_2020 & (class_n2_2022 == 'Foret secondaire ou degradee'), 'Deg'),
    # IPCC categories level (level 1): these four cases are mutually exclusive
    # and together cover every row.
    (~forest_2020 & ~forest_2022, 'SNF'),
    (forest_2020 & forest_2022, 'SF'),
    (~forest_2020 & forest_2022, 'Gain'),
    (forest_2020 & ~forest_2022, 'Def'),
]
rule_masks = [mask for mask, _ in redd_rules]
rule_labels = [label for _, label in redd_rules]

# 'problem' is the sentinel for a row matched by no rule; it should never appear.
dfconcatCEO22_clean = dfconcatCEO22_clean.assign(
    redd_22=np.select(rule_masks, rule_labels, default='problem')
)

count_values_dfconcatCEO22 = dfconcatCEO22_clean['redd_22'].value_counts()
print(count_values_dfconcatCEO22)

redd_22
SNF      3213
SF        876
SDegF     174
Def        30
Deg         3
Gain        1
Name: count, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfconcatCEO22_clean['redd_22'] = 'problem'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfconcatCEO22_clean['redd_22'] = np.where((dfconcatCEO22_clean['ocs_n1_2020'] == 'Terres forestieres') & (dfconcatCEO22_clean['ocs_n1_2022'] != 'Terres forestieres'), 'Def', dfconcatCEO22_clean['redd_22'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/ind

In [55]:
# Attach the 2020-2022 REDD+ label to the full point table. The left join keeps
# every row of df320k_full; points without a CEO interpretation get NaN in
# 'redd_22'.
# validate='many_to_one' makes pandas raise MergeError if point_id is repeated
# on the CEO side, which would otherwise silently multiply rows of the result.
# NOTE(review): the labelled CEO frame holds 3213 'SNF' points, yet
# calculate_areas reports 3215 after this merge. That suggests some point_id
# values are repeated in df320k_full itself; confirm and deduplicate if so.
full_table_columns = ['point_id', 'kmeans_multi', 'LON', 'LAT', 'ERPOBJECTI', 'phytoiffnz', 'phytoZone']
df320k_full_NERP22 = df320k_full[full_table_columns].merge(
    dfconcatCEO22_clean[['point_id', 'redd_22']],
    how='left',
    on='point_id',
    validate='many_to_one',
)
len(df320k_full_NERP22)

325633

In [58]:
# 2022 results: NERP (categories taken from column 'redd_22').
# total_area is the number of rows, so the estimated areas are expressed in
# "number of points" units; z_score=1.645 is the two-sided 90% normal quantile.
calculate_areas(
    db_total=df320k_full_NERP22,
    total_area=len(df320k_full_NERP22),
    strata_column='kmeans_multi',
    categories_column='redd_22',
    z_score=1.645,
)

['SNF' 'SF' 'SDegF' 'Deg' 'Def' 'Gain']
 Calculating stats for SNF
There are 3215 entries of SNF in redd_22.
 Calculating stats for SF
There are 876 entries of SF in redd_22.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)


 Calculating stats for SDegF
There are 174 entries of SDegF in redd_22.
 Calculating stats for Deg
There are 3 entries of Deg in redd_22.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)


 Calculating stats for Def
There are 30 entries of Def in redd_22.
 Calculating stats for Gain
There are 1 entries of Gain in redd_22.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)


Unnamed: 0,area_stratum_0,ci_stratum_0,area_stratum_2,ci_stratum_2,area_stratum_1,ci_stratum_1,area_total,MOE,MOE_perc
SNF,68529.653731,1574.240314,174067.056429,2719.77346,20305.727784,926.339581,262902.437944,3276.202872,1.246167
SF,23064.689552,1498.614174,19885.375714,2602.147041,9537.538808,845.969127,52487.604074,3119.723935,5.943735
SDegF,2562.743284,565.927141,1540.416429,761.017785,4191.902137,618.671641,8295.061849,1132.332195,13.650678
Deg,94.916418,110.350948,0.0,0.0,38.457818,63.227519,133.374236,127.181174,95.356628
Def,1091.538806,372.256311,560.151429,460.065901,115.373453,109.38988,1767.063688,601.831821,34.058298
Gain,47.458209,78.049331,0.0,0.0,0.0,0.0,47.458209,78.049331,164.459075
