# Labelled carbon stock dataset

In [None]:
import os
import pandas as pd
import xarray as xr

## Carbon stock

In [None]:
# Directory containing the WISE3 CSV tables (relative to the working directory).
path_soilc = '../data/ISRIC_report_2008_02_csv/'

In [None]:
# Show which CSV tables are available in the data directory.
os.listdir(path_soilc)

['WISE3_LABcodes_Description.csv',
 'WISE3_HORIZON.csv',
 'WISE3_coding_conventions.csv',
 'WISE3__ReadMeFirst.csv',
 'WISE3_LABname.csv',
 'WISE3_LABname_codes.csv',
 'WISE3_SITE.csv',
 'WISE3_SOURCE.csv']

Calculate the carbon stock of each soil profile down to a fixed depth.

In [None]:
def get_carbon_stock(path_csv, depth=100):
    """Compute a depth-weighted organic-carbon total for every soil profile.

    Each horizon (row) contributes ``weight * orgc * bulkdens`` where
    ``weight = (min(botdep, depth) - topdep) / depth`` is the fraction of the
    ``[0, depth]`` interval the horizon occupies. Contributions are summed per
    ``wise3_id``.

    Horizons are skipped when ``orgc`` or ``bulkdens`` is missing, or when the
    truncated thickness is not positive (the horizon starts at or below
    ``depth``). A profile with gaps is therefore summed over its remaining
    horizons only, and a profile without any usable horizon is absent from the
    result.

    Parameters
    ----------
    path_csv : str or path-like or file-like
        Anything ``pandas.read_csv`` accepts. The table must provide the
        columns ``wise3_id``, ``topdep``, ``botdep``, ``orgc`` and
        ``bulkdens`` (matched case-insensitively).
    depth : number, default 100
        Lower limit of the integration interval, in the same unit as
        ``topdep``/``botdep`` (presumably cm -- confirm against the WISE3
        documentation). Must be positive.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``wise3_id`` with a single column ``carbon_stock``.
        NOTE(review): the unit of ``carbon_stock`` follows from the units of
        ``orgc`` and ``bulkdens`` in the source table -- confirm before use.

    Raises
    ------
    ValueError
        If ``depth`` is not a positive number.
    KeyError
        If one of the required columns is missing from the table.
    """
    # A zero or negative depth would otherwise yield an empty table or weights
    # with a flipped sign instead of an error.
    if not depth > 0:
        raise ValueError(f'depth must be a positive number, got {depth!r}')

    cols = ['wise3_id', 'topdep', 'botdep', 'orgc', 'bulkdens']

    # Only parse the columns that are needed; header case is normalised below.
    horizon = pd.read_csv(path_csv, usecols=lambda name: name.lower() in cols)
    horizon.columns = [c.lower() for c in horizon.columns]

    # Fraction of the [0, depth] interval covered by each horizon.
    layers = horizon[cols].assign(
        segment_length=lambda d: (
            (d['botdep'].clip(upper=depth) - d['topdep']) / depth
        )
    )

    usable = layers[
        layers['bulkdens'].notnull()
        & layers['orgc'].notnull()
        & layers['segment_length'].gt(0)
    ]

    # Computed as a standalone Series so no filtered slice is mutated.
    weighted = usable['segment_length'] * usable['orgc'] * usable['bulkdens']
    per_profile = weighted.groupby(usable['wise3_id']).sum()

    return per_profile.rename('carbon_stock').to_frame()

In [None]:
# os.path.join inserts exactly one separator, whether or not path_soilc ends with a slash.
carbon_stock = get_carbon_stock(
    os.path.join(path_soilc, 'WISE3_HORIZON.csv'), depth=100)

Get the (longitude, latitude) coordinates of the profiles.

In [None]:
# Load the site table; os.path.join avoids a doubled path separator.
site = pd.read_csv(os.path.join(path_soilc, 'WISE3_SITE.csv'))
# Lower-case the column names so later cells can refer to them uniformly.
site.columns = site.columns.str.lower()

In [None]:
# Inspect which attributes the site table provides.
site.columns

Index(['wise3_id', 'iso', 'descr', 'dateyr', 'datemon', 'soldep', 'hornum',
       'fao_90', 'pha_90', 'fao_74', 'pha_74', 'wrb2006', 'uscl', 'usyr',
       'local', 'londd', 'latdd', 'lonlat_acc', 'locat', 'koppen', 'altit',
       'lform', 'posit', 'slope', 'parmat', 'drain', 'landus', 'source_id',
       'pitref', 'lab_id', 'country'],
      dtype='object')

In [None]:
# Profile identifier with its longitude/latitude, copied so `site` itself stays untouched.
df = site.loc[:, ['wise3_id', 'londd', 'latdd']].copy()

In [None]:
# Keep only the profiles present in both the site table and the carbon stock table.
carbon_stock = df.merge(carbon_stock, on='wise3_id', how='inner')

In [None]:
# Preview the merged table: one row per profile with coordinates and carbon stock.
carbon_stock.head()

Unnamed: 0,wise3_id,londd,latdd,carbon_stock
0,AL0007,19.99556,41.06944,12.11205
1,AL0008,20.64722,40.215,6.79968
2,AL0009,20.78444,40.63722,10.18997
3,AL0010,19.48361,40.34528,15.78198
4,AL0011,19.77028,41.37778,17.50674


## MODCF intraannualSD

In [None]:
%%time

# Open the MODCF intra-annual SD GeoTIFF through xarray.
# NOTE(review): xr.open_rasterio was deprecated and later removed from xarray in
# favour of rioxarray.open_rasterio -- confirm the installed xarray version still
# provides it before re-running this notebook.
da = xr.open_rasterio('../data/MODCF_intraannualSD.tif')

CPU times: user 59.8 ms, sys: 24.3 ms, total: 84.1 ms
Wall time: 144 ms


In [None]:
# Wrap the profile longitudes (x) and latitudes (y) as DataArrays for the raster lookup.
x, y = (xr.DataArray(carbon_stock[col]) for col in ('londd', 'latdd'))

In [None]:
%%time

# Sample band 1 of the raster at the grid cell nearest to each profile's (x, y).
# x and y are DataArrays built from the same frame, so this presumably triggers
# xarray's pointwise (vectorized) selection and yields one value per profile
# -- TODO confirm. `.values` converts the selection to a plain NumPy array.
data = da.sel(band=1, x=x, y=y, method='nearest').values

CPU times: user 6.92 s, sys: 980 ms, total: 7.9 s
Wall time: 9.36 s


In [None]:
# Attach the sampled raster values as a new column, one value per profile row.
carbon_stock['intraannualSD'] = data

In [None]:
# Display the labelled dataset; the notebook shows a truncated view of the rows.
carbon_stock

Unnamed: 0,wise3_id,londd,latdd,carbon_stock,intraannualSD
0,AL0007,19.99556,41.06944,12.11205,1268
1,AL0008,20.64722,40.21500,6.79968,1115
2,AL0009,20.78444,40.63722,10.18997,1245
3,AL0010,19.48361,40.34528,15.78198,1808
4,AL0011,19.77028,41.37778,17.50674,1457
...,...,...,...,...,...
3640,ZW0060,25.08333,-18.08333,12.94279,2888
3641,ZW0062,31.63333,-21.00000,10.39402,1782
3642,ZW0063,31.08333,-17.71667,11.58268,2481
3643,ZW0065,28.50000,-20.50000,8.96388,2323
