
# Data and methods

In [4]:
import os
import re
import sys
import queue
import zipfile
import logging
import multiprocessing

from rasterio.crs import CRS

import numpy as np
import pandas as pd
import geopandas as gpd

from pathlib import Path
from threading import Thread
from IPython.display import clear_output

notebook = %pwd
sys.path.append(os.path.dirname(os.path.realpath(notebook)))

#from tropicly import *
from tropicly.utils import (orientation_to_int,
                            download,
                            download_worker,
                            write_binary,
                            tile_index,
                            cache_directories, 
                            get_data_dir)

#from tropicly.similarity import jaccard_index
#from tropicly.enums import ESV_costanza, ESV_deGroot, ESV_worldbank


# Project data directories; the cells below reach them by attribute (e.g. DIRS.auxiliary, DIRS.driver).
DIRS = cache_directories(get_data_dir())
# Keyword arguments carrying a browser-like User-Agent header.
# NOTE(review): not referenced in the visible cells - presumably meant for the download helpers; confirm.
HEADERS = {'headers': {'User-Agent': "Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"}}
# Coordinate reference system built from EPSG code 4326.
WGS84 = CRS.from_epsg(4326)

In [2]:
# log progress to notebook
def progress(*args, **kwargs):
    """Print the share of finished tasks as a percentage.

    Reads the ``total`` and ``pending`` keyword arguments; positional
    arguments are accepted but ignored.
    """
    total = kwargs['total']
    done = total - kwargs['pending']
    percent = round(done / total * 100, 2)
    print('{} of 100 %'.format(percent))

def finish(*args, **kwargs):
    """Clear the cell output and print a completion message; all arguments are ignored."""
    clear_output()
    print('Tasks complete')
    

# NOTE(review): TaskSheduler is not imported in the visible cells (the wildcard
# `from tropicly import *` is commented out) - confirm where it comes from.
sheduler = TaskSheduler('tasks', 5)
# Register the notebook reporters on the sheduler's progress/finish hooks.
sheduler.on_progress.connect(progress)
sheduler.on_finish.connect(finish)

## Data

### Spatial data

#### Global Forest Change

#### Aboveground live woody biomass density

#### Global soil organic carbon map

#### Intact Forest Landscapes

#### Auxiliary data

## Methods

### Preprocessing

### Deforestation

#### Forest definition

#### Proximate driver

In [3]:
# Needs a test run
mask = gpd.read_file(str(DIRS.auxiliary/'mask.shp'))

# One worker process per mask row. The worker receives the land cover, tree
# cover, gain and loss rasters (in that order) plus the output path.
processes = []
for _, tile in mask.iterrows():
    inputs = [str(DIRS.preproc/tile[column]) for column in ('gl30_10', 'cover', 'gain', 'loss')]
    out_path = str(DIRS.driver/'driver_{}.tif'.format(tile.key))
    processes.append(multiprocessing.Process(target=cworker, args=(*inputs, out_path)))

# TODO: replace with the task sheduler
execute_concurrent(processes, THREADLIMIT, msg='Finished {} % of 100 %', callback=callback)

Finished 100.0 % of 100 %


In [12]:
# Update mask: attach the file name of every generated driver raster to its tile.
mask = gpd.read_file(str(DIRS.auxiliary/'mask.shp'))
regex = re.compile(r'.+(?P<key>\d{2}(?:N|S).\d{3}(?:W|E))\..+')

records = []
for item in DIRS.driver.glob('driver*.tif'):
    tile_key = regex.match(str(item)).group('key')
    records.append([tile_key, item.name])

drivers = pd.DataFrame(records, columns=['key', 'driver'])
product = mask.merge(drivers, on='key')
# NOTE(review): this overwrites the file that was just read. If mask.shp already
# has a 'driver' column, the merge will suffix the overlapping names - confirm
# the cell is only meant to run once.
product.to_file(str(DIRS.auxiliary/'mask.shp'))

#### Accuracy assessment

In [6]:
# Fixed seed so the tile selection is reproducible.
random = np.random.RandomState(42)
mask = gpd.read_file(str(DIRS.auxiliary/'final_region_mask.shp'))

# Draw up to 10 driver tiles per region for the accuracy assessment.
tiles = []
for region, df in mask.groupby(by='region'):
    if region in ('Africa', 'Americas', 'Asia'):
        # `choice` samples WITH replacement by default, which can pick the same
        # tile several times; draw without replacement and cap the sample size
        # at the number of tiles available in the region.
        tiles += list(random.choice(df.driver_2, size=min(10, len(df)), replace=False))

In [None]:
# Hard-coded tile selection (30 driver rasters); assigning it replaces any
# previous value of `tiles`.
# NOTE(review): 'driver_10S_018E.tif' and 'driver_00N_060W.tif' are each listed
# twice - confirm the duplicates are intended.
tiles = ['driver_15S_042E.tif',
 'driver_00N_006E.tif',
 'driver_05N_006E.tif',
 'driver_20S_030E.tif',
 'driver_15N_012W.tif',
 'driver_15S_054E.tif',
 'driver_05S_030E.tif',
 'driver_25N_024E.tif',
 'driver_10S_018E.tif',
 'driver_10S_018E.tif',
 'driver_15S_060W.tif',
 'driver_05S_048W.tif',
 'driver_10N_090W.tif',
 'driver_15N_066W.tif',
 'driver_00N_060W.tif',
 'driver_00N_048W.tif',
 'driver_05S_042W.tif',
 'driver_00N_060W.tif',
 'driver_05S_060W.tif',
 'driver_00N_096W.tif',
 'driver_20N_120E.tif',
 'driver_20N_096E.tif',
 'driver_00N_132E.tif',
 'driver_20N_102E.tif',
 'driver_10N_078E.tif',
 'driver_00N_114E.tif',
 'driver_05N_114E.tif',
 'driver_00N_138E.tif',
 'driver_15N_072E.tif',
 'driver_00N_096E.tif']

In [56]:
records = []

# One sampling thread per tile; each worker receives the shared `records` list.
# Fixes: `Thread` is the name imported from `threading` (the module itself is
# never imported), and the driver rasters live in DIRS.driver like everywhere
# else in this notebook (was DIRS.driv).
processes = [
    Thread(target=sworker, args=(str(DIRS.driver/f), records),
           kwargs={'samples': 200, 'seed': 42})
    for f in tiles
]

execute_concurrent(processes, THREADLIMIT, msg='Sampling {} % from 100 %', callback=callback)

# Write every sample set to its own CSV.
# NOTE(review): files are numbered by position in `records`, not by tile name.
for idx, samples in enumerate(records):
    name = 'sample_{}.csv'.format(idx + 1)
    # Build the frame from pandas directly instead of calling `from_dict` on a
    # `df` left over from another cell.
    df = pd.DataFrame.from_dict(samples)
    df.to_csv(str(DIRS.ana/name), index=False)

Sampling 100.0 % from 100 %


#### Emissions

In [3]:
mask = gpd.read_file(DIRS.masks/'aism.shp')

# Queue one biomass task per mask row: driver raster, biomass raster, output raster.
for _, tile in mask.iterrows():
    worker_args = (
        DIRS.driver/tile.driver,
        DIRS.preproc/tile.biomass,
        DIRS.agb/'agb_{}.tif'.format(tile.key),
    )
    sheduler.add_task(Thread(target=agbworker, args=worker_args))

0.37 of 100 %
0.75 of 100 %
1.12 of 100 %
1.49 of 100 %
2.24 of 100 %
2.61 of 100 %
2.99 of 100 %
3.36 of 100 %
3.73 of 100 %
4.1 of 100 %
4.48 of 100 %
4.85 of 100 %
5.6 of 100 %
5.97 of 100 %
6.72 of 100 %
7.09 of 100 %
7.46 of 100 %
7.84 of 100 %
8.21 of 100 %
8.58 of 100 %
8.96 of 100 %
9.33 of 100 %
9.7 of 100 %
10.07 of 100 %
10.45 of 100 %
10.82 of 100 %
11.19 of 100 %
11.57 of 100 %
11.94 of 100 %
12.31 of 100 %
12.69 of 100 %
13.06 of 100 %
13.43 of 100 %
13.81 of 100 %
14.18 of 100 %
14.55 of 100 %
14.93 of 100 %
15.3 of 100 %
15.67 of 100 %
16.04 of 100 %
16.42 of 100 %
16.79 of 100 %
17.16 of 100 %
17.91 of 100 %
18.28 of 100 %
18.66 of 100 %
19.03 of 100 %
19.4 of 100 %
19.78 of 100 %
20.15 of 100 %
20.52 of 100 %
20.9 of 100 %
21.27 of 100 %
22.01 of 100 %
22.39 of 100 %
22.76 of 100 %
23.51 of 100 %
23.88 of 100 %
24.63 of 100 %
25.37 of 100 %
25.75 of 100 %
26.12 of 100 %
26.49 of 100 %
26.87 of 100 %
27.24 of 100 %
27.61 of 100 %
27.99 of 100 %
28.36 of 100 %
28.73 of 

In [4]:
mask = gpd.read_file(str(DIRS.auxiliary/'final_region_mask.shp'))
# Extracts the tile key (e.g. 10N_020E) from a driver file name.
regex = re.compile(r'.+(?P<key>\d{2}(?:N|S).\d{3}(?:W|E))\..+')

sheduler = TaskSheduler('soc', 4)
sheduler.on_progress.connect(callback)

for idx, row in mask.iterrows():
    driver = str(DIRS.driver/row.driver_2)
    soc = str(DIRS.legacy/row.soil)
    intact = str(DIRS.preproc/row.ifl)

    key = regex.match(row.driver_2).group('key')

    # One task per aggregation method, queued in the order min, mean, max.
    # `Thread` is the name imported from `threading`; the module itself is
    # never imported, so `threading.Thread` would raise a NameError.
    for method in ('min', 'mean', 'max'):
        path = str(DIRS.soce/'sc4_soc_{}_{}.tif'.format(method, key))
        sheduler.add_task(
            Thread(target=socworker, args=(driver, soc, path),
                   kwargs={'method': method, 'intact': intact})
        )

0.11 % of 100 %
0.34 % of 100 %
0.46 % of 100 %
0.57 % of 100 %
0.8 % of 100 %
0.92 % of 100 %
1.03 % of 100 %
1.15 % of 100 %
1.26 % of 100 %
1.38 % of 100 %
1.61 % of 100 %
1.72 % of 100 %
1.84 % of 100 %
1.95 % of 100 %
2.07 % of 100 %
2.18 % of 100 %
2.3 % of 100 %
2.41 % of 100 %
2.53 % of 100 %
2.64 % of 100 %
2.76 % of 100 %
2.87 % of 100 %
2.99 % of 100 %
3.1 % of 100 %
3.22 % of 100 %
3.33 % of 100 %
3.45 % of 100 %
3.56 % of 100 %
3.68 % of 100 %
3.79 % of 100 %
3.91 % of 100 %
4.02 % of 100 %
4.14 % of 100 %
4.25 % of 100 %
4.37 % of 100 %
4.48 % of 100 %
4.6 % of 100 %
4.71 % of 100 %
4.83 % of 100 %
4.94 % of 100 %
5.06 % of 100 %
5.17 % of 100 %
5.29 % of 100 %
5.4 % of 100 %
5.52 % of 100 %
5.63 % of 100 %
5.75 % of 100 %
5.86 % of 100 %
5.98 % of 100 %
6.09 % of 100 %
6.21 % of 100 %
6.32 % of 100 %
6.44 % of 100 %
6.55 % of 100 %
6.67 % of 100 %
6.78 % of 100 %
6.9 % of 100 %
7.01 % of 100 %
7.13 % of 100 %
7.24 % of 100 %
7.36 % of 100 %
7.47 % of 100 %
7.59 % of 100 

57.13 % of 100 %
57.24 % of 100 %
57.36 % of 100 %
57.47 % of 100 %
57.59 % of 100 %
57.7 % of 100 %
57.82 % of 100 %
57.93 % of 100 %
58.05 % of 100 %
58.16 % of 100 %
58.28 % of 100 %
58.39 % of 100 %
58.51 % of 100 %
58.62 % of 100 %
58.74 % of 100 %
58.85 % of 100 %
58.97 % of 100 %
59.08 % of 100 %
59.2 % of 100 %
59.31 % of 100 %
59.43 % of 100 %
59.54 % of 100 %
59.66 % of 100 %
59.77 % of 100 %
59.89 % of 100 %
60.0 % of 100 %
60.11 % of 100 %
60.23 % of 100 %
60.34 % of 100 %
60.46 % of 100 %
60.57 % of 100 %
60.69 % of 100 %
60.8 % of 100 %
60.92 % of 100 %
61.03 % of 100 %
61.15 % of 100 %
61.26 % of 100 %
61.38 % of 100 %
61.49 % of 100 %
61.61 % of 100 %
61.72 % of 100 %
61.84 % of 100 %
61.95 % of 100 %
62.07 % of 100 %
62.18 % of 100 %
62.3 % of 100 %
62.41 % of 100 %
62.53 % of 100 %
62.64 % of 100 %
62.76 % of 100 %
62.87 % of 100 %
62.99 % of 100 %
63.1 % of 100 %
63.22 % of 100 %
63.33 % of 100 %
63.45 % of 100 %
63.56 % of 100 %
63.68 % of 100 %
63.79 % of 100 %
63.

In [3]:
regex = re.compile(r'.+(?P<key>\d{2}(?:N|S).\d{3}(?:W|E))\..+')


def _tile_key(name):
    """Return the tile key embedded in a raster path (sort key)."""
    return regex.match(name).group('key')


# Scenario 1-3 SOC rasters per aggregation method, ordered by tile key.
smin = sorted((str(obj) for obj in DIRS.soce.glob('sc[123]_soc_min*.tif')), key=_tile_key)
smean = sorted((str(obj) for obj in DIRS.soce.glob('sc[123]_soc_mean*.tif')), key=_tile_key)
smax = sorted((str(obj) for obj in DIRS.soce.glob('sc[123]_soc_max*.tif')), key=_tile_key)

In [4]:
import rasterio as rio
# Imported here because `defaultdict` is used in this cell, before the later
# cell that imports it.
from collections import defaultdict

# Maps a tile key to the list of raster paths belonging to that tile.
bib = defaultdict(list)

In [5]:
# Group every raster path under its tile key (min files first, then mean, then max).
for path in smin + smean + smax:
    bib[regex.match(path).group('key')].append(path)

In [6]:
def create_record(key, vals, records):
    """Sum band 1 of each raster in ``vals`` and append the result to ``records``.

    :param key: tile identifier, stored under ``'tile'``
    :param vals: iterable of raster paths; each path must contain a
        ``sc<digit>_soc_<min|max|mean>``-style name, which becomes the column name
    :param records: list that receives the finished record dict
    """
    name_pattern = re.compile(r'.*(?P<name>sc\d_soc_\w{3}n?).*')
    record = {'tile': key}

    for raster_path in vals:
        column = name_pattern.match(raster_path).group('name')
        with rio.open(raster_path, 'r') as src:
            record[column] = np.sum(src.read(1))

    records.append(record)

In [7]:
sheduler = TaskSheduler('sum', 8)
sheduler.on_progress.connect(callback)

# Each task appends one record (per-tile raster sums) to the shared list.
records = []
for key, vals in bib.items():
    # `Thread` is the name imported from `threading`; the module itself is
    # never imported, so `threading.Thread` would raise a NameError.
    sheduler.add_task(Thread(target=create_record, args=(key, vals, records)))

0.37 % of 100 %
0.74 % of 100 %
1.12 % of 100 %
1.49 % of 100 %
1.86 % of 100 %
2.23 % of 100 %
2.6 % of 100 %
2.97 % of 100 %
3.35 % of 100 %
3.72 % of 100 %
4.09 % of 100 %
4.46 % of 100 %
4.83 % of 100 %
5.2 % of 100 %
5.58 % of 100 %
5.95 % of 100 %
6.32 % of 100 %
6.69 % of 100 %
7.06 % of 100 %
7.43 % of 100 %
7.81 % of 100 %
8.18 % of 100 %
8.55 % of 100 %
8.92 % of 100 %
9.29 % of 100 %
9.67 % of 100 %
10.04 % of 100 %
10.41 % of 100 %
10.78 % of 100 %
11.15 % of 100 %
11.52 % of 100 %
11.9 % of 100 %
12.27 % of 100 %
12.64 % of 100 %
13.01 % of 100 %
13.38 % of 100 %
13.75 % of 100 %
14.13 % of 100 %
14.5 % of 100 %
14.87 % of 100 %
15.24 % of 100 %
15.61 % of 100 %
15.99 % of 100 %
16.36 % of 100 %
16.73 % of 100 %
17.1 % of 100 %
17.47 % of 100 %
17.84 % of 100 %
18.22 % of 100 %
18.59 % of 100 %
18.96 % of 100 %
19.33 % of 100 %
19.7 % of 100 %
20.07 % of 100 %
20.45 % of 100 %
20.82 % of 100 %
21.19 % of 100 %
21.56 % of 100 %
21.93 % of 100 %
22.3 % of 100 %
22.68 % of 10

In [9]:
# Turn the collected record dicts into a table, one row per record.
# NOTE(review): nothing visible here waits for the sheduler's threads to finish
# before `records` is read - confirm.
df = pd.DataFrame.from_records(records)

In [13]:
# Persist the table as soce.csv in the analysis directory, without the index column.
df.to_csv(str(DIRS.ana/'soce.csv'), index=False)

#### Ecosystem Service Values

In [3]:
mask = gpd.read_file(DIRS.auxiliary / 'mask.shp')

# For every tile, queue one task per ESV table (Costanza, de Groot, World Bank),
# each writing a loss raster and a gain raster.
# NOTE(review): the ESV_* tables are only imported in a commented-out line of
# the first cell - confirm where they come from.
for idx, row in mask.iterrows():
    driver = DIRS.driver / row.driver

    for esv in (ESV_costanza, ESV_deGroot, ESV_worldbank):
        out_names = [DIRS.esv / template.format(esv['name'], row.key)
                     for template in ('{}_loss_{}.tif', '{}_gain_{}.tif')]
        sheduler.add_task(Thread(target=esvworker, args=(driver, esv, out_names)))

0.12 of 100 %
0.37 of 100 %
0.5 of 100 %
0.62 of 100 %
0.75 of 100 %
0.87 of 100 %
1.0 of 100 %
1.12 of 100 %
1.24 of 100 %
1.49 of 100 %
1.62 of 100 %
1.87 of 100 %
1.99 of 100 %
2.11 of 100 %
2.24 of 100 %
2.36 of 100 %
2.49 of 100 %
2.61 of 100 %
2.74 of 100 %
2.86 of 100 %
3.11 of 100 %
3.23 of 100 %
3.36 of 100 %
3.48 of 100 %
3.61 of 100 %
3.73 of 100 %
3.86 of 100 %
3.98 of 100 %
4.1 of 100 %
4.23 of 100 %
4.35 of 100 %
4.48 of 100 %
4.6 of 100 %
4.73 of 100 %
4.85 of 100 %
4.98 of 100 %
5.1 of 100 %
5.22 of 100 %
5.35 of 100 %
5.47 of 100 %
5.6 of 100 %
5.72 of 100 %
5.85 of 100 %
6.09 of 100 %
6.22 of 100 %
6.47 of 100 %
6.59 of 100 %
6.72 of 100 %
6.84 of 100 %
6.97 of 100 %
7.09 of 100 %
7.21 of 100 %
7.34 of 100 %
7.46 of 100 %
7.59 of 100 %
7.71 of 100 %
7.84 of 100 %
7.96 of 100 %
8.08 of 100 %
8.21 of 100 %
8.33 of 100 %
8.46 of 100 %
8.71 of 100 %
8.83 of 100 %
9.08 of 100 %
9.33 of 100 %
9.45 of 100 %
9.58 of 100 %
9.7 of 100 %
9.83 of 100 %
9.95 of 100 %
10.07 of 100 

72.14 of 100 %
72.26 of 100 %
72.39 of 100 %
72.51 of 100 %
72.64 of 100 %
72.76 of 100 %
72.89 of 100 %
73.01 of 100 %
73.13 of 100 %
73.26 of 100 %
73.38 of 100 %
73.51 of 100 %
73.63 of 100 %
73.76 of 100 %
73.88 of 100 %
74.0 of 100 %
74.13 of 100 %
74.25 of 100 %
74.38 of 100 %
74.5 of 100 %
74.75 of 100 %
74.88 of 100 %
75.12 of 100 %
75.25 of 100 %
75.37 of 100 %
75.5 of 100 %
75.62 of 100 %
75.75 of 100 %
75.87 of 100 %
76.0 of 100 %
76.12 of 100 %
76.24 of 100 %
76.37 of 100 %
76.49 of 100 %
76.62 of 100 %
76.74 of 100 %
76.87 of 100 %
76.99 of 100 %
77.11 of 100 %
77.24 of 100 %
77.36 of 100 %
77.49 of 100 %
77.61 of 100 %
77.74 of 100 %
77.86 of 100 %
77.99 of 100 %
78.11 of 100 %
78.23 of 100 %
78.36 of 100 %
78.61 of 100 %
78.73 of 100 %
78.86 of 100 %
78.98 of 100 %
79.1 of 100 %
79.23 of 100 %
79.35 of 100 %
79.48 of 100 %
79.6 of 100 %
79.73 of 100 %
79.85 of 100 %
79.98 of 100 %
80.1 of 100 %
80.22 of 100 %
80.35 of 100 %
80.47 of 100 %
80.6 of 100 %
80.72 of 100 %
80.

### Evaluation

#### Class harmonization

In [None]:
from bokeh.transform import jitter
from bokeh.layouts import gridplot
from collections import defaultdict
from bokeh.models import HoverTool, FactorRange
from bokeh.plotting import output_notebook, show, figure, ColumnDataSource
# force bokeh plot output to jupyter notebook
output_notebook()

In [2]:
# Load the per-region harmonization scores into a single frame.
# `DataFrame.append` no longer exists in pandas 2 and kept each file's own
# 0..n-1 index. With those duplicated labels, dropping rows by index also
# removed unrelated rows from the other regions, so build a fresh unique index.
region_files = ('harmonization_americas.csv',
                'harmonization_africa.csv',
                'harmonization_asia.csv')
src = pd.concat([pd.read_csv(str(DIRS.ana / name)) for name in region_files],
                ignore_index=True)

# initial data clean up: discard tiles whose JC00 score is zero, normalise the
# score column names and drop incomplete rows
src = (
    src[src['JC00'] != 0]
    .rename(columns=lambda x: x.upper() if x[:2] == 'jc' else x)
    .dropna(axis=0, how='any')
)
# NOTE(review): assumes the CSVs hold exactly these six columns in this order.
src.columns = 'JC0 JC10 JC20 JC30 tile region'.split()

# scatterplot data prep: long format with one colour per forest cover class
class_colors = {'JC0': '#e66101',
                'JC10': '#fdb863',
                'JC20': '#b2abd2',
                'JC30': '#5e3c99'}
melted = src.melt(id_vars='tile region'.split(), var_name='jc_class', value_name='score')
melted['colors'] = melted['jc_class'].map(class_colors).fillna('#ffffff')
melted = melted.sort_values(by=['region', 'jc_class'], ascending=[True, True])

# boxplot data prep: quartiles, whiskers and mean per region and class
score_columns = ['JC0', 'JC10', 'JC20', 'JC30']
frames = []
for region, group in src.groupby('region'):
    # Restrict the statistics to the numeric score columns; the string columns
    # (tile, region) make quantile()/mean() fail on current pandas.
    scores = group[score_columns]

    boxplot = scores.quantile(q=[0.25, 0.5, 0.75]).T
    boxplot.columns = ['q1', 'q2', 'q3']
    boxplot['iqr'] = boxplot.q3 - boxplot.q1

    # theoretical whiskers (1.5 * IQR fences) and the observed extremes
    boxplot['tlw'] = boxplot.q1 - 1.5 * boxplot.iqr
    boxplot['tuw'] = boxplot.q3 + 1.5 * boxplot.iqr
    boxplot['miw'] = scores.min()
    boxplot['maw'] = scores.max()
    # whiskers never extend beyond the observed minimum / maximum
    boxplot['lower_whisker'] = boxplot[['tlw', 'miw']].max(axis=1)
    boxplot['upper_whisker'] = boxplot[['tuw', 'maw']].min(axis=1)

    boxplot['means'] = scores.mean()
    boxplot['region'] = region

    frames.append(boxplot)

box = pd.concat(frames)

In [3]:
# Two stacked Bokeh figures: a jittered scatter of all Jaccard scores (top)
# and a box plot of the same scores per region/class (bottom).

# scatterplot: one marker per tile score, categorical x axis (region, class)
source = ColumnDataSource({'x': list(zip(melted.region, melted.jc_class)),
                           'y': melted.score,
                           'id': melted.tile,
                           'colors': melted.colors})
hover = HoverTool(tooltips=[('Region/Class', '@x'),
                            ('Tile', '@id'),
                            ('JC-Score', '@y'),])
# every (region, class) combination becomes one category on the x axis
factors = [(reg, cls) 
           for reg in pd.unique(melted.region) 
           for cls in pd.unique(melted.jc_class)]

scatter = figure(x_range=FactorRange(*factors), plot_width=950, plot_height=600,
              tools=[hover, 'pan', 'wheel_zoom', 'save', 'reset', 'box_zoom'],
              title="Jaccard score per forest cover class")

# jitter spreads the markers horizontally inside their category
scatter.x(x=jitter('x', width=0.6, range=scatter.x_range), y='y', color='colors', source=source)

scatter.xgrid.grid_line_color = None
scatter.xaxis.axis_label = "Region/Class"
scatter.yaxis.axis_label = "Jaccard score"
scatter.y_range.start = -0.01

# boxplot: `source` and `hover` are rebound here for the second figure
source = ColumnDataSource({'x': list(zip(box.region, box.index)),
                           'q1': box.q1,
                           'q2': box.q2,
                           'q3': box.q3,
                           'iqr': box.iqr,
                           'lw': box.lower_whisker,
                           'uw': box.upper_whisker,
                           'means': box.means})
hover = HoverTool(tooltips=[("Region/Class", "@x"),
                            ("Q1", "@q1"),
                            ("Q2", "@q2"),
                            ("Q3", "@q3"),
                            ("IQR", "@iqr"),
                            ("lWhisker", "@lw"),
                            ("uWhisker", "@uw"),
                            ("Mean", "@means"),])

# the box plot reuses the scatter plot's x and y ranges
plot = figure(x_range=scatter.x_range, y_range=scatter.y_range,
              plot_width=950, plot_height=300,
              tools=[hover, 'pan', 'wheel_zoom', 'save', 'reset', 'box_zoom'])

# box: two bars (q1-q2 and q2-q3) so the median shows as the line between them
plot.vbar(x='x', width=0.7, bottom='q1', top='q2',
          line_color='black', fill_color='#f7f7f7', fill_alpha=0.7, source=source)
plot.vbar(x='x', width=0.7, bottom='q2', top='q3',
          line_color='black', fill_color='#f7f7f7', fill_alpha=0.7, source=source)

# whiskers: near-zero-height rectangles act as the horizontal end caps
plot.rect(x='x', y='lw', width=0.2, height=0.00001,
          line_color="black", source=source)
plot.rect(x='x', y='uw', width=0.2, height=0.00001,
          line_color="black", source=source)

# stems: vertical lines connecting the box to the whisker caps
plot.segment(x0='x', y0='lw', x1='x', y1='q1',
             line_color='black', source=source)
plot.segment(x0='x', y0='q3', x1='x', y1='uw',
             color='black', source=source)

# mean cross
plot.x(x='x', y='means', color='#ff0000', size=10, source=source)

plot.xgrid.grid_line_color = None

# x axis typography: axis label, region group labels and class tick labels
plot.xaxis.axis_label = "Region/Class"
plot.xaxis.axis_label_text_font = 'times'
plot.xaxis.axis_label_text_color = 'black'
plot.xaxis.axis_label_text_font_style = 'normal'
plot.xaxis.axis_label_text_font_size = '12pt'
plot.xaxis.group_text_font = 'times'
plot.xaxis.group_text_color = 'black'
plot.xaxis.group_text_font_style = 'normal'
plot.xaxis.group_text_font_size = '10pt'
plot.xaxis.major_label_text_font = 'times'
plot.xaxis.major_label_text_color = 'black'
plot.xaxis.major_label_text_font_style = 'normal'
plot.xaxis.major_label_text_font_size = '10pt'

# y axis typography
plot.yaxis.axis_label = "Jaccard score"
plot.yaxis.axis_label_text_font = 'times'
plot.yaxis.axis_label_text_color = 'black'
plot.yaxis.axis_label_text_font_style = 'normal'
plot.yaxis.axis_label_text_font_size = '12pt'

plot.y_range.start = -0.01

# display plots: scatter on top, box plot below
show(gridplot([[scatter],[plot]]))

#### Accuracy assessment

In [2]:
# Read every sample file once, then flatten the predicted and validated labels.
sample_frames = [pd.read_csv(str(csv_path)) for csv_path in DIRS.ana.glob('sample_*.csv')]

prediction = [label for frame in sample_frames for label in frame.label]
reference = [truth for frame in sample_frames for truth in frame.validation]

# Confusion matrix plus its default and omission-normalised variants.
cm = ConfusionMatrix.from_lists(reference, prediction)
com = cm.normalize()
om = cm.normalize(method='omission')

#### Deforestation drivers maps

In [13]:
import rasterio as rio

# NOTE(review): absolute, machine-specific path - consider deriving it from DIRS.
path = '/home/tobi/Documents/Master/code/python/susa/colombia/driver_colombia.tif'

# Only the raster extent is needed to build the grid, so close the dataset
# right after reading it instead of leaving the file handle open.
with rio.open(path) as ra:
    bounds = ra.bounds

grid = PolygonGrid(bounds, GridPolygon.regular_hexagon(area=0.5))
records = []

# Each grid cell is stored in `records` and handed to its own worker task.
# NOTE(review): the worker also receives `records`; presumably it fills in the
# cell's statistics - confirm it does not append the cell a second time.
for cell in grid:
    records.append(cell)
    sheduler.add_task(Thread(target=eworker, args=(path, cell, compute_driver, records)))

In [14]:
# Move the geometry out of every record so the remainder can be cast to plain numbers.
# Running this twice fails: the 'geometry' key is gone after the first pass.
geometry = [val.pop('geometry') for val in records]

In [5]:
# Replace every 'mean' that is not a plain float/int with 0.
for entry in records:
    if isinstance(entry['mean'], (float, int)):
        continue
    entry['mean'] = 0

In [15]:
# Build the attribute table for the hexagon legend: missing values become 0
# and every column is cast to integer.
df = pd.DataFrame.from_records(records)
df = df.fillna(0)
# `astype` returns a new frame and has no `inplace` argument.
df = df.astype(int)

# Re-attach the geometries extracted earlier and tag the layer as EPSG:4326.
gdf = gpd.GeoDataFrame(df, geometry=geometry)
gdf.crs = {'init': 'epsg:4326'}
# NOTE(review): absolute, machine-specific output path - consider deriving it from DIRS.
gdf.to_file('/home/tobi/Documents/Master/code/python/susa/colombia/legend.shp')

#### Deforestation drivers statistics

In [11]:
mask = gpd.read_file(str(DIRS.auxiliary/'mask.shp'))
records = []

# One frequency-count thread per tile; each worker receives the shared
# `records` list together with the tile's key and region.
# `Thread` is the name imported from `threading`; the module itself is never
# imported, so `threading.Thread` would raise a NameError.
# NOTE(review): the column is read as `row.drivee` here but as `row.driver`
# in other cells that load mask.shp - confirm the column name.
threads = [
    Thread(target=fworker, args=(str(DIRS.driver / row.drivee), records, row.key, row.region))
    for idx, row in mask.iterrows()
]

# TODO: replace with the task sheduler
execute_concurrent(threads, THREADLIMIT, msg='Frequenc {} % of 100 %', callback=callback)

Frequenc 100.0 % of 100 %


In [45]:
def _px_total(*spans):
    """Build a row function that adds up the values inside the given positional slices."""
    def _total(row):
        return sum(sum(row[span]) for span in spans)
    return _total


df = pd.DataFrame.from_records(records).fillna(0)

# Pixel totals per tile. The slices are positional, so they depend on the
# column order of `records`.
# NOTE(review): positions 9 and 10 are skipped - presumably the non-numeric
# key/region columns; confirm.
df['total_px'] = df.apply(_px_total(slice(None, 9), slice(11, None)), axis=1)
df['data_px'] = df.apply(_px_total(slice(1, 9), slice(11, 13)), axis=1)
df['relevant_px'] = df.apply(_px_total(slice(2, 9), slice(11, 13)), axis=1)

df.to_csv(str(DIRS.ana / 'driver.csv'), index=False)

# Results

## Preprocessing

#### Class harmonization

# Discussion

# References

[<a id="cit-Hansen2013" href="#call-Hansen2013">1</a>] Hansen M. C., Potapov P. V., Moore R. <em>et al.</em>, ``_High-Resolution Global Maps of 21st-Century Forest Cover Change_'', Science, vol. 342, number 6160, pp. 850--853, November 2013.

[<a id="cit-Li2017" href="#call-Li2017">2</a>] Li Yan, Sulla-Menashe Damien, Motesharrei Safa <em>et al.</em>, ``_Inconsistent estimates of forest cover change in China between 2000 and 2013 from multiple datasets: differences in parameters, spatial resolution, and definitions_'', Scientific Reports, vol. 7, number 8748, pp. , August 2017.

[<a id="cit-Hansen2013a" href="#call-Hansen2013a">3</a>] Hansen M. C., Potapov P. V., Moore R. <em>et al.</em>, ``_Supplementary Materials for: High-Resolution Global Maps of 21st-Century Forest Cover Change_'', Science, vol. 342, number 6160, pp. 1--32, November 2013.  [online](http://science.sciencemag.org/content/suppl/2013/11/14/342.6160.850.DC1)

[<a id="cit-Tropek2014" href="#call-Tropek2014">4</a>] Tropek Robert, Sedláček Ondřej, Beck Jan <em>et al.</em>, ``_Comment on High-resolution global maps of 21st-century forest cover change_'', Science, vol. 344, number 981, pp. ,  2014.

[<a id="cit-Bellot2014" href="#call-Bellot2014">5</a>] Bellot Franz-Fabian, Bertram Mathias, Navratil Peter <em>et al.</em>, ``_The high-resolution global map of 21st-century forest cover change from the University of Maryland (Hansen Map) is hugely overestimating deforestation in Indonesia_'', FORCLIME Press release, vol. , number , pp. ,  2014.  [online](http://www.forclime.org/documents/press_release/FORCLIME_Overestimation%20of%20Deforestation.pdf)

[<a id="cit-Li2017a" href="#call-Li2017a">6</a>] Li Yan, Sulla-Menashe Damien, Motesharrei Safa <em>et al.</em>, ``_Supplementary Information for Inconsistent estimates of forest cover change in China between 2000 and 2013 from multiple datasets_'', Scientific Reports, vol. 7, number 8748, pp. , August 2017.

