# Extract Tree Cover from Hansen 2013

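Run this notebook to merge the separate raster tiles downloaded from Google Earth Engine (GEE) into one GeoTIFF per variable (`treecover`, `lossyear`), and then to extract the tree-cover values at the NFI plot coordinates.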


## Imports


In [1]:
# Magic
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Libraries
import glob
import sys
from contextlib import ExitStack

import geopandas as gpd
import matplotlib.pyplot as plt
import rasterio
from pyprojroot import here
from rasterio.crs import CRS
from rasterio.merge import merge
from tqdm import tqdm

sys.path.insert(0, "../../src")
from run_mp import *
from utilities import *

import chime
chime.theme("mario")

## Merge Raster Parts


In [2]:
def merge_raster_parts(myvar):
    """Merge all GeoTIFF parts of one variable into a single raster file.

    Every ``*.tif`` found in
    ``../../data/raw/hansen2013_treecover_france/<myvar>/`` is opened, the
    parts are mosaicked with ``rasterio.merge.merge`` and the result is
    written to
    ``data/raw/hansen2013_treecover_france/<myvar>_merged_in_python.tif``
    (resolved through ``here``).

    Parameters
    ----------
    myvar : str
        Name of the sub-directory that holds the parts, e.g. ``"treecover"``.

    Raises
    ------
    FileNotFoundError
        If no ``.tif`` file is found for ``myvar``.
    """
    display(f"Extraction for file {myvar}")
    # ! List all tif files of desired variable -----------------------------------------
    # NOTE: the input pattern is relative to the working directory, whereas the
    # output path below is resolved through `here`.
    all_files = glob.glob(f"../../data/raw/hansen2013_treecover_france/{myvar}/*.tif")
    print("- Number of files: ", len(all_files))
    print("- ", sorted(all_files))

    # Fail early with a clear message instead of an opaque error further down.
    if not all_files:
        raise FileNotFoundError(
            f"No .tif files found for variable '{myvar}' in "
            f"../../data/raw/hansen2013_treecover_france/{myvar}/"
        )

    # ! Merge all files into one --------------------------------------------------------
    filename = here(
        f"data/raw/hansen2013_treecover_france/{myvar}_merged_in_python.tif"
    )

    # ExitStack closes every opened dataset, even if merging raises.
    with ExitStack() as stack:
        print("- Loading all files into a list...")
        src_files_to_mosaic = [
            stack.enter_context(rasterio.open(file)) for file in tqdm(all_files)
        ]

        # Merge function returns a single mosaic array and the transformation info
        print("- Merge parts into one...")
        mosaic, out_trans = merge(src_files_to_mosaic)

        # Copy the metadata of the last opened part as the template for the output
        template = src_files_to_mosaic[-1]
        out_meta = template.meta.copy()
        crs = template.crs
        epsg = crs.to_epsg() if crs is not None else None

    # Update the metadata to describe the mosaic instead of a single part
    out_meta.update(
        {
            "driver": "GTiff",
            "height": mosaic.shape[1],
            "width": mosaic.shape[2],
            "transform": out_trans,
        }
    )
    # Only override the CRS when an EPSG code exists; otherwise keep the CRS
    # copied from the template metadata rather than replacing it with None.
    if epsg is not None:
        out_meta["crs"] = epsg

    # Save the merged raster as a TIFF file
    with rasterio.open(
        filename,
        "w",
        **out_meta,
    ) as dest:
        dest.write(mosaic)

In [3]:
# Build one merged raster per downloaded variable.
for variable in ("treecover", "lossyear"):
    merge_raster_parts(variable)

'Extraction for file treecover'

Number of files:  16
['../../data/raw/hansen2013_treecover_france/treecover/hansen2013_treecover2000_france_part1.tif', '../../data/raw/hansen2013_treecover_france/treecover/hansen2013_treecover2000_france_part10.tif', '../../data/raw/hansen2013_treecover_france/treecover/hansen2013_treecover2000_france_part11.tif', '../../data/raw/hansen2013_treecover_france/treecover/hansen2013_treecover2000_france_part12.tif', '../../data/raw/hansen2013_treecover_france/treecover/hansen2013_treecover2000_france_part13.tif', '../../data/raw/hansen2013_treecover_france/treecover/hansen2013_treecover2000_france_part14.tif', '../../data/raw/hansen2013_treecover_france/treecover/hansen2013_treecover2000_france_part15.tif', '../../data/raw/hansen2013_treecover_france/treecover/hansen2013_treecover2000_france_part16.tif', '../../data/raw/hansen2013_treecover_france/treecover/hansen2013_treecover2000_france_part2.tif', '../../data/raw/hansen2013_treecover_france/treecover/hansen2013_treecover2000_france_par

100%|██████████| 16/16 [00:00<00:00, 631.19it/s]

Merge parts into one...





'Extraction for file lossyear'

Number of files:  16
['../../data/raw/hansen2013_treecover_france/lossyear/hansen2013_lossyear_france_part1.tif', '../../data/raw/hansen2013_treecover_france/lossyear/hansen2013_lossyear_france_part10.tif', '../../data/raw/hansen2013_treecover_france/lossyear/hansen2013_lossyear_france_part11.tif', '../../data/raw/hansen2013_treecover_france/lossyear/hansen2013_lossyear_france_part12.tif', '../../data/raw/hansen2013_treecover_france/lossyear/hansen2013_lossyear_france_part13.tif', '../../data/raw/hansen2013_treecover_france/lossyear/hansen2013_lossyear_france_part14.tif', '../../data/raw/hansen2013_treecover_france/lossyear/hansen2013_lossyear_france_part15.tif', '../../data/raw/hansen2013_treecover_france/lossyear/hansen2013_lossyear_france_part16.tif', '../../data/raw/hansen2013_treecover_france/lossyear/hansen2013_lossyear_france_part2.tif', '../../data/raw/hansen2013_treecover_france/lossyear/hansen2013_lossyear_france_part3.tif', '../../data/raw/hansen2013_treecover_france/lossyea

100%|██████████| 16/16 [00:00<00:00, 1164.48it/s]

Merge parts into one...





In [4]:
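# Quality Control.
# ! Commented out because it can take up to 1 min!
# NOTE: `filename` is only defined in the "Extract Files" section below; set it before uncommenting.
# Open the saved TIFF file and plot it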
# with rasterio.open(filename) as src:
#     # Read the raster data
#     raster_data = src.read(1)

#     # Display the raster data
#     plt.imshow(raster_data, cmap="gray")
#     plt.colorbar()
#     plt.show()

## Load NFI Coordinates


In [5]:
# Explicit import of the loader; `utilities` is also star-imported in the imports cell.
from utilities import get_final_nfi_coordinates

# Load the "noisy" variant of the NFI coordinates from the csv source.
nfi_coords = get_final_nfi_coordinates("noisy", "csv")
# Last expression of the cell, so the frame is rendered as the cell output.
nfi_coords

Loading noisy coordinates from csv.


Unnamed: 0,SiteID,idp,first_year,y,x,y_fr,x_fr
0,0,500008,2010,48.050171,6.069031,6.776642e+06,9.286128e+05
1,1,500013,2010,49.200153,1.132647,6.901657e+06,5.639015e+05
2,2,500098,2010,44.180056,4.700861,6.343803e+06,8.359781e+05
3,3,500103,2010,48.966657,7.119344,6.881944e+06,1.001476e+06
4,4,500137,2010,48.370072,6.713371,6.814217e+06,9.749062e+05
...,...,...,...,...,...,...,...
41291,41291,1131396,2016,47.790156,3.380200,6.743375e+06,7.284673e+05
41292,41292,1131409,2016,42.686748,2.156438,6.176717e+06,6.307829e+05
41293,41293,1131410,2016,47.899971,7.424664,6.764769e+06,1.030439e+06
41294,41294,1131424,2016,44.924973,3.757202,6.425373e+06,7.597461e+05


## Extract Files


In [6]:
# Smoke test: extract values for the first 10 coordinates only, before the full run.
myvar = "treecover"
# Path of the merged raster written by `merge_raster_parts` for this variable.
filename = here(f"data/raw/hansen2013_treecover_france/{myvar}_merged_in_python.tif")

# `y` / `x` are passed as latitudes / longitudes.
# NOTE(review): presumably these are WGS84 degrees matching expected_crs="4326" — confirm.
extract_raster_values(
    tiff_file=filename,
    variable_name=myvar,
    latitudes=nfi_coords["y"][:10],
    longitudes=nfi_coords["x"][:10],
    expected_crs="4326",
    progress_bar=True,
)

100%|██████████| 10/10 [00:06<00:00,  1.53it/s]


Unnamed: 0,treecover,Latitude,Longitude
0,94.0,48.050171,6.069031
1,95.0,49.200153,1.132647
2,85.0,44.180056,4.700861
3,0.0,48.966657,7.119344
4,97.0,48.370072,6.713371
5,75.0,48.094909,4.191761
6,79.0,45.404677,1.164141
7,0.0,47.543472,0.931013
8,0.0,48.565523,6.784044
9,30.0,49.134544,-1.249681


In [7]:
# Split the coordinates into 10 groups; the resulting list is the input of the
# parallel extraction in the next cell.
# NOTE(review): the helper reports "random groups" — results may differ between runs unless seeded; confirm.
df_list = split_df_into_list_of_group_or_ns(nfi_coords, 10)

Splitting df into 10 random groups


In [8]:
# Explicit import of the worker; `utilities` is also star-imported in the imports cell.
from utilities import parallel_hansen2013_extraction

# Run the extraction for every group in `df_list` and combine the per-group results
# with `pd.concat` into `df`, which the save cell below depends on.
# NOTE(review): `pd` is not imported explicitly in this notebook; presumably it
# arrives through one of the star imports — confirm.
df = run_mp(
    parallel_hansen2013_extraction,
    df_list,
    pd.concat,
    num_cores=8,
    progress_bar=True,
)

  0%|          | 0/10 [00:00<?, ?it/s]

## Save Final Data


In [None]:
# Persist the extracted values as the final predictor dataset.
# Requires `df` from the parallel extraction cell above to have completed.
df.to_feather(here("data/final/predictor_datasets/treecover.feather"))
# A bare `df` in the middle of a cell is never rendered (only the last
# expression of a cell is shown), so display it explicitly.
display(df)
# Audible notification that the notebook finished.
chime.success()

NameError: name 'df' is not defined