As all FAO files share the same raster, this notebook uses the output of AgroPotentialYield.ipynb and maps the preprocessed raster to every other FAO file, producing a consolidated output CSV.

In [1]:
import os
import numpy as np
import pandas as pd
import shutil

import yaml

from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path

import sys
# Put the parent directory on the import path (once) so that modules living
# one level above this notebook can be imported -- presumably where the
# `projections` package imported below is located.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from projections import raster, utils

# Show every column when displaying DataFrames. The fully qualified option
# name is required: on pandas >= 1.4 the short pattern 'max_columns' also
# matches 'styler.render.max_columns', which makes set_option raise
# OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_columns', None)



In [2]:
def aggregate_files_with_same_raster(in_out_path):
    """Process every ``*.tif`` of one dataset folder against the country splits.

    Parameters
    ----------
    in_out_path : tuple
        ``(input_dir, output_dir)``. The first entry is resolved against
        ``BASE_READ_PATH``, the second against ``BASE_OUTPUT_PATH``.

    Returns
    -------
    dict
        ``{'in_path': <resolved input dir>, 'errors': [<messages>]}``. The
        error list is ``['Wrong coordinates']`` when the folder fails the
        coordinate check; otherwise it holds one entry per country file whose
        merge result was empty.
    """
    relative_in, relative_out = in_out_path
    source_dir = BASE_READ_PATH / relative_in
    target_dir = BASE_OUTPUT_PATH / relative_out
    target_dir.mkdir(exist_ok=True)

    # Nothing is processed unless the folder passes the coordinate check.
    if not shares_the_same_coordinates_as_potential_yield(source_dir):
        return {'in_path': source_dir, 'errors': ['Wrong coordinates']}

    problems = []
    for tif_file in source_dir.glob('*.tif'):
        save_path = target_dir / tif_file.name
        # An existing sibling .csv marks this raster as already done
        # (presumably written by aggregate_feather_splits_and_save).
        if save_path.with_suffix(".csv").exists():
            continue

        # Scratch directory named after the raster without its ".tif" ending.
        splits_dir = target_dir / tif_file.name[:-4]
        splits_dir.mkdir(exist_ok=True)

        image = utils.read_tif(tif_file)
        increment = raster.get_increment_from_tif(image)

        for country_file in BY_COUNTRY_PATH.glob('*.feather'):
            country_save_path = splits_dir / country_file.name
            # Splits already present in the scratch directory are reused.
            if country_save_path.exists():
                continue

            country_df = pd.read_feather(country_file)
            polygon = utils.get_mock_polygon_from_df(country_df, increment=increment)
            merged = raster.merge_df_to_array_by_lat_lon(country_df, image, polygon)
            if merged.empty:
                problems.append(f'{country_file.name} is empty')
                continue
            merged.to_feather(country_save_path)

        # Combine the per-country splits, then drop the scratch directory.
        utils.aggregate_feather_splits_and_save(splits_dir, save_path, no_data_value=-9999)
        shutil.rmtree(splits_dir)

    return {'in_path': source_dir, 'errors': problems}
    
    
def shares_the_same_coordinates_as_potential_yield(path):
    """Tell whether every ``*.tif`` in ``path`` uses the reference coordinates.

    The reference is the Alfalfa raster of the Agro-Climatic Potential Yield
    dataset; its ``x`` and ``y`` attributes are compared with those of each
    raster found directly inside ``path``.

    Parameters
    ----------
    path : pathlib.Path
        Directory whose ``*.tif`` files are checked.

    Returns
    -------
    bool
        ``True`` when all rasters match the reference (also when ``path``
        contains no ``*.tif`` at all), ``False`` at the first mismatch.
    """
    reference = utils.read_tif('../Data/FAO/Agro-ClimaticPotentialYield/Alfalfa.tif')
    x = reference.x
    y = reference.y

    # np.array_equal checks the shapes before the values, so a raster with a
    # different grid size yields False instead of being broadcast against the
    # reference or raising on the element-wise comparison. The generator keeps
    # the reads lazy, so checking stops at the first mismatching raster.
    return all(
        np.array_equal(x, image.x) and np.array_equal(y, image.y)
        for image in map(utils.read_tif, path.glob('*.tif'))
    )

In [3]:
# Root folder of the raw FAO rasters, and root folder for everything written
# by this notebook.
BASE_READ_PATH = Path('../Data/FAO/')
BASE_OUTPUT_PATH = Path('../Output/FAO/')

# Folder holding the per-country *.feather files that each raster is merged
# against (expected to exist before this notebook runs).
potential_yield_path = BASE_OUTPUT_PATH / 'Agro-ClimaticPotentialYield'
BY_COUNTRY_PATH = potential_yield_path / 'by_country'

In [4]:
# Datasets to process. Each key is an input folder relative to BASE_READ_PATH
# and each value is the output folder name created under BASE_OUTPUT_PATH.
# The (key, value) pairs are passed one by one to
# aggregate_files_with_same_raster.
in_out_paths = {
    'Gap/AggregateYieldAchievementRatio': 'AggregateYieldAchievementRatio',
    'Gap/CropApparentProductionGap': 'CropApparentProductionGap',
    'Gap/CropYieldAchievementRatio': 'CropYieldAchievementRatio',
    'Suitability_AttainableYield/SuitabilityIndex': 'SuitabilityIndex',
    'Suitability_AttainableYield/Potential_Output_Density': 'Potential_Output_Density',
    'ActualProduction/Area': 'ActualProductionArea',
    'ActualProduction/Production': 'ActualProduction',
    'ActualProduction/Production_Value': 'ActualProductionValue',
    'ActualProduction/Yield': 'ActualProductionYield',
}

# Additional files with same raster
**Requires output from AgroPotentialYield**

In [5]:
from tqdm import tqdm
from functools import partialmethod

# Globally silence tqdm: from here on every progress bar is constructed with
# disable=True. This also applies to the tqdm(...) loop used later in this
# notebook when the output CSVs are merged.
# NOTE(review): presumably done to keep the output of the worker processes
# readable -- confirm before removing.
tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)

In [6]:
# One worker per dataset, capped at 12 processes.
n_processes = min(len(in_out_paths), 12)

if n_processes != 1:
    # Fan the datasets out over a process pool; map() hands the results back
    # in the same order as in_out_paths.
    with ProcessPoolExecutor(n_processes) as pool:
        for result in pool.map(aggregate_files_with_same_raster, in_out_paths.items()):
            print(result)
else:
    # A single dataset is processed in this process, without a pool.
    for in_path, out_path in in_out_paths.items():
        print(in_path)
        result = aggregate_files_with_same_raster((in_path, out_path))
        print(result)

{'in_path': PosixPath('../Data/FAO/Gap/AggregateYieldAchievementRatio'), 'errors': []}
{'in_path': PosixPath('../Data/FAO/Gap/CropApparentProductionGap'), 'errors': []}
{'in_path': PosixPath('../Data/FAO/Gap/CropYieldAchievementRatio'), 'errors': []}
{'in_path': PosixPath('../Data/FAO/Suitability_AttainableYield/SuitabilityIndex'), 'errors': []}
{'in_path': PosixPath('../Data/FAO/Suitability_AttainableYield/Potential_Output_Density'), 'errors': []}
{'in_path': PosixPath('../Data/FAO/ActualProduction/Area'), 'errors': []}
{'in_path': PosixPath('../Data/FAO/ActualProduction/Production'), 'errors': []}
{'in_path': PosixPath('../Data/FAO/ActualProduction/Production_Value'), 'errors': []}
{'in_path': PosixPath('../Data/FAO/ActualProduction/Yield'), 'errors': []}


In [7]:
def read_agro_df(file):
    """Load one output table and give its ``value`` column a unique name.

    The column is renamed to ``<parent folder name>_<file name without
    suffix>`` so that tables coming from different files can be merged side
    by side without clashing.

    Parameters
    ----------
    file : pathlib.Path
        Table to load through ``utils.robust_read``.

    Returns
    -------
    The loaded table with the ``value`` column renamed (a table without such
    a column is returned unchanged).
    """
    table = utils.robust_read(file)
    column_name = '_'.join((file.parent.name, file.stem))
    table.rename(columns={'value': column_name}, inplace=True)
    return table

In [8]:
# Consolidated table, built up by outer-merging every output CSV on 'id'.
df = None

# Sorting makes the merge order (and therefore the column order of the result
# and which file supplies 'intersection_area') independent of the order in
# which the filesystem happens to list the files.
for file in tqdm(sorted(BASE_OUTPUT_PATH.glob('**/*.csv'))):
    # Skip intermediate results, and skip the consolidated table left behind
    # by a previous run of this notebook: '**/*.csv' also matches files
    # directly under BASE_OUTPUT_PATH, so the old result would otherwise be
    # merged into the new one.
    if file.parent.name == 'partial' or file == BASE_OUTPUT_PATH / 'all_FAO.csv':
        continue

    field = read_agro_df(file)
    if field.empty:
        continue

    if df is None:
        df = field
    else:
        # 'intersection_area' is kept from the first table only, to avoid
        # duplicated (suffixed) copies of that column.
        df = df.merge(field.drop(columns='intersection_area'), on='id', how='outer')

In [9]:
# Write the consolidated table to the output root, without the index column.
consolidated_path = BASE_OUTPUT_PATH / 'all_FAO.csv'
df.to_csv(consolidated_path, index=False)