In [None]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import tarfile

import shutil
import yaml

from functools import partial
from glob import glob, iglob
from pathlib import Path
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from projections.shapefiles import load_shapes, iter_records
from projections.models import Records
from projections.elevation import get_indices_by_file
from projections import raster, utils


# Show every column when displaying wide DataFrames. The fully-qualified option
# name is used because the bare 'max_columns' pattern is ambiguous on newer
# pandas releases (it also matches 'styler.render.max_columns') and raises
# OptionError there.
pd.set_option('display.max_columns', None)

In [2]:
def save_location_mapping(row_and_path):
    """Write the raster subset that intersects one shape to a CSV file.

    Parameters
    ----------
    row_and_path : tuple
        A ``(row, path)`` pair. ``row`` must provide ``'geometry'`` and
        ``'id'`` entries; ``path`` is the file to write.

    Returns
    -------
    None

    Notes
    -----
    Reads the module-level ``IMAGE``. The subset comes from
    ``raster.find_subset_with_intersection_area(IMAGE, geometry)``. When that
    subset is empty a zero-length file is written instead of a CSV; otherwise
    the subset gets an ``'id'`` column set to the row's id and is saved
    without its index.
    """
    record, destination = row_and_path

    cells = raster.find_subset_with_intersection_area(IMAGE, record['geometry'])

    if not cells.empty:
        cells['id'] = record['id']
        cells.to_csv(destination, index=False)
        return

    # Nothing intersects this shape: leave an empty placeholder file behind.
    with open(destination, 'w') as handle:
        handle.write('')

In [3]:
# Raster whose grid is mapped onto the shapes first; later cells reuse that
# mapping for every other .tif found in read_path.
base_file = "Alfalfa.tif"
read_path = Path('../Data/FAO/Agro-ClimaticPotentialYield')
output_path = Path('../Output/FAO/Agro-ClimaticPotentialYield/')
# One CSV per shape is written here by save_location_mapping.
partial_path = output_path / 'partial'
# One feather file per region id is written here.
by_country_path = output_path / 'by_country'

# parents=True: on a fresh checkout the intermediate '../Output/FAO' directory
# does not exist yet, and a plain mkdir would raise FileNotFoundError.
output_path.mkdir(parents=True, exist_ok=True)
partial_path.mkdir(parents=True, exist_ok=True)
by_country_path.mkdir(parents=True, exist_ok=True)

In [4]:
# Validate that all TIF share the same coordinates
x = None
y = None
# Stays None when read_path contains no .tif file, so that case can be reported
# explicitly instead of surfacing as a NameError further down.
image = None

for tif in read_path.glob('*.tif'):
    image = utils.read_tif(tif)
    if x is None:
        # The first raster read provides the reference coordinates.
        x = image.x
        y = image.y
    else:
        # The offending file path is used as the assertion message.
        assert np.all(x == image.x) and np.all(y == image.y), tif

if image is None:
    raise FileNotFoundError(f'No .tif files found in {read_path}')

# Fill value taken from the last raster that was read.
no_data_value = image._FillValue

# Map raster to polygons

In [5]:
# Shapes to map onto the raster grid; later cells read each row's 'geometry'
# and 'id' and use the row count as the progress-bar total.
geo_df = gpd.read_file(
    '../Shapefiles/preprocessed/all_countries_with_eth.shp'
)

In [6]:
# Number of worker threads for the ThreadPoolExecutor below; 1 runs everything
# serially in the current thread.
n_processes = 30

# Module-level raster read by save_location_mapping.
IMAGE = utils.read_tif(read_path / base_file)

iterator = partial(utils.yield_missing_shapes, save_path=partial_path, prefix=base_file[:-4])

# Materialise the work list once so the progress bar total matches what is
# actually processed. The previous total (geo_df.shape[0]) counts every shape,
# whereas the iterator presumably skips shapes that already have an output file
# (TODO confirm against utils.yield_missing_shapes), which leaves the bar short
# of 100%. Executor.map collects its iterable eagerly anyway, so this adds no
# extra memory cost in the threaded branch.
pending = list(iterator(geo_df))

if n_processes == 1:
    for row_and_path in tqdm(pending):
        save_location_mapping(row_and_path)
else:
    with ThreadPoolExecutor(n_processes) as tpe:
        # Consuming the map iterator drives the bar and re-raises any exception
        # raised inside a worker.
        for _ in tqdm(
            tpe.map(save_location_mapping, pending),
            total=len(pending)
        ):
            pass

 97%|██████████████████████████████▉ | 118888/122772 [12:55:27<25:20,  2.56it/s]


# Union portions from different files and shapes

In [7]:
# Group the per-shape CSV files by region id, then store one feather file per
# region in by_country_path.
df_by_region = {}
for file in tqdm(partial_path.glob('*.csv'), desc='Reading'):
    try:
        df = pd.read_csv(file)
    except pd.errors.EmptyDataError:
        # Zero-byte placeholder written for shapes with no intersecting cells.
        continue

    if df.empty:
        # Header-only file: there is no first row to take a region id from.
        continue

    if 'id' not in df.columns:
        # Fall back to the most specific administrative level available.
        # Built as a single expression: calling fillna(inplace=True) on
        # df['id'] is chained assignment, which pandas deprecates and which
        # does not update df under copy-on-write.
        df['id'] = df['adm2'].fillna(df['adm1']).fillna(df['adm0'])
    # The first row's id is taken as the region for the whole file.
    region = df['id'].iloc[0]
    df_by_region.setdefault(region, []).append(df)

for region, dfs in tqdm(df_by_region.items(), desc='Saving'):
    df = utils.combine_dataframes(dfs)
    df.to_feather(by_country_path / f'{region}.feather')

# Release the in-memory frames now that they are on disk.
del df_by_region

Reading: 122772it [02:37, 779.54it/s] 
Saving: 100%|████████████████████████████| 47728/47728 [01:35<00:00, 501.93it/s]


# Aggregate

In [8]:
# Aggregate the per-region feather files for the base raster and save the
# result under the raster's file name in output_path.
# NOTE(review): the validation cell stores image._FillValue in `no_data_value`,
# but that variable is never used; -9999 is hard-coded here. Confirm they agree.
utils.aggregate_feather_splits_and_save(
    by_country_path,
    output_path / base_file,
    no_data_value=-9999,
)

Grouping: 100%|██████████████████████████| 47728/47728 [04:50<00:00, 164.58it/s]


# Additional files with same raster

In [9]:
# For every raster in read_path, reuse the per-region cell mapping stored in
# by_country_path: merge the raster values into each region frame, write the
# result to a scratch directory, aggregate it, then drop the scratch directory.
for file in read_path.glob('*.tif'):
    print(file.name)
    # Scratch directory named after the raster (file name without extension).
    file_path = output_path / file.stem
    # Start from a clean directory so splits left behind by an interrupted run
    # are not picked up by the aggregation step.
    shutil.rmtree(file_path, ignore_errors=True)
    file_path.mkdir(exist_ok=True)

    try:
        IMAGE = utils.read_tif(file)
        increment = raster.get_increment_from_tif(IMAGE)

        for df_path in tqdm(by_country_path.glob('*.feather')):
            df = pd.read_feather(df_path)
            pol = utils.get_mock_polygon_from_df(df, increment=increment)
            subdf = raster.merge_df_to_array_by_lat_lon(df, IMAGE, pol)
            if subdf.empty:
                print(df_path.name, 'is empty')
            else:
                subdf.to_feather(file_path / df_path.name)

        utils.aggregate_feather_splits_and_save(file_path, output_path / file.name, no_data_value=-9999)
    finally:
        # Always remove the scratch directory, including when a step above
        # fails; ignore_errors keeps a cleanup failure from masking that error.
        shutil.rmtree(file_path, ignore_errors=True)

Soy.tif


47728it [08:17, 96.02it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:40<00:00, 170.06it/s]


Yam.tif


47728it [07:59, 99.54it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:39<00:00, 170.49it/s]


Tea.tif


47728it [07:48, 101.95it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:41<00:00, 169.50it/s]


Oat.tif


47728it [07:48, 101.81it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:36<00:00, 172.81it/s]


Rye.tif


47728it [07:38, 104.15it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:39<00:00, 170.62it/s]


Flax.tif


47728it [08:22, 94.96it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:34<00:00, 173.98it/s]


Gram.tif


47728it [08:11, 97.18it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:56<00:00, 161.00it/s]


Olive.tif


47728it [08:10, 97.28it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:54<00:00, 161.89it/s]


Cocoa.tif


47728it [07:44, 102.81it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:35<00:00, 173.53it/s]


Wheat.tif


47728it [08:16, 96.06it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:43<00:00, 168.42it/s]


Onion.tif


47728it [08:08, 97.67it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:49<00:00, 164.78it/s]


Maize.tif


47728it [07:37, 104.28it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:57<00:00, 160.35it/s]


Grass.tif


47728it [08:14, 96.56it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:28<00:00, 177.97it/s]


Millet.tif


47728it [08:09, 97.57it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:32<00:00, 175.04it/s]


Carrot.tif


47728it [07:19, 108.48it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:34<00:00, 173.70it/s]


Coffee.tif


47728it [07:38, 104.05it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:27<00:00, 178.40it/s]


Citrus.tif


47728it [07:50, 101.52it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:48<00:00, 165.59it/s]


Cotton.tif


47728it [08:18, 95.71it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:01<00:00, 158.41it/s]


Banana.tif


47728it [08:18, 95.65it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:46<00:00, 166.67it/s]


Tomato.tif


47728it [08:01, 99.04it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:45<00:00, 166.99it/s]


Cowpea.tif


47728it [08:13, 96.62it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:42<00:00, 169.07it/s]


Barley.tif


47728it [08:02, 98.86it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:54<00:00, 161.92it/s]


Dry_Pea.tif


47728it [08:28, 93.93it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:12<00:00, 152.85it/s]


Cabbage.tif


47728it [08:15, 96.32it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:44<00:00, 167.61it/s]


T_Maize.tif


47728it [07:41, 103.38it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:45<00:00, 167.13it/s]


Alfalfa.tif


47728it [07:48, 101.80it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:38<00:00, 171.27it/s]


Coconut.tif


47728it [07:43, 103.04it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:28<00:00, 177.43it/s]


L_Maize.tif


47728it [08:11, 97.15it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:37<00:00, 171.86it/s]


Cocoyam.tif


47728it [08:10, 97.35it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:39<00:00, 170.93it/s]


Cassava.tif


47728it [08:25, 94.36it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:29<00:00, 145.00it/s]


Tobacco.tif


47728it [08:40, 91.73it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:23<00:00, 147.56it/s]


Sorghum.tif


47728it [08:37, 92.14it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:25<00:00, 146.79it/s]


H_Maize.tif


47728it [08:36, 92.48it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:26<00:00, 146.05it/s]


Jatropha.tif


47728it [08:42, 91.42it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:23<00:00, 147.33it/s]


Chickpea.tif


47728it [08:42, 91.29it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:26<00:00, 146.10it/s]


Dry_Rice.tif


47728it [08:43, 91.24it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:25<00:00, 146.60it/s]


Rapeseed.tif


47728it [08:38, 92.10it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:25<00:00, 146.59it/s]


Oil_Palm.tif


47728it [08:27, 94.07it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:55<00:00, 161.63it/s]


Wet_Rice.tif


47728it [08:29, 93.75it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:46<00:00, 166.82it/s]


Sunflower.tif


47728it [08:27, 94.13it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:48<00:00, 165.72it/s]


B_Sorghum.tif


47728it [07:50, 101.43it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:00<00:00, 159.02it/s]


T_Sorghum.tif


47728it [08:19, 95.48it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:51<00:00, 163.51it/s]


Buckwheat.tif


47728it [07:40, 103.62it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:40<00:00, 170.41it/s]


L_Sorghum.tif


47728it [08:18, 95.70it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [04:52<00:00, 163.42it/s]


White_Yam.tif


47728it [07:53, 100.78it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [04:53<00:00, 162.78it/s]


H_Sorghum.tif


47728it [08:21, 95.09it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:14<00:00, 151.94it/s]


Sugarcane.tif


47728it [08:21, 95.25it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:18<00:00, 149.78it/s]


Sugarbeet.tif


47728it [08:33, 93.03it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:27<00:00, 145.66it/s]


Pigeonpea.tif


47728it [08:40, 91.64it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:23<00:00, 147.70it/s]


Spring_Rye.tif


47728it [08:39, 91.89it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:26<00:00, 146.12it/s]


Miscanthus.tif


47728it [08:35, 92.52it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:25<00:00, 146.85it/s]


BL_Sorghum.tif


47728it [08:37, 92.15it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:25<00:00, 146.59it/s]


Winter_Rye.tif


47728it [08:39, 91.80it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:23<00:00, 147.67it/s]


Yellow_Yam.tif


47728it [08:37, 92.27it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:25<00:00, 146.84it/s]


BH_Sorghum.tif


47728it [08:36, 92.37it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:21<00:00, 148.27it/s]


Ground_Nut.tif


47728it [08:29, 93.76it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:20<00:00, 148.77it/s]


BT_Sorghum.tif


47728it [08:28, 93.90it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:20<00:00, 149.02it/s]


Greater_Yam.tif


47728it [08:27, 94.09it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:19<00:00, 149.32it/s]


Switchgrass.tif


47728it [08:22, 94.98it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:28<00:00, 145.33it/s]


Pearl_Millet.tif


47728it [08:34, 92.79it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:19<00:00, 149.52it/s]


Silage_Maize.tif


47728it [08:27, 94.01it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:19<00:00, 149.40it/s]


Sweet_Potato.tif


47728it [08:27, 94.07it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:18<00:00, 149.67it/s]


White_Potato.tif


47728it [08:25, 94.33it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:17<00:00, 150.18it/s]


Winter_Wheat.tif


47728it [08:26, 94.27it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:17<00:00, 150.53it/s]


Cocoa_Cumoun.tif


47728it [08:20, 95.44it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:15<00:00, 151.17it/s]


Cocoa_Hybrid.tif


47728it [08:19, 95.57it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:17<00:00, 150.49it/s]


Spring_Wheat.tif


47728it [08:24, 94.52it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:16<00:00, 150.75it/s]


Spring_Barley.tif


47728it [08:24, 94.62it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:16<00:00, 150.79it/s]


Winter_Barley.tif


47728it [08:23, 94.74it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:16<00:00, 150.70it/s]


Phaseolus_Bean.tif


47728it [08:22, 95.05it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:15<00:00, 151.14it/s]


Coffee_Robusta.tif


47728it [08:23, 94.77it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:16<00:00, 150.88it/s]


Foxtail_Millet.tif


47728it [08:22, 94.90it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:15<00:00, 151.36it/s]


Coffee_Arabica.tif


47728it [08:23, 94.77it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:16<00:00, 150.90it/s]


Pasture_Legumes.tif


47728it [08:22, 95.03it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:16<00:00, 150.93it/s]


Reed_Canary_Grass.tif


47728it [08:23, 94.85it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:19<00:00, 149.20it/s]


Para_Rubber_highInput.tif


47728it [08:23, 94.79it/s] 
Grouping: 100%|██████████████████████████| 47728/47728 [05:18<00:00, 149.69it/s]


Napier_Grass_highInput.tif


47728it [08:25, 94.47it/s]
Grouping: 100%|██████████████████████████| 47728/47728 [05:19<00:00, 149.36it/s]


In [10]:
# Remove any scratch directory left behind by the per-raster processing loop.
for file in read_path.glob('*.tif'):
    file_path = output_path / file.stem
    # The processing loop already deletes each directory when it finishes, so
    # after a clean run there is nothing left; an unconditional rmtree would
    # raise FileNotFoundError on the first raster.
    if file_path.is_dir():
        shutil.rmtree(file_path)