In [None]:
import os
import sys

import geopandas as gpd
import pandas as pd
import yaml
from tqdm import tqdm

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from projections import raster

In [2]:
def find_key(country, keys):
    """Return the first element of ``keys`` that contains ``country``.

    Containment is evaluated with the ``in`` operator, so for string keys
    this is a substring match. ``None`` is returned when no element matches
    (including when ``keys`` is empty).
    """
    matches = (candidate for candidate in keys if country in candidate)
    return next(matches, None)

# Country metadata: an iterable of mappings read from the edited YAML file.
with open('../Data/Countries_edited.yml') as countries_file:
    countries = yaml.safe_load(countries_file)

# Country codes to keep: Spain and Portugal are always included, plus the
# first element of 'iso3' for every entry whose 'continent' is 'Africa'.
codes = {'ESP', 'PRT'}
codes.update(
    entry['iso3'][0]
    for entry in countries
    if entry.get('continent') == 'Africa'
)

# Ethnic homeland ("tribe") polygons that get intersected with the GADM units.
tribes = gpd.read_file('../Shapefiles/Ethnic/borders_tribes.shp')

# Intersect the tribe polygons with the GADM administrative units, once per
# administrative level (adm0, adm1, adm2), and write one shapefile + CSV per
# level.
#
# `country_lowest` maps a country code to the rows of the most detailed level
# seen so far. It deliberately persists across iterations: a code that has no
# rows at a deeper level keeps the rows recorded at the previous level.
country_lowest = {}
# Groups of country codes that are additionally processed as one dissolved
# unit, identified by the codes joined with '-'.
extra_countries = [['SSD', 'SDN']]
shps = ['../Shapefiles/GADM/gadm36_0.shp', '../Shapefiles/GADM/gadm36_1.shp', '../Shapefiles/GADM/gadm36_2.shp']
for i, shp in enumerate(shps):
    print(f'adm{i}')
    adm = gpd.read_file(shp)
    adm = adm[adm['GID_0'].isin(codes)]

    # Add extra countries
    print('Adding extra countries')
    for country_set in extra_countries:
        ngdf = adm[adm['GID_0'].isin(country_set)].copy()
        if ngdf.empty:
            continue

        code = '-'.join(country_set)
        codes.add(code)

        # Relabel every member row with the combined code and merge them into
        # a single geometry.
        ngdf['GID_0'] = code
        ngdf = ngdf.dissolve(by='GID_0').reset_index()
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        adm = pd.concat([adm, ngdf], ignore_index=True)

    # Update lowest
    for code in codes:
        country = adm[adm['GID_0'] == code]
        if not country.empty:
            country_lowest[code] = country

    # Find intersections
    pieces = []
    for code, country in tqdm(country_lowest.items(), desc='Intersecting'):
        # Join to tribes and find intersection
        # NOTE(review): newer geopandas releases rename `op` to `predicate`;
        # switch the keyword when the environment is upgraded.
        inter = gpd.sjoin(tribes, country, how='inner', op='intersects')
        # Index by the matched administrative row so that the assignment of
        # country['geometry'] below lines up by index label.
        inter.set_index('index_right', inplace=True)
        inter['geometry_tribe'] = inter['geometry']
        inter['geometry_adm'] = country['geometry']
        inter['geometry'] = inter['geometry'].intersection(inter['geometry_adm'])

        # Areas are measured after reprojecting to EPSG:3035.
        # NOTE(review): EPSG:3035 is a Europe-centred equal-area CRS; confirm
        # it is the intended projection for the African geometries.
        inter['area_tribe'] = inter['geometry_tribe'].to_crs(epsg=3035).area
        inter['area_adm'] = inter['geometry_adm'].to_crs(epsg=3035).area
        inter['area_intersection'] = inter['geometry'].to_crs(epsg=3035).area

        # Clean up
        inter.reset_index(drop=True, inplace=True)
        inter.drop(columns=['geometry_adm', 'geometry_tribe'],
                   inplace=True)
        pieces.append(inter)

    # Append and save
    if not pieces:
        # Nothing matched at this or any previous level: fail with a clear
        # message instead of an IndexError on an empty list.
        raise ValueError(f'No administrative units matched the requested country codes up to adm{i}')
    gdf = pd.concat(pieces)
    del pieces
    gdf.to_file(f'../Shapefiles/ethnic_preprocessed/tribe_adm{i}.shp')
    gdf.drop(columns='geometry').to_csv(f'../Shapefiles/ethnic_preprocessed/tribe_adm{i}.csv',
                                        index=False)

adm0
Adding extra countries


Intersecting: 100%|██████████| 58/58 [08:56<00:00,  9.25s/it]
  gdf.to_file(f'../Shapefiles/ethnic_preprocessed/tribe_adm{i}.shp')


adm1
Adding extra countries


Intersecting: 100%|██████████| 58/58 [03:47<00:00,  3.92s/it]
  gdf.to_file(f'../Shapefiles/ethnic_preprocessed/tribe_adm{i}.shp')


adm2
Adding extra countries


Intersecting: 100%|██████████| 58/58 [03:47<00:00,  3.92s/it]
  gdf.to_file(f'../Shapefiles/ethnic_preprocessed/tribe_adm{i}.shp')


# Split version

In [3]:
# Load the adm0-level tribe/country intersections written by the previous cell.
gdf = gpd.read_file('../Shapefiles/ethnic_preprocessed/tribe_adm0.shp')

# Shapes whose bounding-box area, as reported by raster.get_bounding_box_area,
# exceeds this threshold are cut into pieces.
# NOTE(review): units depend on the helper / layer CRS — confirm.
MAX_BBOX_AREA = 10
# Width passed to raster.quadrat_cut_geometry for each cut.
QUADRAT_WIDTH = 1

# Split big shapes
gdf['portion'] = None  # stays None for shapes that are not split
new_rows = []
split_indices = []
for idx, row in tqdm(gdf.iterrows(), total=gdf.shape[0], desc='Splitting'):
    area = raster.get_bounding_box_area(row['geometry'])
    if area > MAX_BBOX_AREA:
        cutted_geometry = raster.quadrat_cut_geometry(row['geometry'], quadrat_width=QUADRAT_WIDTH)
        # One output row per piece; all other attributes are copied from the
        # original row and 'portion' records the piece number.
        for portion, geometry in enumerate(cutted_geometry):
            new_row = row.copy()
            new_row['geometry'] = geometry
            new_row['portion'] = portion
            new_rows.append(new_row)
        # The original (unsplit) row is removed after the loop.
        split_indices.append(idx)

# Drop all split originals in one pass instead of once per row inside the loop.
if split_indices:
    gdf = gdf.drop(index=split_indices)

if new_rows:
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    split_parts = gpd.GeoDataFrame(new_rows, crs=gdf.crs)
    gdf = pd.concat([gdf, split_parts])

print(gdf.shape)
print(gdf['portion'].max())

gdf.to_file('../Shapefiles/ethnic_preprocessed/tribe_adm0_s.shp')