In [1]:
from fastcore.all import *
import pandas as pd, geopandas, matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('data/entidades.csv', encoding='latin1')
df.columns = map(str.lower, df.columns)
df.iloc[:3,-8:]

Unnamed: 0,poder,sector,departamento,provincia,distrito,entidad
0,Poder Legislativo,Otros,Lima,Lima,Lima,Congreso de la RepÃºblica (Congreso)
1,Poder Judicial,Otros,Lima,Lima,Lima,Academia de la Magistratura (AMAG)
2,Poder Judicial,Otros,Arequipa,Arequipa,Arequipa,Corte Superior de Arequipa (CSJAR)


In [3]:
# Drop unkwown locations
cols = ['departamento', 'provincia', 'distrito']
df.dropna(subset=cols, inplace=True)

In [4]:
def format_col(df, col): df[col] = df[col].str.title().str.replace('De', 'de')
for col in cols: format_col(df, col)

In [5]:
df.departamento.unique()

array(['Lima', 'Arequipa', 'Cusco', 'Amazonas', 'Ancash', 'Apurimac',
       'Ayacucho', 'Cajamarca', 'Huancavelica', 'Huanuco', 'Ica', 'Junin',
       'La Libertad', 'Loreto', 'Pasco', 'Callao', 'Puno', 'Piura',
       'Tacna', 'Tumbes', 'Ucayali', 'Lambayeque', 'San Martin',
       'Madre de Dios', 'Moquegua'], dtype=object)

In [6]:
df[cols].nunique()

departamento      25
provincia        201
distrito        1669
dtype: int64

In [7]:
gdf = geopandas.read_file('data/departamentos/DEPARTAMENTOS.shp')
gdf.columns = map(str.lower, gdf.columns)
gdf.rename({'departamen': 'departamento'}, axis=1, inplace=True)
gdf.drop(['capital', 'fuente', 'iddpto'], axis=1, inplace=True)
format_col(gdf, 'departamento')
gdf.head()

Unnamed: 0,departamento,geometry
0,Amazonas,"POLYGON ((-77.81211 -2.98962, -77.81332 -2.990..."
1,Ancash,"POLYGON ((-77.64692 -8.05086, -77.64669 -8.052..."
2,Apurimac,"POLYGON ((-73.74632 -13.17456, -73.74570 -13.1..."
3,Arequipa,"POLYGON ((-71.98109 -14.64062, -71.98093 -14.6..."
4,Ayacucho,"POLYGON ((-74.34843 -12.17503, -74.35000 -12.1..."


Check if all locations are in the shapefile

In [8]:
def normalize_str(x): return x.lower().replace('ð', 'ñ')
def process_col(df, col): return L(df[col].unique().tolist())#.map(normalize_str)
def check_col(col):
    print(f'Checking {col!r}:')
    a = process_col(df, col)
    b = process_col(gdf, col)
    n = len(set(a) - set(b))
    if n == 0: print('All found')
    else     :
        print(f'{n} values without match')
        print(set(a) - set(b))
        print(set(b) - set(a))

In [9]:
check_col('departamento')

Checking 'departamento':
All found


In [24]:
df_poblacion = pd.read_csv('data/poblacion_departamento.csv')
df_poblacion.columns = map(str.lower, df_poblacion.columns)
format_col(df_poblacion, 'departamento')
test_eq(sum(~df_poblacion.departamento.isin(df.departamento)), 0)
df_poblacion.head(3)

Unnamed: 0,departamento,total
0,Amazonas,426806
1,Ancash,1180638
2,Apurimac,430736


# Export data

In [10]:
public_path = Path('../public')

In [24]:
df.to_csv(public_path / 'entidades.csv', index=False)
df_poblacion.to_csv(public_path / 'poblacion_departamentos.csv', index=False)

In [26]:
gdf.to_file('data/departamentos.geojson', driver='GeoJSON')

In [28]:
!ls -lh data

total 23M
drwxr-xr-x 2 renato renato 4.0K Jun 27 13:14 [0m[01;34mdepartamentos[0m
-rw-rw-r-- 1 renato renato  14M Jun 27 13:32 departamentos.geojson
-rw-rw-r-- 1 renato renato 3.0M Jun 27 13:11 [01;31mdepartamentos.rar[0m
-rw-rw-r-- 1 renato renato 281K Jun 27 13:24 entidades.csv
drwxr-xr-x 2 renato renato 4.0K Jun 27 13:14 [01;34mprovincias[0m
-rw-rw-r-- 1 renato renato 6.7M Jun 27 13:11 [01;31mprovincias.rar[0m


Geojson size is big, so lets use https://mapshaper.org/ to reduce it to 5% and export as topojson

In [3]:
!ls -lh {public_path}

total 812K
-rw-rw-r-- 1 renato renato 490K Jun 27 14:00 departamentos.json
-rw-rw-r-- 1 renato renato 281K Jun 27 13:26 entidades.csv
-rw-rw-r-- 1 renato renato 3.8K Jun 27 01:28 favicon.ico
-rw-rw-r-- 1 renato renato 1.7K Jun 27 01:31 index.html
-rw-rw-r-- 1 renato renato 5.3K Jun 27 01:28 [0m[01;35mlogo192.png[0m
-rw-rw-r-- 1 renato renato 9.5K Jun 27 01:28 [01;35mlogo512.png[0m
-rw-rw-r-- 1 renato renato  492 Jun 27 01:28 manifest.json
-rw-rw-r-- 1 renato renato   67 Jun 27 01:28 robots.txt
