In [39]:
import zipfile
from io import BytesIO
from urllib.request import urlopen

import geopandas as gpd
import pandas as pd
import rasterio
from aequilibrae import Project

In [31]:
from functions.raster_to_dataframe import raster_to_df
from functions.country_main_area import get_main_area

## Model place (Country) and population raster definition
(the raster must be in the same folder as this notebook)

In [40]:
# Country being modelled. Used to build the project folder path and to look up
# the matching 'Country Name' row in the World Bank table further down.
model_place = 'Andorra'
# Population raster file name, opened relative to the notebook's working directory.
pop_file = 'and_ppp_2020.tif'

## Loading project and getting country main area to be transformed into dataframe

In [33]:
# Project folder: one level above the working directory, named after the country.
model_fldr = f'../{model_place}'
# Alternative raster location (disabled; pop_file is set in the configuration cell):
# pop_file = f'../population/{model_place}/ppp_2020.tif'

In [34]:
# Open the existing AequilibraE project stored in the model folder.
project = Project()
project.open(model_fldr)

In [35]:
# get_main_area is a project-local helper; presumably it returns the geometry of
# the country's main area from the open project — TODO confirm against its source.
main_area = get_main_area(project)
# Disabled: wrapping main_area in a single-row GeoDataFrame (open question, see trailing note).
# modeling_region = gpd.GeoDataFrame(pd.DataFrame({'ids': [0], 'geometry': [main_area]}), crs="EPSG:4326") ## Ask about it

## Processing the raster image in Python 
(slow for large datasets)

In [41]:
# Open the population raster inside a context manager so the underlying file
# handle is released as soon as the conversion finishes (the original left the
# dataset open for the lifetime of the kernel).
with rasterio.open(pop_file) as dataset:
    # raster_to_df is a project-local helper; later cells read the `population`,
    # `longitude` and `latitude` columns of its result. Presumably it keeps only
    # the cells that fall inside main_area — TODO confirm against its source.
    df = raster_to_df(dataset, main_area)

## Comparing the total vectorized population to the World Bank source

In [42]:
# Download the World Bank total-population indicator (SP.POP.TOTL) as a zipped CSV bundle.
url = "https://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=csv"
# urlopen raises HTTPError/URLError on a failed request; the timeout keeps a
# stalled connection from hanging the notebook indefinitely.
with urlopen(url, timeout=60) as response:
    buf1 = BytesIO(response.read())

with zipfile.ZipFile(buf1, "r") as f:
    # The data file's name ends in a release-specific number
    # (e.g. 'API_SP.POP.TOTL_DS2_en_csv_v2_4019998.csv'), so match on the stable
    # indicator prefix instead of pinning one release. Any other archive member
    # that does not start with this prefix is ignored.
    data_files = [
        name for name in f.namelist()
        if name.startswith('API_SP.POP.TOTL') and name.endswith('.csv')
    ]
    if not data_files:
        # Fail here with a clear message rather than leaving wb_tot_pop_df
        # undefined and crashing in a later cell.
        raise FileNotFoundError(
            f"No 'API_SP.POP.TOTL*.csv' data file found in the World Bank archive: {f.namelist()}"
        )
    with f.open(data_files[0]) as zd:
        # skiprows=4 skips the four lines that precede the column header row.
        wb_tot_pop_df = pd.read_csv(zd, skiprows=4, encoding='utf-8')

In [43]:
# World Bank 2020 population for the modelled country (rows matched on 'Country Name').
wb_source = wb_tot_pop_df.loc[wb_tot_pop_df['Country Name'] == model_place, '2020']
if wb_source.empty:
    # model_place must match the World Bank spelling of the country name exactly.
    raise ValueError(f"'{model_place}' was not found in the World Bank 'Country Name' column")
# Extract the scalar explicitly: calling int() directly on a Series is deprecated
# in recent pandas releases and fails outright when the selection is not one row.
wb_total = int(wb_source.iloc[0])

# Side-by-side totals: population vectorized from the raster vs. the World Bank figure.
table_df = pd.DataFrame({
    '': ['Vectorized from raster', 'From WB source'],
    'Total population': [int(df.population.sum()), wb_total],
})
# Grouping on the label column turns it into the (sorted) index for display.
table_df = table_df.groupby('').sum()
table_df

Unnamed: 0,Total population
,
From WB source,77265.0
Vectorized from raster,114728.0


## Creating a new table inside the model for the vectorized population

In [44]:
%%time
# Write the vectorized population into a 'raw_population' table in the project
# database, replacing any existing table of that name.
conn = project.conn
df.to_sql('raw_population', conn, if_exists='replace', index=False)
# Register a 'geometry' column of type POINT / XY with SRID 4326 on the new table.
# The trailing 1 is presumably the NOT NULL flag — confirm against the SpatiaLite docs.
conn.execute("select AddGeometryColumn( 'raw_population', 'geometry', 4326, 'POINT', 'XY', 1);")

CPU times: user 151 ms, sys: 4.46 ms, total: 155 ms
Wall time: 245 ms


<sqlite3.Cursor at 0x7f9208895260>

## Creating point geometries from the population latitude and longitude coordinates

In [45]:
%%time
# Build a point (SRID 4326) for every row from its longitude/latitude columns
# and store it in the geometry column added in the previous cell.
conn.execute("UPDATE raw_population SET Geometry=MakePoint(longitude, latitude, 4326)")
# Persist the changes made through this connection.
conn.commit()

CPU times: user 626 ms, sys: 33.2 ms, total: 659 ms
Wall time: 783 ms


In [46]:
# Close the AequilibraE project now that the population table has been written.
project.close()