In [49]:
import time

import geopandas as gpd
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim

In [93]:
#Load the raw port statistics. The header row of the file is skipped and replaced
#by the column names given here. Both volume columns are read as strings because
#they are parsed manually further down ('.' is stripped, ',' becomes the decimal point).
ports_raw = pd.read_csv(
    'data/ports_volume.csv',
    sep=';',
    names=['year', 'federal_state', 'municipality', 'port', 'freight_volume_t', 'gross_container_throughput'],
    skiprows=1,
    dtype={'freight_volume_t': 'string', 'gross_container_throughput': 'string'},
)

#Work on a copy so that ports_raw stays untouched and re-running this cell
#always starts from the same input
ports = ports_raw.copy()


def _parse_localized_number(values):
    """Turn strings such as '1.234,5' into '1234.5'.

    '.' is removed and ',' is replaced by '.'. Both replacements are literal
    (regex=False); as a regular expression '.' would match every character.
    The result is still a string Series, the caller picks the numeric dtype.
    """
    return values.str.replace('.', '', regex=False).str.replace(',', '.', regex=False)


#Port part id (built before the port names are cleaned below)
ports['port_part_id'] = ports.port + ' in ' + ports.municipality

#Rename not classified ports to the name of their municipality
not_classified = ports.port == 'Não Classificado'
ports.loc[not_classified, 'port'] = ports.loc[not_classified, 'municipality']

#Remove the isolated installation suffix from the port name
ports['port'] = ports.port.str.replace(' (Instalação Isolada)', '', regex=False)

#Insert country column
ports['country'] = 'BR'

#Filter for active ports: keep every row of a port that has at least one record with year > 2020.
#NOTE(review): the original note said "collected since 2020" but the comparison is strict,
#so ports whose latest record is from 2020 are dropped - confirm which one is intended.
#.copy() makes the filtered frame independent, so the assignments below do not
#write to a slice of the unfiltered frame (SettingWithCopyWarning).
active_ports = ports.loc[ports.year > 2020, 'port']
ports = ports.loc[ports.port.isin(active_ports)].copy()

#Cast freight and container column to numbers; a bare ',' in the freight column is read as 0
ports.loc[ports.freight_volume_t == ',', 'freight_volume_t'] = '0'
ports['freight_volume_t'] = _parse_localized_number(ports.freight_volume_t).astype(float)
ports['gross_container_throughput'] = _parse_localized_number(ports.gross_container_throughput).astype(int)

#Groupby port part and calculate mean freight and container throughput over the years
ports = ports.groupby('port_part_id').agg({
    'country': 'first',
    'federal_state': 'first',
    'municipality': 'first',
    'port': 'first',
    'year': 'mean',
    'freight_volume_t': 'mean',
    'gross_container_throughput': 'mean',
})

#Sum the port parts up to one row per port (state and municipality come from the first part)
ports = ports.groupby('port').agg({
    'country': 'first',
    'federal_state': 'first',
    'municipality': 'first',
    'freight_volume_t': 'sum',
    'gross_container_throughput': 'sum',
})

#Calculate ports' energy share as the average of the freight share and the container share
ports['fraction'] = (ports.freight_volume_t / ports.freight_volume_t.sum()
                    + ports.gross_container_throughput / ports.gross_container_throughput.sum()) / 2

ports.sort_values(by='fraction', ascending=False)

  ports['freight_volume_t'] = ports.freight_volume_t.str.replace('.', '').str.replace(',', '.').astype(float)
  ports['gross_container_throughput'] = ports.gross_container_throughput.str.replace('.', '').str.replace(',', '.').astype(int)


Unnamed: 0_level_0,country,federal_state,municipality,freight_volume_t,gross_container_throughput,fraction
port,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Santos,BR,SP,Cubatão,9918240.0,188741.0,0.2394254
Itaqui,BR,MA,São Luís,14349837.4,0.0,0.08229163
Paranaguá - Antonina,BR,PR,Antonina,4177252.0,46562.0,0.06898926
São Francisco do Sul,BR,SC,Itapoá,3091317.2,42738.0,0.05906326
Itajaí,BR,SC,Itajaí,1143580.7,54217.0,0.05899595
Manaus,BR,AM,Itacoatiara,2508503.1,38810.0,0.05192191
Vitória,BR,ES,Anchieta,7091374.6,11105.0,0.05140731
Rio de Janeiro - Niterói,BR,RJ,Duque de Caxias,2254032.2,32507.0,0.04436643
Rio Grande,BR,RS,Pelotas,2959034.2,27506.0,0.04357249
Itaguaí,BR,RJ,Itaguaí,6476079.4,4821.0,0.041801


In [94]:

#Geolocate ports
#Every port is looked up as "<port name>, <federal state>" restricted to Brazil.
#Ports that cannot be located get NaN coordinates, so x and y stay numeric columns
#(the earlier version wrote the string 'not found' into them).
GEOCODE_ATTEMPTS = 2         #one lookup plus one retry, as before
GEOCODE_DELAY_SECONDS = 1.0  #pause after every request; the public Nominatim service is rate limited

geolocator = Nominatim(user_agent='port_locations')

#Collect the results first and attach them afterwards, so the frame is not
#modified while it is being iterated
latitudes = {}
longitudes = {}

for port_id, port in ports.iterrows():

    loc_string = port_id + ', ' + port.federal_state
    loc = None

    for attempt in range(GEOCODE_ATTEMPTS):
        try:
            #geocode() returns None when the query has no match
            loc = geolocator.geocode(query=loc_string, country_codes='br')
        except Exception as err:
            #Best effort: report the failure and carry on instead of hiding it.
            #Unlike a bare except this still lets KeyboardInterrupt stop the cell.
            print(f'Geocoding attempt {attempt + 1} failed for {loc_string!r}: {err!r}')
            loc = None
        finally:
            time.sleep(GEOCODE_DELAY_SECONDS)

        if loc is not None:
            break

    latitudes[port_id] = loc.latitude if loc is not None else np.nan
    longitudes[port_id] = loc.longitude if loc is not None else np.nan

#Aligned on the port index; y = latitude, x = longitude
ports['y'] = pd.Series(latitudes, dtype=float)
ports['x'] = pd.Series(longitudes, dtype=float)

ports.sort_values(by='fraction', ascending=False)

Unnamed: 0_level_0,country,federal_state,municipality,freight_volume_t,gross_container_throughput,fraction,y,x
port,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Almeirim,BR,PA,Almeirim,2201.1,0.0,1.262259e-05,-1.529038,-52.578811
Ananindeua,BR,PA,Ananindeua,1861.2,0.0,1.067337e-05,-1.374035,-48.401662
Angra dos Reis,BR,RJ,Angra dos Reis,6342797.2,0.0,0.03637387,-23.006397,-44.316326
Aracajú,BR,SE,Barra dos Coqueiros,64362.0,0.0,0.000369095,-10.916206,-37.077466
Aratu - Salvador,BR,BA,Candeias,2974395.3,15676.0,0.03221878,-12.792619,-38.490386
Barra do Riacho,BR,ES,Aracruz,733255.3,0.0,0.004204979,-19.826546,-40.063682
Belém,BR,PA,Belém,33954.7,0.0,0.0001947191,-1.45056,-48.468245
Cabedelo,BR,PB,Cabedelo,94991.8,0.0,0.0005447469,-6.97324,-34.835161
Caracaraí,BR,RR,Caracaraí,2983.9,0.0,1.711169e-05,1.827598,-61.125389
Coari,BR,AM,Coari,84153.1,0.0,0.0004825905,-4.088596,-63.143117


In [95]:
#Write the port table to disk; the index (port name) is kept as the first column
ports.to_csv(path_or_buf='data/export_ports.csv', index=True)