We need to find an updated table! This one is from 2019-10.

In [1]:
import os
import requests


# Local cache file name and remote location of the IBAMA table (PDF).
fname = "ibama.pdf"
url = "https://politica.estadao.com.br/blogs/estadao-verifica/wp-content/uploads/sites/690/2019/10/Ibama.pdf"

# Download only once; later runs reuse the local copy.
if not os.path.isfile(fname):
    # Stream into a temporary name so that an interrupted download never
    # leaves a truncated file that the guard above would treat as complete.
    partial_fname = fname + ".part"
    with requests.get(url, stream=True, timeout=60) as r:
        # Fail loudly on HTTP errors instead of saving an error page as the PDF.
        r.raise_for_status()
        with open(partial_fname, "wb") as fd:
            for chunk in r.iter_content(chunk_size=1000):
                fd.write(chunk)
    os.replace(partial_fname, fname)

Read the PDF with `tabula-py` and drop the observations (rows) that contain NaN values.

In [2]:
from tabula import read_pdf


# Columns that should be parsed as dates while the table is loaded.
date_columns = ["Data_Avist", "Data_Revis"]

# Extract the table from page 2 of the PDF and discard every row that has
# at least one missing value.
df = read_pdf(
    fname,
    pages=2,
    multiple_tables=False,
    pandas_options={"parse_dates": date_columns},
).dropna()

Fix the state of these entries based on their position. There is still some ambiguity in some of the locality names.

In [3]:
# Manual state corrections, keyed by municipality name.
state_by_municipality = {
    "São Cristóvão": "SE",
    "Sirinhaém": "PE",
}

for municipality, state in state_by_municipality.items():
    df.loc[df["Município"] == municipality, "Estado"] = state

In [4]:
import janitor


# Lower-case the headers first, then let pyjanitor normalise them.
df.columns = df.columns.str.lower()

# Old column name -> new column name, applied in this order.
column_renames = {
    "data_avist": "data avistagem",
    "data_revis": "data re-visita",
    "latitude": "lat",
    "longitude": "lon",
}

df = df.clean_names().remove_empty()
for old_name, new_name in column_renames.items():
    df = df.rename_column(old_name, new_name)

# Store the status as a categorical column.
df = df.encode_categorical(["status"])

In [5]:
# Append the state to the municipality ("<municipio>, <estado>") and then
# drop the now redundant state column.
state_suffix = ", " + df["estado"]
df["municipio"] = df["municipio"] + state_suffix
df = df.drop(columns=["estado"])

In [6]:
# Number of days between the sighting and the re-visit.
df["dias"] = (df["data re-visita"] - df["data avistagem"]).dt.days

# Render the dates as ISO text, writing "na" for missing dates.
# The mask is taken from the original datetime column because the text that
# `strftime` produces for a missing date depends on the pandas version
# (the string "NaT" in old releases, NaN in recent ones).
for date_col in ("data avistagem", "data re-visita"):
    df[date_col] = (
        df[date_col].dt.strftime("%Y-%m-%d").where(df[date_col].notna(), "na")
    )

# Same idea for the day count: a missing value converted to str may be
# "nan" or "<NA>" depending on the pandas version, so mask on the numeric
# column instead of matching on the text.
df["dias"] = df["dias"].astype("Int64").astype(str).where(df["dias"].notna(), "na")

# Single human-readable column: "<sighting> / <re-visit> (<n> dias)".
df["visita"] = (
    df["data avistagem"]
    + " / "
    + df["data re-visita"]
    + " (" + df["dias"] + " dias)"
)

# The individual pieces are no longer needed once "visita" exists.
df.drop(["data avistagem", "data re-visita", "dias"], axis=1, inplace=True)

Convert the text positions (degrees, minutes, seconds) to decimal degrees stored as floats.

In [7]:
def fix_pos(pos):
    """Convert a degrees/minutes/seconds position to signed decimal degrees.

    Parameters
    ----------
    pos : str or float
        Text such as ``10° 30' 0" S`` (degrees, minutes, seconds and a
        hemisphere letter), or a float, which is returned unchanged.

    Returns
    -------
    float
        Decimal degrees; negative for the ``S`` and ``W`` hemispheres.

    Raises
    ------
    ValueError
        If the hemisphere letter is not one of ``S``, ``W``, ``N``, ``E``,
        or if the text does not have the degrees/minutes/seconds layout.
    """
    # Already numeric: nothing to convert.
    if isinstance(pos, float):
        return pos

    # Peel off one component at a time, using its unit symbol as separator.
    deg, rest = pos.split("°")
    mi, rest = rest.strip().split("'")
    sec, hem = rest.strip().split('"')
    hem = hem.strip()

    deg, mi, sec = map(float, (deg, mi, sec))
    if hem in ("S", "W"):
        sign = -1
    elif hem in ("N", "E"):
        sign = +1
    else:
        # Report the offending hemisphere text; `sign` is not bound here.
        raise ValueError(f"Unrecognized sign {hem!r}, expected 'S', 'W', 'E', or 'N'")

    return sign * (deg + mi/60 + sec/60/60)

In [8]:
# Convert both coordinate columns from text to decimal degrees.
for coord_col in ("lat", "lon"):
    df[coord_col] = df[coord_col].apply(fix_pos)

Map it!

In [9]:
import folium
from folium.plugins import Fullscreen

import pandas as pd


# Marker colour for each known status keyword, checked in this order.
status_colors = (
    ("Não Observado", "lightgray"),
    ("Vestígios", "orange"),
    ("Manchas", "red"),
)
# Colour used when the status matches none of the keywords above.
fallback_color = "blue"

m = folium.Map()
Fullscreen().add_to(m)

for _, row in df.iterrows():
    status = row["status"]
    color = next(
        (color for keyword, color in status_colors if keyword in status),
        None,
    )
    if color is None:
        # Still plot the point, with a distinct colour, instead of failing
        # on a missing icon or silently reusing the previous row's icon.
        print(f"Unrecognized status {status}.")
        color = fallback_color

    # A fresh icon is created for every marker.
    icon = folium.Icon(color=color, icon='tint')

    location = row["lat"], row["lon"]
    # The popup shows every remaining field of the row as an HTML table.
    table = pd.DataFrame(row).drop(["lon", "lat"]).to_html()
    folium.Marker(
        location=location,
        popup=table,
        icon=icon
    ).add_to(m)

# Zoom the map so that all markers are visible.
m.fit_bounds(m.get_bounds())

In [10]:
# Display the map as the output of this cell.
m