In [1]:
%matplotlib inline
import os
import sys
# Relative path two directory levels up ("../.."), resolved against the
# current working directory; used below as the project root.
project_dir = os.path.join(os.pardir, os.pardir)
# Must run before the `from src...` import further down so that the `src`
# package located under project_dir can be found.
sys.path.append(project_dir)

import dotenv
# Load the `.env` file that sits in the project root (presumably to populate
# environment variables used by the project helpers -- confirm).
dotenv_path = os.path.join(project_dir, '.env')
dotenv.load_dotenv(dotenv_path)

import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from geojson import Feature
import json

from src.data.processing_func import (connect_database, extract_geo_sections)

# Widen the notebook display limits so wide/long frames are not truncated.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 500)

In [2]:
def read_accidents(fp):
    """Load the raw accidents CSV and return one geo-referenced row per involved party.

    Steps, in order: read the file as latin3, drop the ``hora``, ``classe_de``
    and ``codlog`` columns, rename the mis-decoded ``numero_de`` header,
    normalise labels whose accented characters were lost, then run the
    ``correct_latlon`` -> ``extract_parts`` -> ``generalize_entity`` ->
    ``add_geometry`` -> ``convert_into_gdf`` -> ``add_geojson`` ->
    ``name_index`` pipeline.

    Parameters
    ----------
    fp : str or path-like
        Location of the accidents CSV.

    Returns
    -------
    geopandas.GeoDataFrame
        The processed frame produced by the last pipeline step.
    """
    # Each pattern rewrites a label whose accented letter was garbled.
    # The wildcard is restricted to non-whitespace (\S*) so a match can never
    # leave the word it started in.  With the previous greedy `.*`, a
    # multi-party label was swallowed whole: "Carro x Bicicleta" matched
    # `\bCarro.*a\b` and became "Carroca", "Caminhão x Carro" became
    # "Caminhao", and "Carro x Ônibus" became "Onibus".
    label_fixes = (
        ("\\b\\S*culo\\b", "Obstaculo"),
        ("\\b\\S*nibus\\b", "Onibus"),
        ("\\b[Ss]a\\S*da", "Saida"),
        ("\\b[Cc]aminh\\S*o\\b", "Caminhao"),
        ("\\bCarro\\S*a\\b", "Carroca"),
    )

    # NOTE(review): the rename key is the header exactly as it comes out of the
    # latin3 decode of this file; it has to stay byte-for-byte as written.
    df = (pd.read_csv(fp, encoding="latin3")
            .drop(["hora", "classe_de", "codlog"], axis=1)
            .rename(columns={"nïż½mero_de": "numero_de"})
         )

    # Applied to every column, in the same order as before.
    for pattern, label in label_fixes:
        df = df.replace(to_replace=pattern, value=label, regex=True)

    df = (df.pipe(correct_latlon)
            .pipe(extract_parts)
            .pipe(generalize_entity)
            .pipe(add_geometry)
            .pipe(convert_into_gdf)
            .pipe(add_geojson)
            .pipe(name_index)
         )
    return df

def correct_latlon(df):
    """Scale down out-of-range coordinates in place and return the same frame.

    Values of ``X`` above 10**7 and values of ``Y`` above 10**8 are divided
    by 1000; every other value is left untouched.
    """
    upper_bounds = (("X", 10**7), ("Y", 10**8))
    for column, bound in upper_bounds:
        oversized = df[column] > bound
        df.loc[oversized, column] /= 1000
    return df

def extract_parts(df):
    """Explode each accident into one row per involved party.

    The ``tipo`` column holds the parties joined by ``' x '`` (for example
    ``"Carro x Moto"``).  Every input row is repeated once per party and the
    party name is placed in a new leading ``entidade`` column.  The result
    has a fresh 0..n-1 index and the input frame is not modified.

    Unlike the previous row-by-row construction, column dtypes are preserved
    (nothing is coerced to ``object``) and an empty input yields an empty
    frame instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column ``tipo``.

    Returns
    -------
    pandas.DataFrame
        ``entidade`` followed by the original columns, one row per party.
    """
    # Split once per row; both the repeat counts and the labels come from this.
    parts_per_row = [kind.split(' x ') for kind in df['tipo']]

    # Explicit int dtype: an empty list would otherwise become a float array,
    # which np.repeat rejects as a repeat count.
    repeats = np.asarray([len(parts) for parts in parts_per_row], dtype=int)

    # Positional selection so duplicated index labels cannot multiply rows.
    row_positions = np.repeat(np.arange(len(df)), repeats)
    expanded = df.iloc[row_positions].reset_index(drop=True)

    entities = [part for parts in parts_per_row for part in parts]
    expanded.insert(0, "entidade", entities)
    return expanded

def generalize_entity(df):
    """Collapse motorcycle and bicycle variants into one label each, in place.

    Any ``entidade`` value containing ``moto``/``Moto`` becomes ``'Moto'``;
    afterwards any value containing ``bicicleta``/``Bicicleta`` becomes
    ``'Bicicleta'``.  The same frame is returned.
    """
    canonical_labels = (
        ('[Mm]oto', 'Moto'),
        ('[Bb]icicleta', 'Bicicleta'),
    )
    for pattern, label in canonical_labels:
        # Re-evaluated on every pass so it sees the previous replacement.
        hits = df['entidade'].str.contains(pattern)
        df.loc[hits, 'entidade'] = label
    return df
    
def add_geometry(df):
    """Add a ``geometry`` column of shapely Points built from ``X`` and ``Y``.

    The column is written onto the frame that was passed in, and that same
    frame is returned.

    The points are built in a single pass over the two coordinate columns
    instead of a row-wise ``DataFrame.apply(axis=1)``: this avoids creating a
    Series per row and also works for an empty frame, where ``apply`` returns
    a DataFrame rather than a Series and the column assignment is unreliable.
    """
    df["geometry"] = [Point(xy) for xy in zip(df["X"], df["Y"])]
    return df

def convert_into_gdf(df):
    """Wrap the frame in a GeoDataFrame and reproject it to EPSG:4326.

    The ``geometry`` column is declared to be in UTM zone 22, southern
    hemisphere, WGS84, metres; the result is reprojected to EPSG:4326
    (longitude/latitude).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``geometry`` column of shapely geometries.

    Returns
    -------
    geopandas.GeoDataFrame
        The reprojected frame.
    """
    # The zone was previously written as "22J," -- latitude band letter plus a
    # stray comma -- which is not a valid value for +zone.  The southern
    # hemisphere is already expressed by +south, so the zone is just 22.
    crs = "+proj=utm +zone=22 +south +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
    gdf = gpd.GeoDataFrame(df, crs=crs, geometry="geometry")
    # `epsg=` replaces the deprecated {'init': 'epsg:4326'} dictionary form.
    gdf = gdf.to_crs(epsg=4326)
    return gdf

def add_geojson(df):
    """Add ``Longitude``, ``Latitude`` and ``geojson`` columns derived from ``geometry``.

    ``Longitude`` and ``Latitude`` are the first and second value of each
    geometry's first coordinate; ``geojson`` is the JSON text of a Feature
    wrapping the geometry.  The columns are written onto the frame that was
    passed in, and that same frame is returned.
    """
    def _longitude(record):
        return record.geometry.coords[0][0]

    def _latitude(record):
        return record.geometry.coords[0][1]

    def _feature_json(record):
        return json.dumps(Feature(geometry=record.geometry))

    # Same three row-wise passes, in the same order, as separate assignments.
    derived_columns = (
        ("Longitude", _longitude),
        ("Latitude", _latitude),
        ("geojson", _feature_json),
    )
    for column, builder in derived_columns:
        df[column] = df.apply(builder, axis=1)
    return df

def name_index(df):
    """Label the frame's index ``"id"`` in place and return the same frame."""
    row_index = df.index
    row_index.name = "id"
    return df

# Build the input path with os.path.join, as the setup cell does for
# project_dir and the .env file, instead of concatenating separators by hand.
accidents_csv = os.path.join(project_dir, "data", "external", "bombeiros_acidentes2015.csv")
df_accidents = read_accidents(accidents_csv)
df_accidents.head()

Unnamed: 0_level_0,entidade,X,Y,id,data,turno,dia_da_sem,numero_de,tipo,logradouro,ponto_de_r,bairro,codlogra,acumulo,nomelog,st_length_,geometry,Longitude,Latitude,geojson
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,Obstaculo Fixo,713505,7090960.0,20150000.0,01/01/2015,MADRUGADA,QUINTA-FEIRA,1,Obstaculo Fixo,Rua Benjamin Constant,1393,Glïż½ria,1294,144,BERTHA WILL,143.662,POINT (-48.86171860043524 -26.28529549889597),-48.861719,-26.285295,"{""type"": ""Feature"", ""geometry"": {""type"": ""Poin..."
1,Carro,714574,7089280.0,20150000.0,01/01/2015,MANHA,QUINTA-FEIRA,1,Carro x Carro,Rua Quinze de Novembro,844,Centro,1241,111,BLUMENAU,110.884,POINT (-48.85074340009024 -26.3002972989172),-48.850743,-26.300297,"{""type"": ""Feature"", ""geometry"": {""type"": ""Poin..."
2,Carro,714574,7089280.0,20150000.0,01/01/2015,MANHA,QUINTA-FEIRA,1,Carro x Carro,Rua Quinze de Novembro,844,Centro,1241,111,BLUMENAU,110.884,POINT (-48.85074340009024 -26.3002972989172),-48.850743,-26.300297,"{""type"": ""Feature"", ""geometry"": {""type"": ""Poin..."
3,Moto,719251,7094480.0,20150000.0,01/01/2015,NOITE,QUINTA-FEIRA,1,Moto x Bicicleta,Rua Martinho Van Biene,1899,Jardim Iririïż½,8393,774,ROGERIO PEREIRA,128.058,POINT (-48.80480730035975 -26.25260409902171),-48.804807,-26.252604,"{""type"": ""Feature"", ""geometry"": {""type"": ""Poin..."
4,Bicicleta,719251,7094480.0,20150000.0,01/01/2015,NOITE,QUINTA-FEIRA,1,Moto x Bicicleta,Rua Martinho Van Biene,1899,Jardim Iririïż½,8393,774,ROGERIO PEREIRA,128.058,POINT (-48.80480730035975 -26.25260409902171),-48.804807,-26.252604,"{""type"": ""Feature"", ""geometry"": {""type"": ""Poin..."


In [3]:
# Persist the processed frame; path built with os.path.join for consistency
# with the setup cell.
# NOTE(review): the frame shown above has both an index named "id" and a data
# column "id", so the CSV header will contain "id" twice -- confirm that is
# what downstream readers expect.
df_accidents.to_csv(os.path.join(project_dir, "data", "processed", "processed_accidents.csv"))