In [59]:
%matplotlib inline
import os
import sys
# Project root, expressed relative to the current working directory (two levels up).
project_dir = os.path.join(os.pardir, os.pardir)
# Put the project root on the import path so the `src` package imported below resolves.
sys.path.append(project_dir)

import dotenv
# Load the project's `.env` file; the database settings below are read from os.environ.
dotenv_path = os.path.join(project_dir, '.env')
dotenv.load_dotenv(dotenv_path)

import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import geojson

from src.data.processing_func import (connect_database, extract_geo_sections)

pd.options.display.max_columns = 50

In [2]:
# Connection settings, each read from an environment variable prefixed with
# "db_"; a variable that is not set yields None.
DATABASE = {
    "drivername": os.getenv("db_drivername"),
    "host": os.getenv("db_host"),
    "port": os.getenv("db_port"),
    "username": os.getenv("db_username"),
    "password": os.getenv("db_password"),
    "database": os.getenv("db_database"),
}

# Handle returned by the project helper (presumably database metadata -- confirm
# against src.data.processing_func.connect_database).
meta = connect_database(DATABASE)

In [65]:
def read_accidents(fp):
    """Load the accidents CSV and return one cleaned row per involved entity.

    Steps, in order:
      1. read the file decoded as latin3;
      2. drop the "hora", "classe_de" and "codlog" columns;
      3. rename the mis-encoded count column to "numero_de";
      4. normalise the spelling of a few entity names in every string cell;
      5. pipe the frame through extract_parts, generalize_entity and
         create_geojson_point.

    Parameters
    ----------
    fp : str or path-like
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame
        The frame produced by the last pipeline step.
    """
    # (pattern, replacement) pairs, applied in this order to every string cell.
    # `\S*` keeps each match inside one whitespace-delimited token.  The former
    # greedy `.*` ran across the " x " separators, so e.g. "Carro x Bicicleta"
    # matched `\bCarro.*a\b` and collapsed to "Carroca", and
    # "Caminhao x Carro" collapsed to "Caminhao".
    normalizations = (
        (r"\b\S*culo\b", "Obstaculo"),
        (r"\b\S*nibus\b", "Onibus"),
        (r"\b[Ss]a\S*da", "Saida"),
        (r"\b[Cc]aminh\S*o\b", "Caminhao"),
        (r"\bCarro\S*a\b", "Carroca"),
    )

    df = (pd.read_csv(fp, encoding="latin3")
            .drop(["hora", "classe_de", "codlog"], axis=1)
            .rename(columns={"nïż½mero_de": "numero_de"}))

    for pattern, replacement in normalizations:
        df = df.replace(to_replace=pattern, value=replacement, regex=True)

    return (df.pipe(extract_parts)
              .pipe(generalize_entity)
              .pipe(create_geojson_point))

def extract_parts(df):
    """Expand each accident row into one row per entity named in "tipo".

    The "tipo" value is split on " x " (e.g. "Carro x Moto" -> "Carro",
    "Moto").  Every resulting entity gets a full copy of the original row,
    with the entity name stored in a new leading "entidade" column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column "tipo".

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh 0..n-1 index, "entidade" as first column and
        the original columns (dtypes preserved) after it.  The input frame is
        not modified; an empty input yields an empty result.
    """
    split_types = [tipo.split(' x ') for tipo in df['tipo']]

    # Repeat each row position once per entity.  The explicit integer dtype
    # keeps np.repeat working when the frame is empty.
    counts = np.array([len(parts) for parts in split_types], dtype=int)
    positions = np.repeat(np.arange(len(df)), counts)

    # Positional selection copies the rows and is unaffected by duplicate
    # index labels in the input.
    expanded = df.iloc[positions].reset_index(drop=True)
    expanded.insert(0, 'entidade',
                    [entity for parts in split_types for entity in parts])
    return expanded

def generalize_entity(df):
    """Collapse motorcycle and bicycle variants into a single label each.

    Any "entidade" value containing "moto"/"Moto" becomes "Moto"; then any
    value containing "bicicleta"/"Bicicleta" becomes "Bicicleta".  Other
    values, including missing ones, are left as they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column "entidade".

    Returns
    -------
    pandas.DataFrame
        A modified copy; the frame passed in is left untouched.
    """
    df = df.copy()
    for pattern, label in (('[Mm]oto', 'Moto'), ('[Bb]icicleta', 'Bicicleta')):
        # na=False: a missing entity counts as "no match" instead of putting
        # NaN into the boolean mask, which .loc cannot index with.
        matches = df['entidade'].str.contains(pattern, na=False)
        df.loc[matches, 'entidade'] = label
    return df
    
def create_geojson_point(df):
    """Attach a "geometry" column of geojson points built from "X" and "Y".

    The column is added to ``df`` itself, and that same frame is returned.
    """
    def _row_to_point(record):
        # One point per row, with coordinates in (X, Y) order.
        return geojson.Point((record["X"], record["Y"]))

    df["geometry"] = df.apply(_row_to_point, axis=1)
    return df

def convert_into_gdf(df):
    """Wrap ``df`` in a GeoDataFrame and reproject it to EPSG:4326.

    The "geometry" column is declared to be in UTM zone 22 south (WGS84,
    metres) and the result is converted to EPSG:4326.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a "geometry" column.
        NOTE(review): create_geojson_point stores geojson objects in that
        column, while GeoDataFrame normally expects shapely geometries --
        confirm the caller converts them first.

    Returns
    -------
    geopandas.GeoDataFrame
        The reprojected frame.
    """
    # `+zone` takes the integer zone number only; the hemisphere comes from
    # `+south`.  The earlier value "22J," (latitude-band letter plus a stray
    # comma) was not a valid zone.
    crs = "+proj=utm +zone=22 +south +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
    gdf = gpd.GeoDataFrame(df, crs=crs, geometry="geometry")
    # The `epsg` keyword replaces the deprecated {'init': 'epsg:4326'} mapping.
    return gdf.to_crs(epsg=4326)

# Build the per-entity accidents table from the 2015 CSV under data/external.
df_accidents = read_accidents(project_dir + "/data/external/bombeiros_acidentes2015.csv")
# Preview the first rows.
df_accidents.head()

Unnamed: 0,entidade,X,Y,id,data,turno,dia_da_sem,numero_de,tipo,logradouro,ponto_de_r,bairro,codlogra,acumulo,nomelog,st_length_,geometry
0,Obstaculo Fixo,713505,7090960.0,20150000.0,01/01/2015,MADRUGADA,QUINTA-FEIRA,1,Obstaculo Fixo,Rua Benjamin Constant,1393,Glïż½ria,1294,144,BERTHA WILL,143.662,"{'type': 'Point', 'coordinates': [713505.0141,..."
1,Carro,714574,7089280.0,20150000.0,01/01/2015,MANHA,QUINTA-FEIRA,1,Carro x Carro,Rua Quinze de Novembro,844,Centro,1241,111,BLUMENAU,110.884,"{'type': 'Point', 'coordinates': [714573.5581,..."
2,Carro,714574,7089280.0,20150000.0,01/01/2015,MANHA,QUINTA-FEIRA,1,Carro x Carro,Rua Quinze de Novembro,844,Centro,1241,111,BLUMENAU,110.884,"{'type': 'Point', 'coordinates': [714573.5581,..."
3,Moto,719251,7094480000.0,20150000.0,01/01/2015,NOITE,QUINTA-FEIRA,1,Moto x Bicicleta,Rua Martinho Van Biene,1899,Jardim Iririïż½,8393,774,ROGERIO PEREIRA,128.058,"{'type': 'Point', 'coordinates': [719250.6745,..."
4,Bicicleta,719251,7094480000.0,20150000.0,01/01/2015,NOITE,QUINTA-FEIRA,1,Moto x Bicicleta,Rua Martinho Van Biene,1899,Jardim Iririïż½,8393,774,ROGERIO PEREIRA,128.058,"{'type': 'Point', 'coordinates': [719250.6745,..."


In [66]:
# Persist the processed table under data/processed.
# NOTE(review): to_csv writes the row index by default, which shows up as an
# unnamed column when the file is read back -- confirm downstream readers expect it.
df_accidents.to_csv(project_dir + "/data/processed/processed_accidents.csv")