Análisis exploratorio descriptivo sobre la violencia en Honduras

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the source workbook. The first two sheet rows are skipped before the
# header row is parsed.

datos = pd.read_excel(
    io='datasets/data_cts_intentional_homicide.xlsx',
    skiprows=2,
)
# Preview the first two records to confirm the columns were parsed.
print(datos.head(n=2))

  Iso3_code      Country  Region       Subregion  \
0       ARM      Armenia    Asia    Western Asia   
1       CHE  Switzerland  Europe  Western Europe   

                                           Indicator       Dimension  \
0  Persons arrested/suspected for intentional hom...  by citizenship   
1  Persons arrested/suspected for intentional hom...  by citizenship   

            Category   Sex    Age  Year Unit of measurement  VALUE Source  
0  National citizens  Male  Total  2013              Counts   35.0    CTS  
1  National citizens  Male  Total  2013              Counts   28.0    CTS  


In [None]:
# Keep only the rows whose 'Country' is Honduras, preview them, and then
# remove the full table from the namespace to reduce memory use.

honduras = datos.loc[datos['Country'].eq('Honduras')]
print(honduras.head(n=2))

# NOTE: after this statement the cell cannot be re-run without reloading `datos`.
del datos

   Iso3_code   Country    Region                        Subregion  \
7        HND  Honduras  Americas  Latin America and the Caribbean   
27       HND  Honduras  Americas  Latin America and the Caribbean   

                                            Indicator       Dimension  \
7   Persons arrested/suspected for intentional hom...  by citizenship   
27  Persons arrested/suspected for intentional hom...  by citizenship   

             Category   Sex    Age  Year Unit of measurement   VALUE Source  
7   National citizens  Male  Total  2013              Counts   905.0    CTS  
27  National citizens  Male  Total  2014              Counts  1319.0    CTS  


In [5]:
# Walk over every column of the Honduras table and print its distinct values.

for nombre_columna, serie in honduras.items():
    print(f'columna: {nombre_columna}')
    print(serie.unique())

columna: Iso3_code
['HND']
columna: Country
['Honduras']
columna: Region
['Americas']
columna: Subregion
['Latin America and the Caribbean']
columna: Indicator
['Persons arrested/suspected for intentional homicide'
 'Victims of intentional homicide'
 'Victims of intentional homicide – City-level data'
 'Persons convicted for intentional homicide'
 'Death due to intentional homicide in prison']
columna: Dimension
['by citizenship' 'Total' 'by relationship to perpetrator'
 'by situational context' 'by mechanisms' 'by location']
columna: Category
['National citizens' 'Foreign citizens' 'Total'
 'Intimate partner or family member'
 'Intimate partner or family member: Intimate partner'
 'Intimate partner or family member: Family member'
 'Other Perpetrator known to the victim'
 'Perpetrator unknown to the victim'
 'Perpetrator to victim relationship unknown'
 'Organized criminal groups or gangs' 'Other criminal activities'
 'Interpersonal homicide' 'Socio-political homicide'
 'Socio-politic

In [6]:
# Split the Honduras table into one sub-table per indicator. Keys are
# consecutive integers starting at 1, following the order in which each
# indicator is returned by `unique()`.

dataframes = {
    numero: honduras[honduras['Indicator'] == indicador]
    for numero, indicador in enumerate(honduras['Indicator'].unique(), start=1)
}

In [7]:
# Bind one DataFrame per indicator to a descriptive name.
#
# Each sub-table is selected by its indicator label instead of by the numeric
# position the indicator happens to have in `unique()`. That position depends
# on the row order of the spreadsheet, so a re-sorted or updated file would
# silently attach the wrong indicator to each name.

def _seleccionar_indicador(tabla, indicador):
    """Return the rows of `tabla` whose 'Indicator' column equals `indicador`.

    Raises:
        KeyError: if no row carries that indicator label, so a renamed or
            missing indicator fails loudly instead of yielding an empty frame.
    """
    seleccion = tabla[tabla['Indicator'] == indicador]
    if seleccion.empty:
        raise KeyError(f'Indicator not found: {indicador!r}')
    return seleccion


arrestos = _seleccionar_indicador(
    honduras, 'Persons arrested/suspected for intentional homicide')
homicidios = _seleccionar_indicador(
    honduras, 'Victims of intentional homicide')
# The label below contains an en dash (U+2013), exactly as it appears in the data.
homicidios_ciudad = _seleccionar_indicador(
    honduras, 'Victims of intentional homicide – City-level data')
presos = _seleccionar_indicador(
    honduras, 'Persons convicted for intentional homicide')
muertes_prision = _seleccionar_indicador(
    honduras, 'Death due to intentional homicide in prison')

Análisis de los *arrestos*

In [None]:
# Inspect the distinct values present in each column of the arrests table.

for columna in arrestos:
    print(f'columna: {columna}', arrestos[columna].unique(), sep='\n')

columna: Iso3_code
['HND']
columna: Country
['Honduras']
columna: Region
['Americas']
columna: Subregion
['Latin America and the Caribbean']
columna: Indicator
['Persons arrested/suspected for intentional homicide']
columna: Dimension
['by citizenship' 'Total']
columna: Category
['National citizens' 'Foreign citizens' 'Total']
columna: Sex
['Male' 'Female']
columna: Age
['Total' 'Unknown' '0-9' '10 -14' '15 -17' '18-19' '20-24' '25-29' '30-44'
 '45-59' '60 and older']
columna: Year
[2013 2014 2015 2016 2019 2020 2021 2022 2018]
columna: Unit of measurement
['Counts' 'Rate per 100,000 population']
columna: VALUE
[9.05000000e+02 1.31900000e+03 1.87900000e+03 1.68200000e+03
 1.56100000e+03 1.16900000e+03 1.24800000e+03 1.23300000e+03
 1.80000000e+01 3.70000000e+01 5.80000000e+01 5.00000000e+01
 3.30000000e+01 4.10000000e+01 3.50000000e+01 4.50000000e+01
 5.00000000e+00 1.30000000e+01 6.00000000e+00 8.00000000e+00
 1.00000000e+01 1.60000000e+01 0.00000000e+00 3.10000000e+01
 4.20000000e+01