# Departments and Municipalities

## Department Population

### Department Population History

In [1]:
import pathlib
import pandas as pd

# Directory holding the project's CSV inputs, located under the current user's home folder.
data_dir = pathlib.Path.home().joinpath("ds4a", "project", "infrastructure", "csv_data")

# Read the semicolon-separated department population file, keeping only the
# department code/name, the year and the population totals.
dept_pop = pd.read_csv(
    data_dir.joinpath("department_population_2016_2022.csv"),
    usecols=["DP", "DPNOM", "AÑO", "Total Hombres", "Total Mujeres", "Total"],
    sep=";",
)

# Preview the first rows.
dept_pop.head()

Unnamed: 0,DP,DPNOM,AÑO,Total Hombres,Total Mujeres,Total
0,5,Antioquia,2016,3000534,3210778,6211312
1,8,Atlántico,2016,1182828,1244218,2427046
2,11,"Bogotá, D.C.",2016,3495609,3805309,7300918
3,13,Bolívar,2016,1000545,1012465,2013010
4,15,Boyacá,2016,590818,609160,1199978


In [2]:
# Number of rows in the department population table.
dept_pop.shape[0]

231

In [3]:
# Population history: the per-year figures keyed by department code (DP),
# leaving out the department name.
dept_pop_hist = dept_pop.loc[
    :, ["AÑO", "Total Hombres", "Total Mujeres", "Total", "DP"]
]
dept_pop_hist

Unnamed: 0,AÑO,Total Hombres,Total Mujeres,Total,DP
0,2016,3000534,3210778,6211312,5
1,2016,1182828,1244218,2427046,8
2,2016,3495609,3805309,7300918,11
3,2016,1000545,1012465,2013010,13
4,2016,590818,609160,1199978,15
...,...,...,...,...,...
226,2022,42406,39662,82068,91
227,2022,26940,25121,52061,94
228,2022,47393,42964,90357,95
229,2022,25520,23412,48932,97


In [4]:
# Fraction of rows where Total equals Total Hombres + Total Mujeres (1.0 means every row).
dept_pop_hist["Total"].eq(
    dept_pop_hist["Total Hombres"] + dept_pop_hist["Total Mujeres"]
).mean()

1.0

Keeping the `Total` column is not necessary, since for every row it equals the sum of `Total Hombres` (total men) and `Total Mujeres` (total women).

### Departments Table

In [5]:
# Distinct department names vs. distinct department codes.
tuple(dept_pop[column].nunique() for column in ("DPNOM", "DP"))

(33, 33)

In [6]:
# Distinct department codes, in order of first appearance.
pd.unique(dept_pop["DP"])

array([ 5,  8, 11, 13, 15, 17, 18, 19, 20, 23, 25, 27, 41, 44, 47, 50, 52,
       54, 63, 66, 68, 70, 73, 76, 81, 85, 86, 88, 91, 94, 95, 97, 99])

In [7]:
# Departments lookup table: one row per distinct (code, name) pair, indexed by code.
departments = (
    dept_pop.loc[:, ["DP", "DPNOM"]]
    .drop_duplicates()
    .set_index("DP")
)
departments

Unnamed: 0_level_0,DPNOM
DP,Unnamed: 1_level_1
5,Antioquia
8,Atlántico
11,"Bogotá, D.C."
13,Bolívar
15,Boyacá
17,Caldas
18,Caquetá
19,Cauca
20,Cesar
23,Córdoba


In [8]:
# Length, in characters, of the longest department name. The vectorised
# `.str.len()` accessor replaces the per-element `apply(len)` call and does not
# raise if a name is missing.
departments["DPNOM"].str.len().max()

26

## Municipality Population

### Municipality Population History

In [11]:
# Read the semicolon-separated municipality population file with all of its columns.
mun_pop = pd.read_csv(data_dir.joinpath("municipality_population_2016_2022.csv"), sep=";")

# Preview the first rows.
mun_pop.head()

Unnamed: 0,DP,DPNOM,COD_MUNICIPIO,MPNOM,AÑO,ÁREA GEOGRÁFICA,Total
0,5,Antioquia,5001,Medellín,2016,Total,2351077
1,5,Antioquia,5002,Abejorral,2016,Total,20534
2,5,Antioquia,5004,Abriaquí,2016,Total,2629
3,5,Antioquia,5021,Alejandría,2016,Total,4620
4,5,Antioquia,5030,Amagá,2016,Total,29394


In [12]:
# Print column dtypes, non-null counts and memory usage of the municipality population table.
mun_pop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7854 entries, 0 to 7853
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   DP               7854 non-null   int64 
 1   DPNOM            7854 non-null   object
 2   COD_MUNICIPIO    7854 non-null   int64 
 3   MPNOM            7854 non-null   object
 4   AÑO              7854 non-null   int64 
 5   ÁREA GEOGRÁFICA  7854 non-null   object
 6   Total            7854 non-null   int64 
dtypes: int64(4), object(3)
memory usage: 429.6+ KB


In [13]:
# Keep the year, the population total and the municipality/department identifiers.
mun_pop_filt = mun_pop.loc[
    :, ["AÑO", "Total", "COD_MUNICIPIO", "MPNOM", "DP"]
]
mun_pop_filt

Unnamed: 0,AÑO,Total,COD_MUNICIPIO,MPNOM,DP
0,2016,2351077,5001,Medellín,5
1,2016,20534,5002,Abejorral,5
2,2016,2629,5004,Abriaquí,5
3,2016,4620,5021,Alejandría,5
4,2016,29394,5030,Amagá,5
...,...,...,...,...,...
7849,2022,1211,97889,Yavaraté (CD),97
7850,2022,20798,99001,Puerto Carreño,99
7851,2022,9846,99524,La Primavera,99
7852,2022,4297,99624,Santa Rosalía,99


In [14]:
# Number of distinct municipality codes (missing values excluded, which is the default).
mun_pop_filt["COD_MUNICIPIO"].nunique(dropna=True)

1122

In [15]:
# Municipality population history: year, population total and municipality code only.
mun_pop_table = mun_pop.loc[:, ["AÑO", "Total", "COD_MUNICIPIO"]]

# Preview the first rows.
mun_pop_table.head()

Unnamed: 0,AÑO,Total,COD_MUNICIPIO
0,2016,2351077,5001
1,2016,20534,5002
2,2016,2629,5004
3,2016,4620,5021
4,2016,29394,5030


In [16]:
# Count of missing values in each column.
mun_pop_table.isna().sum(axis=0)

AÑO              0
Total            0
COD_MUNICIPIO    0
dtype: int64

### Municipalities

In [17]:
# Read municipality coordinates, keeping only the department/municipality codes
# and the latitude/longitude columns.
municipalities = pd.read_csv(
    data_dir.joinpath("municipalities_lat_lon.csv"),
    usecols=["CODE_MUNICIPIO", "LATITUD", "LONGITUD", "CODE_DPTO"],
)

# Preview the first rows.
municipalities.head()

Unnamed: 0,CODE_DPTO,CODE_MUNICIPIO,LONGITUD,LATITUD
0,5,5001,-75.576002,6.248586
1,5,5002,-75.42874,5.789301
2,5,5004,-76.064295,6.632282
3,5,5021,-75.141335,6.376063
4,5,5030,-75.701948,6.038808


In [18]:
# Compare the number of distinct municipality codes with the number of rows;
# equal values mean each code appears exactly once.
municipalities["CODE_MUNICIPIO"].drop_duplicates().shape[0], municipalities.shape[0]

(1053, 1053)

In [19]:
# Attach coordinates to one row per municipality code. The left join keeps every
# municipality present in the population data, so codes absent from the lat/lon
# file end up with missing LATITUD/LONGITUD.
municipalities_merged = pd.merge(
    mun_pop_filt.drop_duplicates("COD_MUNICIPIO"),
    municipalities,
    how="left",
    left_on="COD_MUNICIPIO",
    right_on="CODE_MUNICIPIO",
    # Both sides are expected to hold a single row per code; fail loudly rather
    # than silently multiplying rows if that ever stops being true.
    validate="one_to_one",
)

# Keep only the columns that make up the municipalities table.
municipalities_merged = municipalities_merged[
    ["COD_MUNICIPIO", "MPNOM", "LATITUD", "LONGITUD", "DP"]
]

print(f"{len(municipalities_merged)=:,d}")

len(municipalities_merged)=1,122


In [20]:
# Count of missing values in each column; LATITUD/LONGITUD gaps come from
# municipalities with no match in the coordinates file.
municipalities_merged.isna().sum(axis=0)

COD_MUNICIPIO     0
MPNOM             0
LATITUD          69
LONGITUD         69
DP                0
dtype: int64

In [21]:
# Temporary helper column holding the length of each municipality name, used to
# list the longest names. `.str.len()` is the vectorised form of `apply(len)`.
municipalities_merged["LEN"] = municipalities_merged["MPNOM"].str.len()
municipalities_merged.sort_values(by="LEN", ascending=False).head()


Unnamed: 0,COD_MUNICIPIO,MPNOM,LATITUD,LONGITUD,DP,LEN
678,47692,San Sebastián de Buenavista,9.240782,-74.351679,47,27
562,25843,Villa de San Diego de Ubaté,5.307541,-73.814346,25,27
534,25645,San Antonio del Tequendama,4.616135,-74.351451,25,26
95,5664,San Pedro de Los Milagros,6.460138,-75.556719,5,25
447,23586,Purísima de La Concepción,9.2393,-75.724886,23,25


In [22]:
# Remove the temporary LEN helper column. Unlike `del`, `drop(..., errors="ignore")`
# does not raise a KeyError when this cell is re-run after the column is gone.
municipalities_merged = municipalities_merged.drop(columns="LEN", errors="ignore")

# Municipalities that have no latitude (no match in the coordinates file).
municipalities_merged[municipalities_merged["LATITUD"].isna()]

Unnamed: 0,COD_MUNICIPIO,MPNOM,LATITUD,LONGITUD,DP
33,5150,Carolina,,,5
99,5674,San Vicente,,,5
123,5893,Yondó,,,5
170,13468,Mompós,,,13
177,13647,San Estanislao,,,13
...,...,...,...,...,...
1093,91460,Miriti - Paraná (CD),,,91
1104,94885,La Guadalupe (CD),,,94
1106,94887,Pana Pana (CD),,,94
1107,94888,Morichal (CD),,,94


In [23]:
# Write the municipalities table to CSV without the row index. Rows whose
# LATITUD/LONGITUD are still missing are written as well.
municipalities_merged.to_csv(data_dir.joinpath("municipalities.csv"), index=False)

# Suicides

## Suicide Count by Year and Municipality

In [24]:
# Load suicide counts per year and municipality.
# NOTE(review): with `usecols` restricting the columns, `index_col=0` makes YEAR
# the (non-unique) index instead of a regular column, unlike the other loaders
# in this notebook — confirm this is intended.
suicides = pd.read_csv(
    data_dir.joinpath("suicides_merged_poplation_latlon.csv"),
    usecols=["YEAR", "CODE_MUNICIPIO", "SUI_COUNTER"],
    index_col=0,
)
print(f"{len(suicides)=:,d}")
suicides.head()

len(suicides)=3,826


Unnamed: 0_level_0,CODE_MUNICIPIO,SUI_COUNTER
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1
2016,91001,3
2016,5002,2
2016,5030,4
2016,5031,1
2016,5034,5


In [25]:
# Count of missing values in each column (YEAR is the index here, so it is not counted).
suicides.isna().sum(axis=0)

CODE_MUNICIPIO    0
SUI_COUNTER       0
dtype: int64

## Suicide Attempts

In [26]:
# Load suicide-attempt counts per year, week and municipality.
suicide_attempts = pd.read_csv(
    data_dir.joinpath("attempts_merged_population_latlon.csv"),
    usecols=["YEAR", "WEEK", "COUNTER_TRY", "CODE_MUNICIPIO"],
)
print(f"{len(suicide_attempts)=:,d}")
suicide_attempts.head()

len(suicide_attempts)=154,391


Unnamed: 0,YEAR,WEEK,CODE_MUNICIPIO,COUNTER_TRY
0,2016,6,5001,4
1,2016,7,5001,5
2,2016,13,5001,33
3,2016,22,5001,25
4,2016,29,5001,27


In [27]:
# Count of missing values in each column.
suicide_attempts.isna().sum(axis=0)

YEAR              0
WEEK              0
CODE_MUNICIPIO    0
COUNTER_TRY       0
dtype: int64

# Interfamily Violence

In [28]:
# Load the case counts (CANTIDAD) per year and municipality.
interfamily_violence = pd.read_csv(
    data_dir.joinpath("interfamily_violence_merged_population.csv"),
    usecols=["YEAR", "CANTIDAD", "CODE_MUNICIPIO"],
)

print(f"{len(interfamily_violence)=:,d}")
interfamily_violence.head()

len(interfamily_violence)=6,399


Unnamed: 0,YEAR,CODE_MUNICIPIO,CANTIDAD
0,2019,11001,36179
1,2020,11001,35924
2,2018,11001,33853
3,2017,11001,32514
4,2021,11001,31976


In [29]:
# Count of missing values in each column.
interfamily_violence.isna().sum(axis=0)

YEAR              0
CODE_MUNICIPIO    0
CANTIDAD          0
dtype: int64