In [78]:
import os 
import pandas as pd
import re
from datetime import datetime
from scipy.stats import iqr
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Eventos Críticos

In [55]:
# Load the ACLED Colombia export (semicolon-separated CSV).
acled = pd.read_csv("../../../Data/ACLED Colombia (2018-01-01-2023-10-31).csv", sep = ";")

# Fix the date formats.
# `timestamp` holds Unix epoch seconds; convert the whole column at once instead of
# calling the deprecated `datetime.utcfromtimestamp` row by row. The result is the
# same naive (UTC-based) datetime column.
acled["timestamp"] = pd.to_datetime(acled["timestamp"], unit = "s")
acled["event_date"] = pd.to_datetime(acled["event_date"])

# Numeric actor-type codes of `inter1` -> readable labels.
inter_code_dict = {1: "State Forces", 2: "Rebel Groups", 3: "Political Militias", 4: "Identity Militias", 
                   5: "Rioters", 6: "Protesters", 7: "Civilians", 8: "External/Other Force"}

# Codes missing from the dictionary become NaN after `map`.
acled['inter1'] = acled['inter1'].map(inter_code_dict)

# Two-digit interaction codes -> readable labels.
interaction_code_dict = {10: "SOLE STATE FORCES ACTION", 11: "STATE FORCES VERSUS STATE FORCES", 12: "STATE FORCES VERSUS REBELS",
                         13: "STATE FORCES VERSUS POLITICAL MILITIA", 14: "STATE FORCES VERSUS IDENTITY MILITIA", 
                         15: "STATE FORCES VERSUS RIOTERS", 16: "STATE FORCES VERSUS PROTESTERS", 17: "STATE FORCES VERSUS CIVILIANS",
                         18: "STATE FORCES VERSUS EXTERNAL/OTHER FORCES", 20: "SOLE REBEL ACTION",
                         22: "REBELS VERSUS REBELS", 23: "REBELS VERSUS POLITICAL MILITIA", 24: "REBELS VERSUS IDENTITY MILITIA",
                         25: "REBELS VERSUS RIOTERS", 26: "REBELS VERSUS PROTESTERS", 27: "REBELS VERSUS CIVILIANS", 28: "REBELS VERSUS OTHERS",
                         30: "SOLE POLITICAL MILITIA ACTION", 33: "POLITICAL MILITIA VERSUS POLITICAL MILITIA", 34: "POLITICAL MILITIA VERSUS IDENTITY MILITIA",
                         35: "POLITICAL MILITIA VERSUS RIOTERS", 36: "POLITICAL MILITIA VERSUS PROTESTERS", 37: "POLITICAL MILITIA VERSUS CIVILIANS",
                         38: "POLITICAL MILITIA VERSUS OTHERS", 40: "SOLE IDENTITY MILITIA ACTION", 44: "IDENTITY MILITIA VERSUS IDENTITY MILITIA",
                         45: "IDENTITY MILITIA VERSUS RIOTERS", 46: "IDENTITY MILITIA VERSUS PROTESTERS", 47: "IDENTITY MILITIA VERSUS CIVILIANS",
                         48: "IDENTITY MILITIA VERSUS OTHER", 50: "SOLE RIOTER ACTION", 55: "RIOTERS VERSUS RIOTERS", 56: "RIOTERS VERSUS PROTESTERS",
                         57: "RIOTERS VERSUS CIVILIANS", 58: "RIOTERS VERSUS OTHERS", 60: "SOLE PROTESTER ACTION", 66: "PROTESTERS VERSUS PROTESTERS",
                         67: "PROTESTERS VERSUS CIVILIANS", 68: "PROTESTERS VERSUS OTHER", 70: "SOLE CIVILIAN ACTION", 77: "CIVILIANS VERSUS CIVILIANS",
                         78: "OTHER ACTOR VERSUS CIVILIANS", 80: "SOLE OTHER ACTION", 88: "OTHER VERSUS OTHER"}

acled['interaction'] = acled['interaction'].map(interaction_code_dict)

In [85]:
# Number of distinct events per day (one row per date that has at least one event).
eventos_diarios = (
    acled[["event_id_cnty", "event_date"]]
    .drop_duplicates()
    .groupby(["event_date"])
    .size()
    .reset_index(name = "n")
)

# Filter dates (`between` includes both endpoints).
eventos_diarios = eventos_diarios.loc[eventos_diarios["event_date"].between('2021-04-27', '2021-06-29')]

# Share of days whose event count exceeds each anomaly cut-off.
# The first two shares are printed: a bare expression in the middle of a cell is
# evaluated and silently discarded, so only the last one would be shown.

# Cut-off 1: mean + 1 standard deviation.
corte1 = eventos_diarios["n"].mean() + eventos_diarios["n"].std()
print((eventos_diarios["n"] > corte1).mean()) # 12.5% in the recorded run

# Cut-off 2: mean + 2 standard deviations.
corte2 = eventos_diarios["n"].mean() + 2*eventos_diarios["n"].std()
print((eventos_diarios["n"] > corte2).mean()) # 6.25% in the recorded run

# Cut-off 3: third quartile + 1.5 * interquartile range.
corte3 = np.percentile(eventos_diarios["n"], 75) + 1.5 * iqr(eventos_diarios["n"])
(eventos_diarios["n"] > corte3).mean() # 6.25% in the recorded run

0.0625

In [91]:
# Days whose total event count exceeds the mean + 1 SD cut-off.
# NOTE(review): `corte1` is reassigned by later cells, so this cell must be run
# right after the one that computes the cut-offs for `eventos_diarios`.
eventos_diarios[eventos_diarios["n"] > corte1]

Unnamed: 0,event_date,n
1208,2021-04-28,108
1213,2021-05-03,102
1214,2021-05-04,111
1215,2021-05-05,105
1217,2021-05-07,75
1218,2021-05-08,70
1222,2021-05-12,80
1229,2021-05-19,85


In [94]:
# One row per distinct (event, date, disorder type) combination.
serie_disorder_type = acled[["event_id_cnty", "event_date", "disorder_type"]].drop_duplicates().reset_index(drop = True)

# Some entries combine two types, e.g. "Political violence; Demonstrations".
# Split them on "; " and give each type its own row. `explode` replaces the manual
# split/stack/droplevel/del/join sequence and leaves the other columns untouched.
serie_disorder_type = (
    serie_disorder_type
    .assign(disorder_type = serie_disorder_type["disorder_type"].str.split("; "))
    .explode("disorder_type")
    .reset_index(drop = True)
)

# Count occurrences per day and disorder type.
serie_disorder_type = serie_disorder_type.groupby(["event_date", "disorder_type"]).size().reset_index(name = "n")

# Filter dates (`between` includes both endpoints).
serie_disorder_type = serie_disorder_type.loc[serie_disorder_type["event_date"].between('2021-04-27', '2021-06-29')]

In [103]:
# Daily counts for the "Political violence" disorder type only.
es_violencia_politica = serie_disorder_type["disorder_type"] == "Political violence"
serie_disorder_type2 = serie_disorder_type.loc[es_violencia_politica]

# NOTE(review): days without any event of this type have no row here, so the mean
# and standard deviation below are taken over observed days only - confirm intended.
conteos = serie_disorder_type2["n"]
media = conteos.mean()
desviacion = conteos.std()

# Share of days above each anomaly cut-off.
corte1 = media + desviacion  # mean + 1 SD
print((conteos > corte1).mean())  # 18.75% in the recorded run

corte2 = media + 2*desviacion  # mean + 2 SD
print((conteos > corte2).mean())  # 1.56% in the recorded run

corte3 = np.percentile(conteos, 75) + 1.5 * iqr(conteos)  # Q3 + 1.5 * IQR
print((conteos > corte3).mean())  # 0.0% in the recorded run

# Days above the mean + 2 SD cut-off.
serie_disorder_type2.loc[conteos > corte2]

0.1875
0.015625
0.0


Unnamed: 0,event_date,disorder_type,n
2659,2021-05-04,Political violence,15


In [107]:
# Daily number of distinct events per event type.
serie_event_type = (
    acled[["event_id_cnty", "event_date", "event_type"]]
    .drop_duplicates()
    .groupby(["event_date", "event_type"])
    .size()
    .reset_index(name = "n")
)
# Filter dates (`between` includes both endpoints).
en_ventana_evento = serie_event_type["event_date"].between('2021-04-27', '2021-06-29')
serie_event_type = serie_event_type.loc[en_ventana_evento]

# Daily number of distinct events per sub-event type.
serie_sub_event_type = (
    acled[["event_id_cnty", "event_date", "sub_event_type"]]
    .drop_duplicates()
    .groupby(["event_date", "sub_event_type"])
    .size()
    .reset_index(name = "n")
)
# Filter dates (`between` includes both endpoints).
en_ventana_sub_evento = serie_sub_event_type["event_date"].between('2021-04-27', '2021-06-29')
serie_sub_event_type = serie_sub_event_type.loc[en_ventana_sub_evento]

In [114]:
# Daily counts for the "Violence against civilians" event type only.
es_violencia_civiles = serie_event_type["event_type"] == "Violence against civilians"
serie_event_type2 = serie_event_type.loc[es_violencia_civiles]

# NOTE(review): days without any event of this type have no row here, so the mean
# and standard deviation below are taken over observed days only - confirm intended.
conteos = serie_event_type2["n"]
media = conteos.mean()
desviacion = conteos.std()

# Share of days above each anomaly cut-off.
corte1 = media + desviacion  # mean + 1 SD
print((conteos > corte1).mean())

corte2 = media + 2*desviacion  # mean + 2 SD
print((conteos > corte2).mean())

corte3 = np.percentile(conteos, 75) + 1.5 * iqr(conteos)  # Q3 + 1.5 * IQR
print((conteos > corte3).mean())

# Days above the mean + 1 SD cut-off.
serie_event_type2.loc[conteos > corte1]

0.2222222222222222
0.015873015873015872
0.015873015873015872


Unnamed: 0,event_date,event_type,n
4306,2021-05-04,Violence against civilians,9
4331,2021-05-09,Violence against civilians,9
4336,2021-05-10,Violence against civilians,7
4362,2021-05-15,Violence against civilians,7
4383,2021-05-19,Violence against civilians,7
4405,2021-05-23,Violence against civilians,8
4410,2021-05-24,Violence against civilians,7
4424,2021-05-27,Violence against civilians,7
4456,2021-06-02,Violence against civilians,7
4536,2021-06-20,Violence against civilians,11


In [120]:
# Total events per sub-event type inside the date window; `head()` shows the top five.
(
    serie_sub_event_type
    .groupby("sub_event_type")["n"]
    .sum()
    .sort_values(ascending = False)
    .head()
)

sub_event_type
Peaceful protest             1299
Violent demonstration         343
Attack                        274
Armed clash                   123
Protest with intervention      67
Name: n, dtype: int64

In [122]:
# Daily counts for the "Violent demonstration" sub-event type only.
es_manifestacion_violenta = serie_sub_event_type["sub_event_type"] == "Violent demonstration"
serie_sub_event_type2 = serie_sub_event_type.loc[es_manifestacion_violenta]

# NOTE(review): days without any event of this type have no row here, so the mean
# and standard deviation below are taken over observed days only - confirm intended.
conteos = serie_sub_event_type2["n"]
media = conteos.mean()
desviacion = conteos.std()

# Share of days above each anomaly cut-off.
corte1 = media + desviacion  # mean + 1 SD
print((conteos > corte1).mean())

corte2 = media + 2*desviacion  # mean + 2 SD
print((conteos > corte2).mean())

corte3 = np.percentile(conteos, 75) + 1.5 * iqr(conteos)  # Q3 + 1.5 * IQR
print((conteos > corte3).mean())

# Days above the mean + 2 SD cut-off.
serie_sub_event_type2.loc[conteos > corte2]

0.1509433962264151
0.09433962264150944
0.03773584905660377


Unnamed: 0,event_date,sub_event_type,n
5061,2021-05-01,Violent demonstration,18
5074,2021-05-03,Violent demonstration,18
5084,2021-05-04,Violent demonstration,23
5092,2021-05-05,Violent demonstration,18
5437,2021-06-28,Violent demonstration,21


In [123]:
# Daily counts for the "Attack" sub-event type only.
es_ataque = serie_sub_event_type["sub_event_type"] == "Attack"
serie_sub_event_type2 = serie_sub_event_type.loc[es_ataque]

# NOTE(review): days without any event of this type have no row here, so the mean
# and standard deviation below are taken over observed days only - confirm intended.
conteos = serie_sub_event_type2["n"]
media = conteos.mean()
desviacion = conteos.std()

# Share of days above each anomaly cut-off.
corte1 = media + desviacion  # mean + 1 SD
print((conteos > corte1).mean())

corte2 = media + 2*desviacion  # mean + 2 SD
print((conteos > corte2).mean())

corte3 = np.percentile(conteos, 75) + 1.5 * iqr(conteos)  # Q3 + 1.5 * IQR
print((conteos > corte3).mean())

# Days above the mean + 2 SD cut-off.
serie_sub_event_type2.loc[conteos > corte2]

0.1746031746031746
0.031746031746031744
0.015873015873015872


Unnamed: 0,event_date,sub_event_type,n
5113,2021-05-09,Attack,9
5391,2021-06-20,Attack,11


In [125]:
# Keep only events where state forces face rioters, protesters or civilians.
filtro = acled["interaction"].isin(["STATE FORCES VERSUS RIOTERS", "STATE FORCES VERSUS PROTESTERS", "STATE FORCES VERSUS CIVILIANS"])
serie_policia_civiles = acled.loc[filtro, ["event_id_cnty", "event_date", "interaction"]].drop_duplicates()

# Daily number of distinct events per interaction type.
serie_policia_civiles = serie_policia_civiles.groupby(["event_date", "interaction"]).size().reset_index(name = "n")

# Filter dates (`between` includes both endpoints).
serie_policia_civiles = serie_policia_civiles.loc[serie_policia_civiles["event_date"].between('2021-04-27', '2021-06-29')]

# Label the overall daily series as "TOTAL" so it can be stacked with the others.
# `assign` builds a new frame instead of writing a column into `eventos_diarios`,
# which was produced by a `.loc` filter (the SettingWithCopyWarning pattern).
eventos_diarios = eventos_diarios.assign(interaction = "TOTAL")
serie_policia_civiles = pd.concat([serie_policia_civiles, eventos_diarios], axis = 0)

In [127]:
# Daily counts for "STATE FORCES VERSUS RIOTERS" interactions only.
es_contra_rioters = serie_policia_civiles["interaction"] == "STATE FORCES VERSUS RIOTERS"
serie_policia_civiles2 = serie_policia_civiles.loc[es_contra_rioters]

# NOTE(review): days without any event of this type have no row here, so the mean
# and standard deviation below are taken over observed days only - confirm intended.
conteos = serie_policia_civiles2["n"]
media = conteos.mean()
desviacion = conteos.std()

# Share of days above each anomaly cut-off.
corte1 = media + desviacion  # mean + 1 SD
print((conteos > corte1).mean())

corte2 = media + 2*desviacion  # mean + 2 SD
print((conteos > corte2).mean())

corte3 = np.percentile(conteos, 75) + 1.5 * iqr(conteos)  # Q3 + 1.5 * IQR
print((conteos > corte3).mean())

# Days above the mean + 2 SD cut-off.
serie_policia_civiles2.loc[conteos > corte2]

0.16326530612244897
0.061224489795918366
0.04081632653061224


Unnamed: 0,event_date,interaction,n
418,2021-04-28,STATE FORCES VERSUS RIOTERS,14
432,2021-05-04,STATE FORCES VERSUS RIOTERS,16
512,2021-06-28,STATE FORCES VERSUS RIOTERS,18


In [128]:
# Daily counts for "STATE FORCES VERSUS PROTESTERS" interactions only.
es_contra_protesters = serie_policia_civiles["interaction"] == "STATE FORCES VERSUS PROTESTERS"
serie_policia_civiles2 = serie_policia_civiles.loc[es_contra_protesters]

# NOTE(review): days without any event of this type have no row here, so the mean
# and standard deviation below are taken over observed days only - confirm intended.
conteos = serie_policia_civiles2["n"]
media = conteos.mean()
desviacion = conteos.std()

# Share of days above each anomaly cut-off.
corte1 = media + desviacion  # mean + 1 SD
print((conteos > corte1).mean())

corte2 = media + 2*desviacion  # mean + 2 SD
print((conteos > corte2).mean())

corte3 = np.percentile(conteos, 75) + 1.5 * iqr(conteos)  # Q3 + 1.5 * IQR
print((conteos > corte3).mean())

# Days above the mean + 2 SD cut-off.
serie_policia_civiles2.loc[conteos > corte2]

0.15151515151515152
0.030303030303030304
0.030303030303030304


Unnamed: 0,event_date,interaction,n
424,2021-05-01,STATE FORCES VERSUS PROTESTERS,9


In [130]:
# Daily counts for "STATE FORCES VERSUS CIVILIANS" interactions only.
es_contra_civiles = serie_policia_civiles["interaction"] == "STATE FORCES VERSUS CIVILIANS"
serie_policia_civiles2 = serie_policia_civiles.loc[es_contra_civiles]

# NOTE(review): days without any event of this type have no row here, so the mean
# and standard deviation below are taken over observed days only - confirm intended.
conteos = serie_policia_civiles2["n"]
media = conteos.mean()
desviacion = conteos.std()

# Share of days above each anomaly cut-off.
corte1 = media + desviacion  # mean + 1 SD
print((conteos > corte1).mean())

corte2 = media + 2*desviacion  # mean + 2 SD
print((conteos > corte2).mean())

corte3 = np.percentile(conteos, 75) + 1.5 * iqr(conteos)  # Q3 + 1.5 * IQR
print((conteos > corte3).mean())

# Days above the mean + 2 SD cut-off.
serie_policia_civiles2.loc[conteos > corte2]

0.0
0.0
0.0


Unnamed: 0,event_date,interaction,n


In [None]:
# Indepaz timeline: https://www.indepaz.org.co/wp-content/uploads/2021/09/Linea-de-tiempo-final.pdf
# Hand-annotated critical days, keyed by ISO date. Each entry holds:
#   "n_eventos"   - number of events recorded for that day
#   "descripción" - short description of what happened
#   "muertos"     - death count noted for that day
#   "categoria"   - category label of the entry
# The original cell could not be parsed: the ":" separator was missing after every
# "n_eventos" key and after four of the date keys.
eventos = {
    "2021-04-28": {
        "n_eventos": 108,
        "descripción": "inicio del paro",
        "muertos": 3,
        "categoria": "global"
    },

    "2021-05-03": {
        "n_eventos": 102,
        "descripción": "renuncia Carrasquilla",
        "muertos": 6,
        "categoria": "global"
    },

    "2021-05-04": {
        "n_eventos": 111,
        "descripción": "Siloé incomunicado, sin servicio de energía e internet, segunda noche de ataque policial. El día de más violencia política",
        "muertos": 0,
        "categoria": "global"
    },

    "2021-05-05": {
        "n_eventos": 105,
        "descripción": "Se declara paro indefinido",
        "muertos": 0,
        "categoria": "global"
    },

    "2021-05-07": {
        "n_eventos": 75,
        "descripción": "Reportan 126 agresiones en contra de misiones médicas en menos de una semana.",
        "muertos": 0,
        "categoria": "global"
    },

    "2021-05-08": {
        "n_eventos": 70,
        "descripción": "Minga indígena captura hombre armado que disparaba a ciudadanos en Cali.",
        "muertos": 0,
        "categoria": "global"
    },

    "2021-05-12": {
        "n_eventos": 80,
        "descripción": "Educación gratuita para Estratos 1, 2 y 3",
        "muertos": 1,
        "categoria": "global"
    },

    "2021-05-19": {
        "n_eventos": 85,
        "descripción": "Gran marcha del millón",
        "muertos": 1,
        "categoria": "global"
    },

}

# Atención

In [5]:
# List the file names in the pickle data directory.
os.listdir("../../../Data/Pickle")

['user_to_party_jan_oct.pkl',
 'mapa.pkl',
 'user_to_party.pkl',
 'global_segregation_daily.pkl',
 'rts_usuario.pkl',
 'pos_fruchterman_reingold.pkl',
 'users_to_date.pkl',
 'group_segregation.pkl',
 'individual_segregation_daily.pkl',
 'group_segregation_3day.pkl',
 'rts_usuario_paro.pkl',
 'user_to_party_paro.pkl',
 'rts_usuario_jan_oct.pkl',
 'user_indices.pkl',
 'global_segregation_3day.pkl',
 'group_segregation_daily.pkl',
 'global_segregation.pkl',
 'individual_segregation.pkl',
 'layout_forceatlas2.pkl']

In [7]:
# Load the pickled frame and move its index into regular columns.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
df = pd.read_pickle("../../../Data/Pickle/individual_segregation_daily.pkl").reset_index()

In [13]:
# Wide -> long: keep "Node" and "Political Label" as identifiers; every other
# column becomes a ("variable", "value") row.
# NOTE(review): this cell overwrites `df`, so it cannot be re-run on its own.
df = df.melt(id_vars = ["Node", "Political Label"])

In [21]:
# Long -> wide: one row per (Node, variable) pair and one column per
# "Political Label" value, filled from "value".
df = df.pivot(index = ["Node", "variable"], columns = "Political Label", values = "value").reset_index()

In [25]:
# Remove the columns-axis name ("Political Label") left behind by the pivot.
df.columns.name = None

In [33]:
# Keep only the YYYY-MM-DD date that follows the last space-separated prefix of
# each "variable" label. The pattern is a raw string: in a normal string literal
# "\d" is an invalid escape sequence (a SyntaxWarning on Python 3.12+).
# A label with no such date still raises AttributeError, as before.
df["variable"] = df["variable"].apply(lambda x: re.match(r".* (\d{4}-\d{2}-\d{2})", x).group(1))

In [35]:
# The "variable" column now holds dates, so rename it to "Fecha".
df = df.rename(columns = {"variable": "Fecha"})

In [37]:
# Parse the "Fecha" column into pandas datetimes.
df["Fecha"] = pd.to_datetime(df["Fecha"])

In [43]:
# Order rows by date, then node, and renumber the index from 0.
df = df.sort_values(by = ["Fecha", "Node"]).reset_index(drop = True)

In [45]:
# Fraction of missing values in each column.
df.isna().mean()

Node              0.000000
Fecha             0.000000
Centro            0.413759
Derecha           0.413759
Izquierda         0.413759
Sin Clasificar    0.413759
dtype: float64

In [46]:
# Preview the first rows of the reshaped frame.
df.head()

Unnamed: 0,Node,Fecha,Centro,Derecha,Izquierda,Sin Clasificar
0,0,2021-04-28,,,,
1,1,2021-04-28,,,,
2,2,2021-04-28,0.0,0.0,0.0,3.939678
3,3,2021-04-28,0.0,0.0,1.384536,1.313226
4,4,2021-04-28,0.417253,0.0,1.928461,0.140703
