# Importing ACLED data

In [1]:
import pandas as pd
import urllib.parse
import urllib.error
import pickle
import time
import sys
import glob
from IPython.display import clear_output
import geopandas as gpd
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import plotly.express as px
import pycountry
import numpy as np
import kaleido
import plotly.graph_objects as go

countries_codes_path = 'auxilary_data/countries_iso_fips_capitals.csv'
acled_data_path = '../data/ACLED/'

### Reading in the dataframes from ACLED

In [30]:
# read in xlsx into pandas
middle_east_df = pd.read_csv(acled_data_path + 'MiddleEast.csv')
print('Middle East data loaded' + "\r")
asia_pacific_df = pd.read_csv(acled_data_path + 'AsiaPacific.csv')
print('Asia Pacific data loaded' + "\r")
africa_df = pd.read_csv(acled_data_path + 'Africa.csv')
print('Africa data loaded' + "\r")
europe_central_asia_df = pd.read_csv(acled_data_path + 'EuropeCentralAsia.csv')
print('Europe Central Asia data loaded' + "\r")
latin_america_df = pd.read_csv(acled_data_path + 'LatinAmerica.csv')
print('Latin America data loaded' + "\r")
usa_canada_df = pd.read_csv(acled_data_path + 'USACanada.csv')
print('USA Canada data loaded' + "\r")

Middle East data loaded
Asia Pacific data loaded
Africa data loaded
Europe Central Asia data loaded
Latin America data loaded
USA Canada data loaded



Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.



In [31]:
all_country_df = pd.concat([middle_east_df, asia_pacific_df, africa_df, europe_central_asia_df, latin_america_df, usa_canada_df])

In [32]:
all_country_df.columns

Index(['event_id_cnty', 'event_date', 'year', 'time_precision',
       'disorder_type', 'event_type', 'sub_event_type', 'actor1',
       'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2',
       'interaction', 'civilian_targeting', 'iso', 'region', 'country',
       'admin1', 'admin2', 'admin3', 'location', 'latitude', 'longitude',
       'geo_precision', 'source', 'source_scale', 'notes', 'fatalities',
       'tags', 'timestamp'],
      dtype='object')

In [33]:
def plot_fatilities_country(df_raw, country, smooth):
    df = df_raw[df_raw["country"] == country]
    if df.empty:
        print(f"No data for {country}")
        return
    # group by date and count the number of fatalities
    df_sum = df.groupby("event_date")["fatalities"].sum().reset_index()

    if smooth:
        # smooth the fatalities over time
        df_sum["fatalities"] = df_sum["fatalities"].rolling(window=smooth).mean()

    # plot fatalities over time
    fig = px.line(df_sum, x="event_date", y="fatalities", title=f"Fatalities in {country} over time")
    fig.show()

In [49]:
plot_fatilities_country(all_country_df, "Afghanistan", 1)

## Calculating the number of "Hard cases" in the ACLED data

TODO, to check:

[X] Download data for all of the world

[X] Check fatalities per "event_type"

| EVENT_TYPE | Fatalities |
|------------|------------|
| Battles    | 1009381    |
| Explosions/Remote violence | 361225 |
| Violence against civilians | 312965 |
| Riots | 30459 |
| Protests | 5517 |
| Strategic developments | 4430 |

- [ ] Make rolling window based solely on date, not on the index
- [ ] Calculate "Number of fatalities in last month"
- [ ] Classify fatality occurences after month of 0 fatalities as "hard cases"
- [ ] Calculate "hard cases" per country

In [109]:
# for each row, calculate the number of fatalities in the last month, for the country of the event
def calculate_fatalities_last_month(df):
    df = df.groupby(["country", "event_date"])["fatalities"].sum().reset_index()
    df["fatalities_last_month"] = 0
    df["event_date"] = pd.to_datetime(df["event_date"])
    length = len(df)
    for i in range(length):
        #print estimated time left
        print(f"Row {i} of {length}, {round(i/length*100, 2)}%", end="\r")
        event_date = df.iloc[i]["event_date"]
        country = df.iloc[i]["country"]
        fatalities_last_month = df[(df["event_date"] < event_date) & (df["event_date"] >= (event_date - pd.DateOffset(months=1))) & (df["country"] == country)]["fatalities"].sum()
        df.at[i, "fatalities_last_month"] = fatalities_last_month
    return df       

And then calculate the hard cases from this df:

In [111]:
df_monthly = calculate_fatalities_last_month(all_country_df)
df_monthly.head()

In [91]:
date = all_country_df["event_date"][0:1]
pd.to_datetime(date) - pd.DateOffset(months=1)

0   2024-02-15
Name: event_date, dtype: datetime64[ns]