# Data preparation

## Making a panel df of ACLED events

In [1]:
import pandas as pd

events = pd.read_csv('data/acled_events.csv')
events

Unnamed: 0,Country,Admin1,Event month,Event Type,Events
0,United States,Alabama,2020-01-01,Protests,5
1,United States,Alabama,2020-01-01,Riots,0
2,United States,Alabama,2020-01-01,Strategic developments,0
3,United States,Alabama,2020-01-01,Violence against civilians,0
4,United States,Alabama,2020-01-01,Battles,0
...,...,...,...,...,...
5503,United States,North Dakota,2021-06-01,Riots,0
5504,United States,North Dakota,2021-06-01,Strategic developments,0
5505,United States,North Dakota,2021-06-01,Violence against civilians,0
5506,United States,North Dakota,2021-06-01,Battles,0


### Preprocessing

1) Drop the Country column

2) Convert "Event month" to "period" in YYYYMM format and set it as the index

3) Replace full state names with 2-letter codes using a mapping dictionary

4) Melt the table with a primary key of (state, period)

In [2]:
states_dict = {
    'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
    'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho',
    'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas',
    'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland',
    'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota',
    'MS': 'Mississippi', 'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska',
    'NV': 'Nevada', 'NH': 'New Hampshire', 'NJ': 'New Jersey',
    'NM': 'New Mexico', 'NY': 'New York', 'NC': 'North Carolina',
    'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma', 'OR': 'Oregon',
    'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina',
    'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
    'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington',
    'WV': 'West Virginia', 'WI': 'Wisconsin', 'WY': 'Wyoming'
}

name_to_code = {v: k for k, v in states_dict.items()}

events.drop(columns=['Country'], inplace=True)

events['period'] = pd.to_datetime(events['Event month']).dt.strftime('%Y%m')
events.drop(columns=['Event month'], inplace=True)

events.rename(columns={'Admin1': 'state'}, inplace=True)
events['state'] = events['state'].map(name_to_code).fillna(events['state'])

# Melt the dataframe to long format
events_melted = pd.melt(
    events,
    id_vars=['state', 'period', 'Events'],
    value_name='event_type'
)

# Drop the variable column and rearrange to have event_type first, then Events
events_melted.set_index(['state', 'period'], inplace=True)
events_melted.drop(columns=['variable'], inplace=True)
events_melted = events_melted[['event_type', 'Events']]
events_melted

Unnamed: 0_level_0,Unnamed: 1_level_0,event_type,Events
state,period,Unnamed: 2_level_1,Unnamed: 3_level_1
AL,202001,Protests,5
AL,202001,Riots,0
AL,202001,Strategic developments,0
AL,202001,Violence against civilians,0
AL,202001,Battles,0
...,...,...,...
ND,202106,Riots,0
ND,202106,Strategic developments,0
ND,202106,Violence against civilians,0
ND,202106,Battles,0


## Pivot the df to match our panel data format

In [None]:
# Create pivoted dataframes for Events and Fatalities
events_pivot = pd.pivot_table(
    events_melted, 
    index=['state', 'period'],
    columns='event_type',
    values='Events',
    fill_value=0
)

# Define mappings for column renaming
events_mapping = {
    'Battles': 'battles',
    'Explosions/Remote violence': 'explosions',
    'Protests': 'protests',
    'Riots': 'riots',
    'Strategic developments': 'strategic_devs',
    'Violence against civilians': 'violence'
}
events_pivot = events_pivot.rename(columns=events_mapping)
acled = events_pivot

acled.to_csv('data/acled_panel.csv', index=True)

acled

Unnamed: 0_level_0,event_type,battles,explosions,protests,riots,strategic_devs,violence
state,period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AK,202001,0.0,0.0,4.0,0.0,0.0,0.0
AK,202002,0.0,0.0,11.0,0.0,0.0,0.0
AK,202003,0.0,0.0,1.0,0.0,1.0,0.0
AK,202004,0.0,0.0,2.0,0.0,0.0,0.0
AK,202005,0.0,0.0,8.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
WY,202102,0.0,0.0,0.0,0.0,0.0,0.0
WY,202103,0.0,0.0,1.0,0.0,0.0,0.0
WY,202104,0.0,0.0,3.0,0.0,0.0,0.0
WY,202105,0.0,0.0,1.0,0.0,0.0,0.0
