## Joining of datasets to obtain full weekly data
This notebook joins population and location data to the protest dataset. The output contains every location for every week; a location-week row is populated with protest details if a protest took place there during that week.

In [2]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [60]:
# Inputs joined by this notebook: location data (coords), urban-area
# population data (urban_pop) and the protest events (protests).
coords = pd.read_csv(filepath_or_buffer='data/USDATA.csv')
urban_pop = pd.read_csv(
    filepath_or_buffer='data/US_Census_Urban_Area_ACSDP5Y2020.DP05-2022-06-14T153824.csv'
)
protests = pd.read_csv(filepath_or_buffer='data/acled_locationMatched.csv')

In [61]:
# Remove the 'Unnamed: 0' column (presumably an index written out by an
# earlier to_csv call - TODO confirm); it is not used in the join.
coords = coords.drop(columns=['Unnamed: 0'])

The urban area data contains three rows per location; here we keep only the row that contains the data we need.

In [62]:
# Each location occupies a group of three consecutive rows. The row kept
# below is the third of each group, while the 'Label' value used later as the
# location name comes from the first row of the group, so shift 'Label' down
# by two rows to bring the name onto the row that is kept.
urban_pop['Label'] = urban_pop['Label'].shift(2)

# Keep every third row starting at position 2 (rows 2, 5, 8, ...).
# Positional slicing replaces the previous hard-coded sizes (3592 groups /
# 10776 rows), so the cell still works when the export has a different number
# of locations. Assumes the default RangeIndex produced by read_csv, for which
# positions and labels coincide. .copy() returns an independent frame so the
# column assignments in the following cell do not act on a slice.
urban_pop = urban_pop.iloc[2::3].copy()

In [63]:
# Keep only the part of the label before the first '('.
urban_pop['Label'] = urban_pop['Label'].str.split('(').str[0]

# Build the merge key: the label with hyphens, commas and spaces removed.
urban_pop['Location'] = (
    urban_pop['Label']
    .str.replace('-', '')
    .str.replace(',', '')
    .str.replace(' ', '')
)

In [64]:
# Build the merge key from NAMELSAD10 with hyphens, commas and spaces removed.
# Every '-' is stripped (not only '--') so that the key is normalised exactly
# like urban_pop['Location']; otherwise names containing a single hyphen would
# keep it here but lose it in urban_pop and never match in the merge.
coords['Location'] = (
    coords['NAMELSAD10']
    .str.replace('-', '')
    .str.replace(',', '')
    .str.replace(' ', '')
)

In [65]:
# Left join: every row of coords is kept, with urban_pop columns attached
# where the normalised 'Location' key matches.
merged_data = coords.merge(urban_pop, how='left', on='Location')

In [66]:
# Parse the event date into a datetime column.
protests['Date'] = protests['EVENT_DATE'].pipe(pd.to_datetime)

In [67]:
# Label each event with '<year>-<week>'; %U is the zero-padded week number of
# the year with Sunday as the first day of the week.
protests['Year-Week'] = protests['Date'].dt.strftime(date_format='%Y-%U')

In [68]:
# Years (as strings) for which the weekly location grid is generated.
years = [str(year_number) for year_number in range(2020, 2023)]

In [69]:
# Empty frame with the same columns as merged_data.
weekly_df = pd.DataFrame(columns=list(merged_data.columns))

In [70]:
# Build the full location x week grid: one copy of merged_data for each of
# week numbers 0-52 in every year.
weekly_frames = []
for year in years:
    for week in range(53):
        week_df = merged_data.copy()
        # Zero-pad the week number so the label has the same form as
        # protests['Year-Week'], which comes from strftime('%Y-%U') and is
        # zero-padded ('2020-05'). Unpadded labels ('2020-5') would never match
        # in the later merge for weeks 0-9.
        week_df['Year-Week'] = year + '-' + str(week).zfill(2)
        weekly_frames.append(week_df)

# Concatenate once instead of calling DataFrame.append inside the loop:
# append was removed in pandas 2.0 and re-copies the growing frame on every
# iteration. Rebuilding from the list also makes this cell safe to re-run.
weekly_df = pd.concat(weekly_frames)

In [71]:
# Drop the two columns not needed in the output, then flag every remaining row
# as a protest so that unmatched location-weeks can be told apart after the
# left merge.
protests = protests.drop(columns=['Unnamed: 0', 'X'])
protests['Protest?'] = 1

## All protests

In [72]:
# Left join keeps every location-week row of weekly_df and attaches protest
# columns where the location identifiers and 'Year-Week' all match.
weekly_data = weekly_df.merge(
    protests,
    how='left',
    on=[
        'UACE10',
        'GEOID10',
        'NAME10',
        'NAMELSAD10',
        'INTPTLAT10',
        'INTPTLON10',
        'Year-Week',
    ],
)

In [73]:
# Location-weeks with no matching protest have NaN in 'Protest?' after the
# left merge; mark them as 0. Assign the result back instead of calling
# fillna(inplace=True) on the column selection: that chained in-place form
# does not update the parent frame under pandas Copy-on-Write.
weekly_data['Protest?'] = weekly_data['Protest?'].fillna(0)

In [74]:
# Keep a single row per (GEOID10, Year-Week): the first occurrence wins and
# any later rows for the same pair are discarded.
weekly_data = weekly_data.loc[
    ~weekly_data.duplicated(subset=['GEOID10', 'Year-Week'], keep='first')
]

In [75]:
# Write the weekly table for all protests (the index is written as well).
weekly_data.to_csv(path_or_buf='data/weekly_protests.csv')

## BLM only

In [76]:
# Subset of protest rows whose 'BLM' column equals 1.
blm = protests.loc[protests['BLM'].eq(1)]

In [77]:
# Same weekly join as for all protests, using only the BLM subset: every
# location-week row of weekly_df is kept and BLM protest columns are attached
# where the location identifiers and 'Year-Week' match.
blm_protests = pd.merge(
    weekly_df,
    blm,
    on=['UACE10', 'GEOID10', 'NAME10', 'NAMELSAD10', 'INTPTLAT10', 'INTPTLON10', 'Year-Week'],
    how='left',
)

# Unmatched location-weeks have NaN in 'Protest?'; mark them as 0. Assign the
# result back rather than using fillna(inplace=True) on the column selection,
# which does not update the parent frame under pandas Copy-on-Write.
blm_protests['Protest?'] = blm_protests['Protest?'].fillna(0)

# Keep one row per (GEOID10, Year-Week), retaining the first occurrence.
blm_protests = blm_protests.drop_duplicates(subset=['GEOID10', 'Year-Week'], keep='first')

blm_protests.to_csv('data/blm_weekly_protests.csv')