# Arrests/charges stemming from the Jan. 6 riot at the U.S. Capitol

### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt
import requests

%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat

# Register the theme object exposed by altair_latimes under the name "latimes",
# then make it the active Altair theme for the charts in this notebook.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [3]:
# Notebook display settings.
# Altair: lift the default cap on the number of rows a chart's data may contain.
alt.data_transformers.disable_max_rows()
# pandas: show up to 100 columns and 1,000 rows, and never truncate cell text
# (the charge descriptions inspected below are long strings).
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

### Ingest data from the Program on Extremism at GW

In [4]:
# Page that is fetched below and parsed for its HTML tables.
url = "https://extremism.gwu.edu/Capitol-Hill-Cases"

In [5]:
# Browser-like request headers sent with the page request.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
}

# The timeout keeps a stalled server from hanging the notebook; raise_for_status()
# stops the run on a 4xx/5xx reply instead of silently parsing an error page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

# Parse the HTML once and pick both tables out of the same result:
# the first <table class="table"> is the summary, the second the charge breakdown.
gw_tables = pd.read_html(response.text, attrs={"class": "table"})
summary_table = gw_tables[0]
charges_table = gw_tables[1]

In [6]:
# Display the first table parsed from the page.
summary_table

Unnamed: 0,Date Updated,# Individuals Charged,Max. # Charges for 1 Individual,Avg. # Charges,# Federal Cases,# D.C. Cases
0,"January 14, 2021",72,17,2.2,45,27


In [7]:
# Display the second table parsed from the page (counts by charge type).
charges_table

Unnamed: 0,Charge Type,Count (Federal),Count (D.C.),Total
0,Illegal Entry and Activity on Designated Ground,91,25,116
1,Firearms Charges,2,19,21
2,Civil Disorder,8,0,8
3,Assault,3,1,4
4,Theft,3,0,3
5,Threats,1,0,1


---

In [8]:
# Import only the name that is used instead of a wildcard, so it is clear where
# BeautifulSoup comes from. `requests` is already imported in the top import cell.
from bs4 import BeautifulSoup

site = url

# Send the same headers as the earlier fetch of this URL so both requests are
# issued identically; fail fast on a timeout or an HTTP error status.
page = requests.get(site, headers=headers, timeout=30)
page.raise_for_status()
request = page.text  # raw HTML of the page, as a string

# Parse the HTML with the standard-library parser for ad-hoc inspection.
soup = BeautifulSoup(request, "html.parser")

In [9]:
# for a in soup.find_all("div", attrs={"class": "panel-body"}):
#     print(a)

---

### Read data from the [Prosecution Project](https://theprosecutionproject.org/2021/01/14/prosecution-data-capitol-siege-of-january-6-2021/)

In [10]:
# Published Google Sheets link that serves the sheet as CSV (`output=csv`).
url2 = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQ-NJiMr9_MVxsqTSB1sYkzOZSfg59m6ViR7qvjXef3O4txMuWYxh7TlTVcQAxzduCjhLxKP3dlXUhX/pub?output=csv"

In [66]:
# Read the published CSV straight from its URL into a DataFrame.
cases = pd.read_csv(url2)

In [67]:
# Normalise the column headers into snake_case-style identifiers:
# trim surrounding whitespace, lower-case, then apply literal (non-regex)
# replacements in order: space -> "_", ":" removed, "/" -> "_", "-" -> "_".
normalized_columns = cases.columns.str.strip().str.lower()
for old_text, new_text in ((" ", "_"), (":", ""), ("/", "_"), ("-", "_")):
    normalized_columns = normalized_columns.str.replace(
        old_text, new_text, regex=False
    )
cases.columns = normalized_columns

In [68]:
# Drop rows in which every column is empty.
cases = cases.dropna(how="all")

In [69]:
# Upper-case the charge text so the substring filter further down
# (which searches for an upper-case phrase) matches regardless of source casing.
cases["charges"] = cases["charges"].str.upper()

In [70]:
# Preview the first five rows of the name and charges columns.
cases[["full_legal_name", "charges"]].head()

Unnamed: 0,full_legal_name,charges
0,Henry Tarrio,7 DC CODE 2506.01(B) POSSESSION OF LARGE CAPACITY AMMUNITION FEEDING DEVICE [2 COUNTS]
1,Matthew Council,18:1752(A)(1) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS
2,Bradley Rukstales,"18:1752(A) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY, OR KNOWINGLY, WITH INTENT TO IMPEDE GOVERNMENT BUSINESS OR OFFICIAL FUNCTIONS, ENGAGING IN DISORDERLY CONDUCT ON CAPITOL GROUNDS; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS"
3,Michael Curzio,"18:1752(A) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY, OR KNOWINGLY, WITH INTENT TO IMPEDE GOVERNMENT BUSINESS OR OFFICIAL FUNCTIONS, ENGAGING IN DISORDERLY CONDUCT ON CAPITOL GROUNDS; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS"
4,Cindy Fitchett,"18:1752(A) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY, OR KNOWINGLY, WITH INTENT TO IMPEDE GOVERNMENT BUSINESS OR OFFICIAL FUNCTIONS, ENGAGING IN DISORDERLY CONDUCT ON CAPITOL GROUNDS; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS"


### Mean age of all arrestees related to the Capitol siege

In [71]:
# Mean of the `age` column, rounded to a whole number.
round(cases.age.mean())

41

### Number of cases so far

In [72]:
# Number of rows in the cases table.
len(cases)

145

### Gender counts for those charged

In [73]:
# Share of cases by gender, as whole-number percentages.
# `normalize=True` makes value_counts return proportions instead of raw counts.
round(cases.gender.value_counts(normalize=True) * 100)

Male               86.0
Female             13.0
Unknown/unclear     1.0
Name: gender, dtype: float64

### Race/ethnicity of those charged?

In [74]:
# Share of cases by race/ethnicity, as whole-number percentages.
# `normalize=True` makes value_counts return proportions instead of raw counts.
round(cases.racial_ethnic_group.value_counts(normalize=True) * 100)

White/Caucasian                   92.0
Black/African/African American     4.0
Latino/Hispanic                    1.0
Asian/South Asian                  1.0
Unknown                            1.0
Name: racial_ethnic_group, dtype: float64

In [107]:
# Count the rows in each race/ethnicity group and return a two-column frame:
# `racial_ethnic_group` and `count` (the input table for the bar chart).
cases_race = (
    cases.groupby("racial_ethnic_group")["case_id"]
    .size()
    .reset_index(name="count")
)

In [109]:
# Bar chart of case counts per race/ethnicity group:
# the group name is a nominal x axis, the count a quantitative y axis.
alt.Chart(cases_race).mark_bar().encode(
    x=alt.X("racial_ethnic_group:N", title="Race/ethnicity"),
    y=alt.Y("count:Q", title="Count"),
).properties(width=800, height=400, title="Cases by race/ethnicity")

---

### Cases that only involve illegally entering the Capitol

In [20]:
# Flag every case whose charge text contains the restricted-building phrase.
# Missing charge text is treated as an empty string, so those rows are not matched.
# Note: this is a "contains" test, so a case that also carries other charges
# is still selected.
has_restricted_grounds_charge = (
    cases["charges"].fillna("").str.contains("RESTRICTED BUILDING OR GROUNDS")
)
capitol_cases = cases[has_restricted_grounds_charge]

In [21]:
# Number of cases matched by the restricted-building filter.
len(capitol_cases)

33

In [22]:
# Race/ethnicity share among the filtered Capitol cases, as whole-number percentages.
# `normalize=True` makes value_counts return proportions instead of raw counts.
round(capitol_cases.racial_ethnic_group.value_counts(normalize=True) * 100)

White/Caucasian                   97.0
Black/African/African American     3.0
Name: racial_ethnic_group, dtype: float64

In [23]:
# Share of the filtered Capitol cases by `location_state`, as whole-number percentages.
# `normalize=True` makes value_counts return proportions instead of raw counts.
round(capitol_cases.location_state.value_counts(normalize=True) * 100)

District of Columbia    100.0
Name: location_state, dtype: float64

In [24]:
# Gender share among the filtered Capitol cases, as whole-number percentages.
# `normalize=True` makes value_counts return proportions instead of raw counts.
round(capitol_cases.gender.value_counts(normalize=True) * 100)

---

### Export

In [27]:
# Write the cleaned cases table to disk without the DataFrame index.
# The path is relative, so the `raw/` directory must already exist.
cases.to_csv("raw/cases_prosecution_project.csv", index=False)

In [28]:
# Write the GW charge-type table to disk without the DataFrame index.
# The path is relative, so the `processed/` directory must already exist.
# NOTE(review): the file name says "summary" but the frame written is
# `charges_table`; `summary_table` is never exported — confirm this is intended.
charges_table.to_csv("processed/summary_states_gw.csv", index=False)