# Arrests/charges stemming from the Jan. 6 riot at the U.S. Capitol

### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup

In [3]:
%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat

In [4]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [5]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()
pd.set_option("display.max_colwidth", None)

### Local Reports

In [6]:
# https://www.uscp.gov/media-center/weekly-arrest-summary

In [7]:
# https://github.com/wpinvestigative/us_capitol_police_reports

In [8]:
# https://mpdc.dc.gov/page/may-2020-january-2021-unrest-related-arrests-and-persons-interest

In [9]:
# https://mpdc.dc.gov/sites/default/files/dc/sites/mpdc/publication/attachments/Unrest-Related%20Arrest%20Data%20as%20of%20January%207%202021.pdf

---

### Justice Department list

In [10]:
justice_url = "https://www.justice.gov/opa/investigations-regarding-violence-capitol"

In [11]:
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
}
response_justice = requests.get(justice_url, headers=headers)

### Create a dataframe with the html table, and clean up the headers

In [12]:
justice = pd.read_html(response_justice.text, attrs={"class": "tablesorter"})[0]
strings = [",", "/", "(", ")", "_"]

In [13]:
justice.columns = (
    justice.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace(":", "", regex=False)
    .str.replace("/", "_", regex=False)
    .str.replace(",", "_", regex=False)
)

In [14]:
# justice.rename(columns=lambda x: x.replace(" ", "_").lower(), inplace=True)

### Make a copy of the dataframe so we can improve the original

In [15]:
justice_df = pd.DataFrame(justice.rename(columns={"name": "fullname"}).copy())

In [16]:
justice_df["fullname"] = justice_df["fullname"].str.replace(
    "BETANCUR, Bryan (aka Bryan Clooney, aka Maximo Clooney)",
    "BETANCUR, Bryan",
    regex=False,
)

In [17]:
justice_df.tail()

Unnamed: 0,case_number,fullname,charge(s),links_to_press_releases__charging_documents,location_of_arrest,case_status,entry_last_updated
129,1:21-mj-119,"WATKINS, Jessica",Restricted building or grounds; violent entry or disorderly conduct; obstruction of an official proceeding,Watkins - Affidavit Watkins - Complaint,Southern District of Ohio,Arrested 1/18/21 in the Southern District of Ohio,1/21/2021
130,,"WEEKS, Bradley",Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; Violent Entry and Disorderly Conduct on Capitol Grounds,Weeks - Complaint Weeks - Statement of Facts,Middle District of Florida,Arrested 1/21/21 in the Middle District of Florida. Initial appearance 1/21/21 at 3:00pm. Released on conditions. Virtual hearing set for 1/26/21 at 1:00pm in federal court in the District of Columbia.,1/22/2021
131,1:21-mj-30,"WILLIAMS, Andrew",One count of unlawful entry of a restricted building and one count of disorderly conduct on Capitol grounds,Seven Charged in Federal Court Following Events At the United Capitol Williams - Complaint Williams - Affidavit for Complaint,Middle District of Florida,Arrested in the Middle District of Florida 1/13/21,1/21/2021
132,,"WILLIAMS, Riley June",Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; Violent Entry and Disorderly Conduct on Capitol Grounds,"Williams - Complaint Williams - Statement of Facts Williams, Riley - Amended Complaint Williams, Riley - Amended Statement of Facts",Middle District of Pennsylvania,Arrested 1/18/21 in the Middle District of Pennsylvania Released to home confinement on conditions. Hearing (virtual) set for 1/25/21 in federal court in the District of Columbia.,1/21/2021
133,1:21-mj-26,"WRIGLEY, Andrew",Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; Violent Entry and Disorderly Conduct on Capitol Grounds,Wrigley - Complaint Wrigley - Statement of Facts,Middle District of Pennsylvania,Arrested on 1/15/21 in the Middle District of Pennsylvania. Initial appearance via video on 1/25/21 in federal court in the District of Columbia.,1/21/2021


In [22]:
justice_df["fullname"] = justice_df["fullname"].str.strip()

In [23]:
justice_df = justice_df[
    [
        "case_number",
        "fullname",
        "location_of_arrest",
        "charge(s)",
        "links_to_press_releases__charging_documents",
        "case_status",
        "entry_last_updated",
    ]
]

### Get out HTML table so we can parse it

In [24]:
response_justice = requests.get(justice_url)
soup = BeautifulSoup(response_justice.text, "html.parser")
table = soup.find("table")

### Get the defendant document links from the table into a list

In [25]:
links = []
for tr in table.findAll("tr")[1:]:
    trs = tr.findAll("td")

    this_row_links = []

    for link in trs[3].findAll("a"):
        this_row_links.append(link["href"])

    links.append(this_row_links)

In [26]:
justice_df["links"] = links

### Get the link descriptions from the table into a list

In [27]:
# press_releases = []
# for tr in table.findAll("tr")[1:]:
#     trs = tr.findAll("td")

#     this_row_text = []

#     for p in trs[3].findAll("p"):
#         this_row_text.append(p)

#     press_releases.append(this_row_text)

In [28]:
# justice_df["press_links"] = press_releases

### We no longer need this column

In [29]:
# justice_df.drop(["links_to_press_releases__charging_documents"], axis=1, inplace=True)

### How's the dataframe look? 

In [30]:
justice_df.head()

Unnamed: 0,case_number,fullname,location_of_arrest,charge(s),links_to_press_releases__charging_documents,case_status,entry_last_updated,links
0,1:21-mj-100,"ABUAL-RAGHEB, Rasha N.",New Jersey,Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; Knowingly Engages in Disorderly or Disruptive Conduct in any Restricted Building or Grounds; Violent Entry and Disorderly Conduct on Capitol Grounds,Abual-Ragheb - Complaint Abual-Ragheb - Affidavit,Arrested 1/19/21 in the District of New Jersey,1/22/2021,"[/opa/page/file/1357081/download, /opa/page/file/1357076/download]"
1,1:21-mj-81,"ADAMS, Daniel Page",Eastern District of Texas,Assaulting a federal officer; obstructing law enforcement engaged in official duties incident to civil disorder; knowingly entering or remaining in any restricted building or grounds without lawful authority; violent entry and disorderly conduct on Capitol grounds,"Adams Connell - Complaint Adams Connell - Statement of Facts Adams, Connell - Amended Statement of Facts Adams, Connell - Amended Complaint",Arrested 1/16/21 in the Eastern District of Texas,1/22/2021,"[/opa/page/file/1355881/download, /opa/page/file/1355876/download, /opa/page/file/1358936/download, /opa/page/file/1358931/download]"
2,1:21-mj-10,"ALBERTS, Christopher Michael",Maryland,"Carrying or having readily accessible, on the grounds of the United States Capitol Building, a firearm and ammunition.",Thirteen Charged in Federal Court Following Riot at the United States Capitol Alberts - Complaint Alberts - Statement of Facts,Arrested on 1/7/21. Initial appearance / detention hearing on 1/7/21. Defendant released. Preliminary Hearing set for 1/28/21 at 2:00 p.m.,1/21/2021,"[/opa/pr/thirteen-charged-federal-court-following-riot-united-states-capitol, /opa/press-release/file/1351681/download, /opa/press-release/file/1351686/download]"
3,1:21-mj-3008,"AYRES, Stephen Michael",Northern District of Ohio,"Willfully and knowingly utter loud, threatening, or abusive language, or engage in disorderly or disruptive conduct, at any place in the Grounds or in any of the Capitol Buildings with the intent to impede, disrupt, or disturb the orderly conduct of a session of Congress or either House of Congress, or the orderly conduct in that building of a hearing before, or any deliberations of, a committee of Congress or either House of Congress",Ayres - Affidavit,Arrested 1/25/21 in the Northern District of Ohio,1/26/2021,[/opa/page/file/1360721/download]
4,1:21-mj-24,"BARANYI, Thomas",New Jersey,Restricted building or grounds; violent entry or disorderly conduct,Baranyi - Affidavit Baranyi - Complaint,Arrested on 1/12/21. Initial appearance in the District of New Jersey on 1/12/21,1/21/2021,"[/opa/page/file/1355731/download, /opa/page/file/1356466/download]"


In [31]:
justice_df.location_of_arrest.value_counts().head()

New Jersey                        6
Middle District of Florida        6
Eastern District of Texas         5
Central District of California    5
Northern District of Texas        4
Name: location_of_arrest, dtype: int64

In [32]:
justice_df.to_json("processed/justice_df.json", orient="records", indent=2)

In [33]:
len(justice_df)

134

---

### Ingrest data from the Program on Extremism at GW

In [34]:
# https://extremism.gwu.edu/Capitol-Hill-Cases

In [35]:
!wget 'https://extremism.gwu.edu/sites/g/files/zaxdzs2191/f/CapitolHillTracker11521.xlsx' --directory-prefix="raw/" --output-document="raw/latest_extremism.xlsx"

--2021-01-27 06:52:40--  https://extremism.gwu.edu/sites/g/files/zaxdzs2191/f/CapitolHillTracker11521.xlsx
Resolving extremism.gwu.edu (extremism.gwu.edu)... 2606:4700::6812:925, 2606:4700::6812:825, 104.18.9.37, ...
Connecting to extremism.gwu.edu (extremism.gwu.edu)|2606:4700::6812:925|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 11830 (12K) [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
Saving to: ‘raw/latest_extremism.xlsx’


2021-01-27 06:52:41 (16.6 MB/s) - ‘raw/latest_extremism.xlsx’ saved [11830/11830]



In [36]:
gw_df = pd.read_excel("raw/latest_extremism.xlsx", skiprows=1).dropna(how="all", axis=1)

In [37]:
gw_df.columns = (
    gw_df.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace(":", "", regex=False)
    .str.replace("/", "_", regex=False)
    .str.replace(",", "_", regex=False)
)

In [38]:
gw_df["datecharged"] = (pd.to_datetime(gw_df["datecharged"]).dt.date).fillna("")

In [39]:
gw_df.head(10)

Unnamed: 0,name,agearrest,gender,state,datecharged
0,"ALBERTS, Christopher Michael",33,M,MD,2021-01-07
1,"BARANYI, Thomas",28,M,NJ,2021-01-10
2,"BARNETT, Richard",60,M,AR,2021-01-07
3,"BAUER, Robert",Unknown,M,KY,
4,"BLACK, Joshua Matthew",44,M,AL,2021-01-13
5,"BLEDSOE, Matthew",Unknown,M,Unknown,
6,"BROCK, Larry Rendell",54,M,TX,2021-01-09
7,"BROWN, Terry",69,M,PA,2021-01-07
8,"CAMARGO, Samuel",Unknown,M,FL,2021-01-15
9,"CHANSLEY, Jacob Anthony",33,M,AZ,2021-01-11


In [40]:
gw_df.name = gw_df.name.str.strip()

In [41]:
len(gw_df)

58

In [42]:
gw_df.state.value_counts()

VA         7
Unknown    6
NY         5
TX         5
FL         5
AR         2
IL         2
PA         2
CO         2
AL         2
MD         2
DE         2
WV         1
NH         1
IA         1
LA         1
WA         1
WI         1
NJ         1
KY         1
MO         1
AZ         1
UT         1
TN         1
ID         1
OH         1
HI         1
DC         1
Name: state, dtype: int64

### Merge with Justice data

In [43]:
# justice_gw_merge = pd.merge(
#     justice_df, gw_df, right_on="name", left_on="fullname", how="right"
# )

In [44]:
# len(justice_gw_merge)

In [45]:
# justice_gw_merge.head()

---

### Read data from the [Prosecution Project](https://theprosecutionproject.org/2021/01/14/prosecution-data-capitol-siege-of-january-6-2021/)

In [107]:
url2 = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQ-NJiMr9_MVxsqTSB1sYkzOZSfg59m6ViR7qvjXef3O4txMuWYxh7TlTVcQAxzduCjhLxKP3dlXUhX/pub?output=csv"

In [108]:
cases = pd.read_csv(url2, dtype={"age": int})

In [109]:
cases.columns = (
    cases.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace(":", "", regex=False)
    .str.replace("/", "_", regex=False)
    .str.replace("-", "_", regex=False)
)

In [110]:
cases = cases.dropna(how="all")

In [111]:
cases["charges"] = cases["charges"].str.upper()

In [112]:
cases[["full_legal_name", "charges"]].head()

Unnamed: 0,full_legal_name,charges
0,Henry Tarrio,7 DC CODE 2506.01(B) POSSESSION OF LARGE CAPACITY AMMUNITION FEEDING DEVICE [2 COUNTS]
1,Matthew Council,18:1752(A)(1) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS
2,Bradley Rukstales,"18:1752(A) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY, OR KNOWINGLY, WITH INTENT TO IMPEDE GOVERNMENT BUSINESS OR OFFICIAL FUNCTIONS, ENGAGING IN DISORDERLY CONDUCT ON CAPITOL GROUNDS; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS"
3,Michael Curzio,"18:1752(A) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY, OR KNOWINGLY, WITH INTENT TO IMPEDE GOVERNMENT BUSINESS OR OFFICIAL FUNCTIONS, ENGAGING IN DISORDERLY CONDUCT ON CAPITOL GROUNDS; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS"
4,Cindy Fitchett,"18:1752(A) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY, OR KNOWINGLY, WITH INTENT TO IMPEDE GOVERNMENT BUSINESS OR OFFICIAL FUNCTIONS, ENGAGING IN DISORDERLY CONDUCT ON CAPITOL GROUNDS; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS"


### Mean age of all arrestees related to the Capitol seige

In [113]:
cases.dtypes

date                   object
date_descriptor        object
case_id                object
group_identifier       object
full_legal_name        object
first_name             object
family_name            object
other_names_aliases    object
co_offenders           object
name_of_case           object
jurisdiction           object
location_country       object
location_state         object
location_city          object
charges                object
additional_details     object
age                    object
gender                 object
racial_ethnic_group    object
short_narrative        object
dtype: object

In [114]:
cases["age"] = cases["age"].fillna("0").str.replace("Bledsoe", "0", regex=False)

In [115]:
cases["age"] = cases["age"].astype(int)

In [116]:
cases["age"].mean()

31.223175965665234

### Number of cases so far

In [117]:
len(cases)

233

### Gender counts for those charged

In [118]:
round(cases.gender.value_counts("normalize") * 100)

Male               86.0
Female             14.0
Unknown/unclear     0.0
Name: gender, dtype: float64

### Race/ethnicity of those charged?

In [119]:
round(cases.racial_ethnic_group.value_counts("normalize") * 100)

White/Caucasian                   91.0
Latino/Hispanic                    4.0
Black/African/African American     3.0
Asian/South Asian                  1.0
Unknown                            1.0
Name: racial_ethnic_group, dtype: float64

In [120]:
cases_race = (
    cases.groupby(["racial_ethnic_group"]).agg({"case_id": "size"}).reset_index()
).rename(columns={"case_id": "count"})

In [121]:
alt.Chart(cases_race).mark_bar().encode(
    x=alt.X("racial_ethnic_group:N", title="Race/ethnicity"),
    y=alt.Y("count:Q", title="Count"),
).properties(width=800, height=400, title="Cases by race/ethnicity")

---

### Cases that only involve illegally entering the Capitol

In [122]:
capitol_entry_cases = cases[
    cases["charges"].fillna("").str.contains("RESTRICTED BUILDING OR GROUNDS")
]

In [123]:
len(capitol_entry_cases)

108

In [124]:
round(capitol_entry_cases.racial_ethnic_group.value_counts("normalize") * 100)

White/Caucasian                   93.0
Latino/Hispanic                    5.0
Asian/South Asian                  1.0
Black/African/African American     1.0
Name: racial_ethnic_group, dtype: float64

In [125]:
round(capitol_entry_cases.location_state.value_counts("normalize") * 100)

District of Columbia    100.0
Name: location_state, dtype: float64

In [126]:
round(capitol_entry_cases.gender.value_counts("normalize") * 100)

Male      82.0
Female    18.0
Name: gender, dtype: float64

---

### Export

In [127]:
cases.to_csv("raw/cases_prosecution_project.csv", index=False)