# Arrests/charges stemming from the Jan. 6 riot at the U.S. Capitol

### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
from io import StringIO

import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat

In [4]:
# Register the theme object from altair_latimes under the name "latimes",
# then make it the active Altair theme for the charts in this notebook.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [5]:
# Widen pandas' display limits so wide tables and long text cells
# (charges, case status) are shown in full rather than truncated.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
# Lift Altair's row cap on the data embedded in a chart.
alt.data_transformers.disable_max_rows()
pd.set_option("display.max_colwidth", None)

### Local Reports

In [6]:
# https://www.uscp.gov/media-center/weekly-arrest-summary

In [7]:
# https://github.com/wpinvestigative/us_capitol_police_reports

In [8]:
# https://mpdc.dc.gov/page/may-2020-january-2021-unrest-related-arrests-and-persons-interest

In [9]:
# https://mpdc.dc.gov/sites/default/files/dc/sites/mpdc/publication/attachments/Unrest-Related%20Arrest%20Data%20as%20of%20January%207%202021.pdf

---

### Justice Department list

In [10]:
justice_url = "https://www.justice.gov/opa/investigations-regarding-violence-capitol"

In [11]:
# Browser-like request headers sent with the Justice Department page request.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
}
# A timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() stops here on an HTTP error instead of letting an
# error page be parsed as if it were the case table.
response_justice = requests.get(justice_url, headers=headers, timeout=30)
response_justice.raise_for_status()

### Create a dataframe with the html table, and clean up the headers

In [12]:
# Read the table carrying the "tablesorter" class into a dataframe.
# The markup is wrapped in StringIO because passing a literal HTML string
# to pd.read_html is deprecated in newer pandas releases.
justice = pd.read_html(
    StringIO(response_justice.text), attrs={"class": "tablesorter"}
)[0]

In [13]:
# Normalise the scraped headers: trim and lowercase them, then apply the
# literal substitutions one at a time (spaces, slashes and commas become
# underscores; colons are removed).
normalized_cols = justice.columns.str.strip().str.lower()
for old, new in [(" ", "_"), (":", ""), ("/", "_"), (",", "_")]:
    normalized_cols = normalized_cols.str.replace(old, new, regex=False)
justice.columns = normalized_cols

### Make a copy of the dataframe so we can preserve the original

In [14]:
# Work on an independent copy (with "name" renamed to "fullname") so the
# frame parsed from the page stays untouched.
justice_df = justice.rename(columns={"name": "fullname"}).copy()

In [15]:
# Strip the parenthetical aliases from this one defendant's entry so the
# name is left in the plain "LAST, First" form (literal match, not a regex).
justice_df["fullname"] = justice_df["fullname"].str.replace(
    pat="BETANCUR, Bryan (aka Bryan Clooney, aka Maximo Clooney)",
    repl="BETANCUR, Bryan",
    regex=False,
)

In [16]:
justice_df.tail()

Unnamed: 0,case_number,fullname,charge(s),links_to_press_releases__charging_documents,residency,case_status,entry_last_updated
70,1:21-mj-50,"SULLIVAN, John Earl","Restricted building or grounds; civil disorders, violent entry or disorderly conduct",Utah Man Charged in Federal Court Following Events at the United States Capitol Sullivan - Criminal Complaint Sullivan - Complaint Affidavit,UT,Initial appearance in the District of Utah at 4pm on 1/15/2021.,1/14/2021
71,1:21-mj-12,"SWEET, Douglas","Knowingly entering or remaining in any restricted building or grounds without lawful authority; or knowingly, with intent to impede government business or official functions, engaging in disorderly conduct on Capitol grounds; and violent entry and disorderly conduct on Capitol grounds.",Thirteen Charged in Federal Court Following Riot at the United States Capitol Fitchett et al - Complaint Fitchett et al - Statement of Facts,VA,,1/15/2021
72,,"WATKINS, Jessica",Restricted building or grounds; violent entry or disorderly conduct; obstruction of an official proceeding,Watkins - Affidavit Watkins - Complaint,,Arrested 1/18/21 in the Southern District of Ohio,1/18/2021
73,1:21-mj-30,"WILLIAMS, Andrew",One count of unlawful entry of a restricted building and one count of disorderly conduct on Capitol grounds,Seven Charged in Federal Court Following Events At the United Capitol Williams - Complaint Williams - Affidavit for Complaint,FL,Arrested in Florida 1/13/21,1/14/2021
74,1:21-mj-26,"WRIGLEY, Andrew",Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; Violent Entry and Disorderly Conduct on Capitol Grounds,Wrigley - Complaint Wrigley - Statement of Facts,,Arrested on 1/15/21 in the Middle District of Pennsylvania,1/17/2021


### Clean up defendant names

In [17]:
# justice_df[["namerest"]] = justice_df.pop("fullname").str.split(
#     ",", expand=True
# )

In [18]:
# justice_df[["firstname", "middlename"]] = justice_df.pop("namerest").str.split(
#     expand=True
# )

In [19]:
# justice_df["lastname"] = justice_df["lastname"].str.title()

In [20]:
justice_df.head()

Unnamed: 0,case_number,fullname,charge(s),links_to_press_releases__charging_documents,residency,case_status,entry_last_updated
0,,"ADAMS, Daniel Page",Assaulting a federal officer; obstructing law enforcement engaged in official duties incident to civil disorder; knowingly entering or remaining in any restricted building or grounds without lawful authority; violent entry and disorderly conduct on Capitol grounds,Adams Connell - Complaint Adams Connell - Statement of Facts,,Arrested 1/16/21 in the Eastern District of Texas,1/17/2021
1,1:21-mj-10,"ALBERTS, Christopher Michael","Carrying or having readily accessible, on the grounds of the United States Capitol Building, a firearm and ammunition.",Thirteen Charged in Federal Court Following Riot at the United States Capitol Alberts - Complaint Alberts - Statement of Facts,MD,Arrested on 1/7/21. Initial appearance / detention hearing on 1/7/21. Defendant released. Preliminary Hearing set for 1/28/21 at 2:00 p.m.,
2,,"BARANYI, Thomas",Restricted building or grounds; violent entry or disorderly conduct,Baranyi - Affidavit,,Arrested on 1/12/21. Initial appearance in the District of New Jersey on 1/12/21,1/15/2021
3,1:21-mj-13,"BARNETT, Richard","Knowingly entering or remaining in any restricted building or grounds without lawful entry; violent entry and disorderly conduct on Capitol grounds; and theft of public money, property, or records. Barnett allegedly entered a restricted are of the Speaker of the House Nancy Pelosi.",Man Arrested for Illegally Entering Office of Speaker of the House Thirteen Charged in Federal Court Following Riot at the United States Capitol Barnett - Statement of Facts Barnett - Amended Complaint Barnett - Amended Statement of Facts,AR,Initial Appearance 1/12/21 at 3pm CST (virtual – WDAR District Court),
4,1:21-mj-59,"BAUER, Robert L.",Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; Violent Entry and Disorderly Conduct on Capitol Grounds,Bauer and Hemenway - Complaint Bauer and Hemenway - Statement of Facts,,Arrested on 1/15/21 in the Western District of Kentucky.,1/15/2021


In [21]:
justice_df["fullname"] = justice_df["fullname"].str.strip()

In [22]:
# Reorder the columns we keep. The explicit .copy() makes the result an
# independent frame, so adding columns to it later does not raise
# SettingWithCopyWarning for writing to a slice of another dataframe.
justice_df = justice_df[
    [
        "case_number",
        "fullname",
        #         "firstname",
        #         "middlename",
        #         "lastname",
        "residency",
        "charge(s)",
        "links_to_press_releases__charging_documents",
        "case_status",
        "entry_last_updated",
    ]
].copy()

### Get the HTML table so we can parse it

In [23]:
# Parse the response that was already downloaded rather than requesting the
# page a second time. A fresh request (previously made here without the
# browser headers) could return different markup, leaving the scraped links
# out of step with the rows pd.read_html produced.
soup = BeautifulSoup(response_justice.text, "html.parser")
# Pick the same table that pd.read_html was pointed at (class "tablesorter").
table = soup.find("table", attrs={"class": "tablesorter"})

### Get the defendant document links from the table into a list

In [24]:
# Build one list of hrefs per table row, skipping the first (header) row.
# The anchors are read from the fourth cell of each row, which is the
# press-release / charging-document column in the table shown above.
links = []
for row in table.find_all("tr")[1:]:
    cells = row.find_all("td")
    # href=True ignores anchors that carry no href attribute; indexing
    # such an anchor with ["href"] would raise KeyError.
    links.append([anchor["href"] for anchor in cells[3].find_all("a", href=True)])

In [25]:
justice_df["links"] = links

### Get the link descriptions from the table into a list

In [26]:
# press_releases = []
# for tr in table.findAll("tr")[1:]:
#     trs = tr.findAll("td")

#     this_row_text = []

#     for p in trs[3].findAll("p"):
#         this_row_text.append(p)

#     press_releases.append(this_row_text)

In [27]:
# justice_df["press_links"] = press_releases

### We no longer need this column

In [28]:
# justice_df.drop(["links_to_press_releases__charging_documents"], axis=1, inplace=True)

### How's the dataframe look? 

In [29]:
justice_df.head()

Unnamed: 0,case_number,fullname,residency,charge(s),links_to_press_releases__charging_documents,case_status,entry_last_updated,links
0,,"ADAMS, Daniel Page",,Assaulting a federal officer; obstructing law enforcement engaged in official duties incident to civil disorder; knowingly entering or remaining in any restricted building or grounds without lawful authority; violent entry and disorderly conduct on Capitol grounds,Adams Connell - Complaint Adams Connell - Statement of Facts,Arrested 1/16/21 in the Eastern District of Texas,1/17/2021,"[/opa/page/file/1355881/download, /opa/page/file/1355876/download]"
1,1:21-mj-10,"ALBERTS, Christopher Michael",MD,"Carrying or having readily accessible, on the grounds of the United States Capitol Building, a firearm and ammunition.",Thirteen Charged in Federal Court Following Riot at the United States Capitol Alberts - Complaint Alberts - Statement of Facts,Arrested on 1/7/21. Initial appearance / detention hearing on 1/7/21. Defendant released. Preliminary Hearing set for 1/28/21 at 2:00 p.m.,,"[/opa/pr/thirteen-charged-federal-court-following-riot-united-states-capitol, /opa/press-release/file/1351681/download, /opa/press-release/file/1351686/download]"
2,,"BARANYI, Thomas",,Restricted building or grounds; violent entry or disorderly conduct,Baranyi - Affidavit,Arrested on 1/12/21. Initial appearance in the District of New Jersey on 1/12/21,1/15/2021,[/opa/page/file/1355731/download]
3,1:21-mj-13,"BARNETT, Richard",AR,"Knowingly entering or remaining in any restricted building or grounds without lawful entry; violent entry and disorderly conduct on Capitol grounds; and theft of public money, property, or records. Barnett allegedly entered a restricted are of the Speaker of the House Nancy Pelosi.",Man Arrested for Illegally Entering Office of Speaker of the House Thirteen Charged in Federal Court Following Riot at the United States Capitol Barnett - Statement of Facts Barnett - Amended Complaint Barnett - Amended Statement of Facts,Initial Appearance 1/12/21 at 3pm CST (virtual – WDAR District Court),,"[/opa/pr/man-arrested-illegally-entering-office-speaker-house, /opa/pr/thirteen-charged-federal-court-following-riot-united-states-capitol, /opa/press-release/file/1351656/download, /usao-dc/press-release/file/1353191/download, /usao-dc/press-release/file/1353231/download]"
4,1:21-mj-59,"BAUER, Robert L.",,Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; Violent Entry and Disorderly Conduct on Capitol Grounds,Bauer and Hemenway - Complaint Bauer and Hemenway - Statement of Facts,Arrested on 1/15/21 in the Western District of Kentucky.,1/15/2021,"[/opa/page/file/1355726/download, /opa/page/file/1355721/download]"


In [30]:
justice_df.residency.value_counts()

FL    4
NY    3
VA    3
TX    3
AL    2
MD    2
AR    2
PA    2
WA    1
LA    1
TN    1
WV    1
UT    1
AZ    1
WI    1
IA    1
CO    1
NH    1
IL    1
OH    1
ID    1
HI    1
Name: residency, dtype: int64

In [31]:
justice_df.to_json("processed/justice_df.json", orient="records", indent=2)

In [32]:
len(justice_df)

75

---

### Ingest data from the Program on Extremism at GW

In [33]:
# https://extremism.gwu.edu/Capitol-Hill-Cases

In [34]:
# Download the GW Program on Extremism tracker spreadsheet and save it as raw/latest_extremism.xlsx.
# NOTE(review): --output-document already fixes the destination path, so --directory-prefix looks redundant here — confirm against the wget manual.
!wget 'https://extremism.gwu.edu/sites/g/files/zaxdzs2191/f/CapitolHillTracker11521.xlsx' --directory-prefix="raw/" --output-document="raw/latest_extremism.xlsx"

--2021-01-19 06:07:20--  https://extremism.gwu.edu/sites/g/files/zaxdzs2191/f/CapitolHillTracker11521.xlsx
Resolving extremism.gwu.edu (extremism.gwu.edu)... 2606:4700::6812:825, 2606:4700::6812:925, 104.18.8.37, ...
Connecting to extremism.gwu.edu (extremism.gwu.edu)|2606:4700::6812:825|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 11830 (12K) [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
Saving to: ‘raw/latest_extremism.xlsx’


2021-01-19 06:07:21 (23.9 MB/s) - ‘raw/latest_extremism.xlsx’ saved [11830/11830]



In [35]:
gw_df = pd.read_excel("raw/latest_extremism.xlsx", skiprows=1).dropna(how="all", axis=1)

In [36]:
# Normalise the spreadsheet headers: trim and lowercase them, then apply the
# literal substitutions one at a time (spaces, slashes and commas become
# underscores; colons are removed).
gw_columns = gw_df.columns.str.strip().str.lower()
for old, new in [(" ", "_"), (":", ""), ("/", "_"), (",", "_")]:
    gw_columns = gw_columns.str.replace(old, new, regex=False)
gw_df.columns = gw_columns

In [37]:
# Parse the charge dates, keep only the calendar date (no time component),
# and replace missing values with an empty string.
gw_df["datecharged"] = (pd.to_datetime(gw_df["datecharged"]).dt.date).fillna("")

In [38]:
gw_df.head(10)

Unnamed: 0,name,agearrest,gender,state,datecharged
0,"ALBERTS, Christopher Michael",33,M,MD,2021-01-07
1,"BARANYI, Thomas",28,M,NJ,2021-01-10
2,"BARNETT, Richard",60,M,AR,2021-01-07
3,"BAUER, Robert",Unknown,M,KY,
4,"BLACK, Joshua Matthew",44,M,AL,2021-01-13
5,"BLEDSOE, Matthew",Unknown,M,Unknown,
6,"BROCK, Larry Rendell",54,M,TX,2021-01-09
7,"BROWN, Terry",69,M,PA,2021-01-07
8,"CAMARGO, Samuel",Unknown,M,FL,2021-01-15
9,"CHANSLEY, Jacob Anthony",33,M,AZ,2021-01-11


In [39]:
# Trim stray whitespace from the names. Bracket indexing is used because
# attribute-style access only works when the column name does not clash
# with a DataFrame attribute or method, and it cannot create new columns.
gw_df["name"] = gw_df["name"].str.strip()

In [40]:
len(gw_df)

58

### Merge with Justice data

In [41]:
# justice_gw_merge = pd.merge(
#     justice_df, gw_df, right_on="name", left_on="fullname", how="right"
# )

In [42]:
# len(justice_gw_merge)

In [43]:
# justice_gw_merge.head()

---

### Read data from the [Prosecution Project](https://theprosecutionproject.org/2021/01/14/prosecution-data-capitol-siege-of-january-6-2021/)

In [44]:
url2 = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQ-NJiMr9_MVxsqTSB1sYkzOZSfg59m6ViR7qvjXef3O4txMuWYxh7TlTVcQAxzduCjhLxKP3dlXUhX/pub?output=csv"

In [45]:
cases = pd.read_csv(url2)

In [46]:
# Normalise the sheet headers: trim and lowercase them, then apply the
# literal substitutions one at a time (spaces, slashes and hyphens become
# underscores; colons are removed).
case_columns = cases.columns.str.strip().str.lower()
for old, new in [(" ", "_"), (":", ""), ("/", "_"), ("-", "_")]:
    case_columns = case_columns.str.replace(old, new, regex=False)
cases.columns = case_columns

In [47]:
cases = cases.dropna(how="all")

In [48]:
cases["charges"] = cases["charges"].str.upper()

In [49]:
cases[["full_legal_name", "charges"]].head()

Unnamed: 0,full_legal_name,charges
0,Henry Tarrio,7 DC CODE 2506.01(B) POSSESSION OF LARGE CAPACITY AMMUNITION FEEDING DEVICE [2 COUNTS]
1,Matthew Council,18:1752(A)(1) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS
2,Bradley Rukstales,"18:1752(A) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY, OR KNOWINGLY, WITH INTENT TO IMPEDE GOVERNMENT BUSINESS OR OFFICIAL FUNCTIONS, ENGAGING IN DISORDERLY CONDUCT ON CAPITOL GROUNDS; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS"
3,Michael Curzio,"18:1752(A) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY, OR KNOWINGLY, WITH INTENT TO IMPEDE GOVERNMENT BUSINESS OR OFFICIAL FUNCTIONS, ENGAGING IN DISORDERLY CONDUCT ON CAPITOL GROUNDS; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS"
4,Cindy Fitchett,"18:1752(A) KNOWINGLY ENTERING OR REMAINING IN ANY RESTRICTED BUILDING OR GROUNDS WITHOUT LAWFUL AUTHORITY, OR KNOWINGLY, WITH INTENT TO IMPEDE GOVERNMENT BUSINESS OR OFFICIAL FUNCTIONS, ENGAGING IN DISORDERLY CONDUCT ON CAPITOL GROUNDS; 40:5104(E)(2) VIOLENT ENTRY AND DISORDERLY CONDUCT ON CAPITOL GROUNDS"


### Mean age of all arrestees related to the Capitol siege

In [50]:
round(cases.age.mean())

41

### Number of cases so far

In [51]:
len(cases)

163

### Gender counts for those charged

In [52]:
# Percentage share of each gender value. normalize is passed as an explicit
# boolean keyword; the old positional string "normalize" only worked because
# a non-empty string is truthy.
round(cases.gender.value_counts(normalize=True) * 100)

Male               86.0
Female             13.0
Unknown/unclear     1.0
Name: gender, dtype: float64

### Race/ethnicity of those charged?

In [53]:
# Percentage share of each racial/ethnic group. normalize is passed as an
# explicit boolean keyword; the old positional string "normalize" only
# worked because a non-empty string is truthy.
round(cases.racial_ethnic_group.value_counts(normalize=True) * 100)

White/Caucasian                   94.0
Black/African/African American     4.0
Unknown                            1.0
Latino/Hispanic                    1.0
Asian/South Asian                  1.0
Name: racial_ethnic_group, dtype: float64

In [54]:
# Number of cases per racial/ethnic group, as a two-column frame
# (racial_ethnic_group, count) ready for charting.
cases_race = cases.groupby("racial_ethnic_group").size().reset_index(name="count")

In [55]:
alt.Chart(cases_race).mark_bar().encode(
    x=alt.X("racial_ethnic_group:N", title="Race/ethnicity"),
    y=alt.Y("count:Q", title="Count"),
).properties(width=800, height=400, title="Cases by race/ethnicity")

---

### Cases that involve illegally entering the Capitol

In [56]:
# Keep the rows whose charge text contains the restricted-building wording;
# rows with no charge text are treated as non-matching.
capitol_entry_cases = cases.loc[
    cases["charges"].str.contains("RESTRICTED BUILDING OR GROUNDS", na=False)
]

In [57]:
len(capitol_entry_cases)

59

In [58]:
# Percentage share of each racial/ethnic group among the capitol-entry
# cases. normalize is passed as an explicit boolean keyword rather than a
# truthy positional string.
round(capitol_entry_cases.racial_ethnic_group.value_counts(normalize=True) * 100)

White/Caucasian                   98.0
Black/African/African American     2.0
Name: racial_ethnic_group, dtype: float64

In [59]:
# Percentage share of each location_state value among the capitol-entry
# cases. normalize is passed as an explicit boolean keyword rather than a
# truthy positional string.
round(capitol_entry_cases.location_state.value_counts(normalize=True) * 100)

District of Columbia    100.0
Name: location_state, dtype: float64

In [60]:
# Percentage share of each gender value among the capitol-entry cases.
# normalize is passed as an explicit boolean keyword rather than a truthy
# positional string.
round(capitol_entry_cases.gender.value_counts(normalize=True) * 100)

Male      88.0
Female    12.0
Name: gender, dtype: float64

---

### Export

In [61]:
cases.to_csv("raw/cases_prosecution_project.csv", index=False)