# Arrests/charges stemming from the Jan. 6 riot at the U.S. Capitol

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
import altair as alt
import altair_latimes as lat

In [4]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [5]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()
pd.set_option("display.max_colwidth", None)

### Justice Department list

In [6]:
justice_url = "https://www.justice.gov/opa/investigations-regarding-violence-capitol"

In [7]:
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
}
response_justice = requests.get(justice_url, headers=headers)

### Create a dataframe with the html table, and clean up the headers

In [8]:
justice = pd.read_html(response_justice.text, attrs={"class": "tablesaw"})[0]
strings = [",", "/", "(", ")", "_"]

### Clean up the headers

In [9]:
justice.columns = (
    justice.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace(":", "", regex=False)
    .str.replace("/", "_", regex=False)
    .str.replace(",", "_", regex=False)
    .str.replace("*", "", regex=False)
    .str.replace("(s)", "s", regex=False)
)

### Make a copy of the dataframe so we can improve the original

In [10]:
justice_df = pd.DataFrame(justice.rename(columns={"name": "fullname"}).copy())

In [11]:
justice_df["fullname"] = justice_df["fullname"].str.replace(
    "BETANCUR, Bryan (aka Bryan Clooney, aka Maximo Clooney)",
    "BETANCUR, Bryan",
    regex=False,
)

In [12]:
justice_df.tail()

Unnamed: 0,case_number,fullname,charges,associated_documents,location_of_arrest,case_status,entry_last_updated
329,1:21-cr-223,"WOOD, Matthew","Obstruction of an Official Proceeding and Aiding and Abetting; Entering and Remaining in a Restricted Building or Grounds; Disorderly and Disruptive Conduct in a Restricted Building or Grounds; Entering and Remaining in Certain Rooms in the Capitol Building; Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building",Wood - Complaint Wood - Indictment Wood - Statement of Facts,"North Carolina, Winston Salem","Arrested on 03/05/2021 in Winston Salem, North Carolina","March 25, 2021"
330,1:21-mj-296,"WORRELL, Christopher",Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; Knowingly Engaging in Disorderly or Disruptive Conduct in any Restricted Building or Grounds; Violent Entry and Disorderly Conduct on Capitol Grounds; Obstruction of Justice/Congress; Knowingly Engaging in Act of Physical Violence in any Restricted Building or Grounds,Worrell - Complaint Worrell - Statement of Facts,"Florida, Naples","Arrested on 03/12/2021 in Naples, Florida","March 25, 2021"
331,1:21-cr-42,"WRIGLEY, Andrew","Entering and Remaining in a Restricted Building; Disorderly and Disruptive Conduct in a Restricted Building; Violent Entry and Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building",Wrigley - Complaint Wrigley - Statement of Facts Wrigley - Information,"Pennsylvania, Middle District",Arrested on 1/15/21 in the Middle District of Pennsylvania. Initial appearance via video on 1/25/21 in federal court in the District of Columbia.,"March 25, 2021"
332,1:21-cr-28,"YOUNG, Graydon",Conspiracy; Obstruction of an Official Proceeding and Aiding and Abetting; Destruction of Government Property and Aiding and Abetting; Entering and Remaining in a Restricted Building or Grounds; Tampering with Documents or Proceedings,"Meggs Steele Young - Complaint & Affidavit Caldwell, et al – Indictment Caldwell, Crowl, Watkins, Parker, Parker, Young, Steele, Meggs, Meggs, Harrelson - Second Superceding Indictment","Florida, Middle District",Arrested 2/15/21 in the Middle District of Florida,"March 26, 2021"
333,1:21-cr-191,"ZINK, Ryan Scott",Obstruction of an Official Proceeding; Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority and engages in any act of physical violence against any person or property in any restricted building or grounds.,Zink - Complaint & Statement of Facts,Pennsylvania,,"March 25, 2021"


In [13]:
justice_df["fullname"] = justice_df["fullname"].str.strip()

In [14]:
justice_df = justice_df[
    [
        "case_number",
        "fullname",
        "location_of_arrest",
        "charges",
        "associated_documents",
        "case_status",
        "entry_last_updated",
    ]
]

### Get out HTML table so we can parse it

In [15]:
response_justice = requests.get(justice_url)
soup = BeautifulSoup(response_justice.text, "html.parser")
table = soup.find("table")

### Clean up locations

In [16]:
justice_df["location_of_arrest"] = (
    justice_df["location_of_arrest"]
    .str.strip()
    .str.title()
    .str.replace(", Middle District", "", regex=False)
    .str.replace(", Southern District", "", regex=False)
    .str.replace(", Central District", "", regex=False)
    .str.replace(", Western District", "", regex=False)
    .str.replace(", Eastern District", "", regex=False)
    .str.replace(", Northern District", "", regex=False)
)

In [17]:
justice_df["location_of_arrest"] = justice_df["location_of_arrest"].fillna("Not listed")

In [18]:
new = justice_df["location_of_arrest"].str.split(", ", n=1, expand=True)
justice_df["location_of_arrest_state"] = new[0]
justice_df["location_of_arrest_other"] = new[1]
justice_df.drop(columns=["location_of_arrest"], inplace=True)

### Get the defendant document links from the table into a list

In [19]:
links = []

for tr in table.findAll("tr")[1:]:
    trs = tr.findAll("td")

    this_row_links = []

    for link in trs[3].findAll("a"):
        this_row_links.append("https://www.justice.gov" + link["href"])

    links.append(this_row_links)

In [20]:
justice_df["links"] = links

In [21]:
links = justice_df["links"].apply(pd.Series).fillna("")

In [22]:
df = pd.merge(justice_df, links, left_index=True, right_index=True)

In [23]:
df.drop(["links"], axis=1, inplace=True)

In [24]:
df.rename(
    columns={
        0: "link_0",
        1: "link_1",
        2: "link_2",
        3: "link_3",
        4: "link_4",
        5: "link_5",
    },
    inplace=True,
)

### How's the dataframe look? 

In [25]:
df.head()

Unnamed: 0,case_number,fullname,charges,associated_documents,case_status,entry_last_updated,location_of_arrest_state,location_of_arrest_other,link_0,link_1,link_2,link_3,link_4,link_5
0,1:21-cr-212,"ADAMS, Jared Hunter","Entering and Remaining in a Restricted Building; Disorderly and Disruptive Conduct in a Restricted Building; Violent Entry and Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building",Adams & Jared - Information Adams & Jared - Statement of Facts Adams & Jared - Complaint,"Arrest date & location: 3/9/2021 in Hilliard, Ohio","March 22, 2021",Ohio,Hilliard,https://www.justice.gov/usao-dc/case-multi-defendant/file/1378326/download,https://www.justice.gov/usao-dc/case-multi-defendant/file/1378331/download,https://www.justice.gov/usao-dc/case-multi-defendant/file/1378336/download,,,
1,1:21-cr-115,"ALVEAR GONZALEZ, Eduardo Nicolas (aka, Alvear Gonzalez Eduardo Nicolas; aka, Nicolas Alvear)","Entering and Remaining in a Restricted Building or Grounds; Disorderly and Disruptive Conduct in a Restricted Building or Grounds; Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building",Gonzalez - Statement of Facts Gonzalez - Complaint Gonzalez - Information,Arrested 2/12/21 in the Eastern District of Virginia. Initial Appearance 2/12/21 at 2:00pm.,"March 23, 2021",Virginia,,https://www.justice.gov/usao-dc/case-multi-defendant/file/1378626/download,https://www.justice.gov/usao-dc/case-multi-defendant/file/1378631/download,https://www.justice.gov/usao-dc/case-multi-defendant/file/1378636/download,,,
2,1:21-cr-43,"ABUAL-RAGHEB, Rasha N.","Entering and Remaining in a Restricted Building; Disorderly and Disruptive Conduct in a Restricted Building; Violent Entry and Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building",Abual-Ragheb - Complaint Abual-Ragheb - Affidavit Abual-Ragheb - Indictment,Arrested 1/19/21 in the District of New Jersey,"February 4, 2021",New Jersey,,https://www.justice.gov/opa/page/file/1357081/download,https://www.justice.gov/opa/page/file/1357076/download,https://www.justice.gov/usao-dc/case-multi-defendant/file/1364746/download,,,
3,1:21-cr-84,"ADAMS, Daniel Page","Civil Disorder; Obstruction of an Official Proceeding; Assaulting, Resisting or Impeding Certain Officers; Entering and Remaining in a Restricted Building or Grounds; Disorderly and Disruptive Conduct in a Restricted Building or Grounds; Disorderly Conduct in a Capitol Building; Impeding Passage Through the Capitol Grounds or Buildings; Parading, Demonstrating, or Picketing in a Capitol Building","Adams Connell - Complaint Adams Connell - Statement of Facts Adams, Connell - Amended Statement of Facts Adams, Connell - Amended Complaint Adams, Connell - Indictment",Arrested 1/16/21 in the Eastern District of Texas,"February 10, 2021",Texas,,https://www.justice.gov/opa/page/file/1355881/download,https://www.justice.gov/opa/page/file/1355876/download,https://www.justice.gov/opa/page/file/1358936/download,https://www.justice.gov/opa/page/file/1358931/download,https://www.justice.gov/usao-dc/case-multi-defendant/file/1366036/download,
4,1:21-mj-291,"ADAMS, Howard Berton",Obstruction of Law Enforcement During Civil Disorder; Obstruction of Justice/Congress; Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; Disorderly conduct in restricted building or grounds; Disorderly Conduct on Capitol Grounds; Parading or demonstrating in Capitol building,Adams - Complaint Adams - Statement of Facts,"Arrest date & location: 3/10/21 in Edgewater, FL","March 22, 2021",Florida,Edgewater,https://www.justice.gov/usao-dc/case-multi-defendant/file/1378306/download,https://www.justice.gov/usao-dc/case-multi-defendant/file/1378311/download,,,,


### How many cases?

In [26]:
len(df)

334

### How many cases involve conspiracy?

In [27]:
len(df[df["charges"].str.contains("Conspiracy")])

26

### Arrest locations

In [28]:
df.location_of_arrest_state.value_counts().head()

Not listed      41
Florida         32
Pennsylvania    30
Texas           26
New York        24
Name: location_of_arrest_state, dtype: int64

### California cases

In [29]:
df_ca = df[df["location_of_arrest_state"] == "California"]

In [30]:
df_ca[["fullname", "charges"]]

Unnamed: 0,fullname,charges
7,"ALLAN, Tommy Frederick","Theft of Government Property; Entering and Remaining in a Restricted Building or Grounds; Disorderly and Disruptive Conduct in a Restricted Building or Grounds; Entering and Remaining on the Floor of Congress; Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building"
28,"BISIGNANO, Gina Michelle",Obstruction of an Official Proceeding; Aiding and Abetting; Civil Disorder; Destruction of Government Property; Entering and Remaining in a Restricted Building or Grounds; Disorderly and Disruptive Conduct in a Restricted Building or Grounds; Engaging in Physical Violence in a Restricted Building or Grounds; Disorderly Conduct in a Capitol Building
65,"CORDON, Kevin Francisco",Obstruction of Justice/Congress; Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; Violent Entry and Disorderly Conduct on Capitol Grounds
82,"EHMKE, Hunter Allen","Destruction of Government Property; Obstruction of an Official Proceeding; Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building"
83,"EHRKE, Valerie Elaine","Entering and Remaining in a Restricted Building; Disorderly and Disruptive Conduct in a Restricted Building; Violent Entry and Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building"
105,"GOLD, Simone Melissa",Restricted Building or Grounds; Violent Entry and Disorderly Conduct
165,"LEWIS, Jacob","Entering and Remaining in a Restricted Building; Disorderly and Disruptive Conduct in a Restricted Building; Violent Entry and Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building"
245,"RILEY, Jorge A.","Obstructing of an Official Proceeding; Aiding and Aiding; Entering and Remaining in a Restricted Building or Grounds; Disorderly and Disruptive Conduct in a Restricted Building or Grounds; Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building"
269,"SECOR, Christian","Obstruction of an Official Proceeding; Civil Disorder; Assaulting, Resisting, or Impeding Certain Officers; Entering and Remaining in a Restricted Building or Grounds; Disorderly and Disruptive Conduct in a Restricted Building or Grounds; Entering and Remaining on the Floor of Congress; Entering and Remaining in the Gallery of Congress; Entering and Remaining in Certain Rooms in the Capitol Building; Disorderly Conduct in a Capitol Building; Parading, Demonstrating, or Picketing in a Capitol Building"
279,"SIMON, Mark","Entering and Remaining in a Restricted Building or Grounds; Disorderly and Disruptive Conduct in a Restricted Building or Grounds; Disorderly Conduct in a Capitol Building or Grounds; Parading, Demonstrating, or Picketing in a Capitol Building"


---

## Export

### All cases

In [31]:
df.to_json("processed/cases.json", orient="records", indent=2)

In [32]:
df.to_csv("processed/cases.csv", index=False)

### California cases

In [33]:
df_ca.to_json("processed/california-cases.json", orient="records", indent=2)

In [34]:
df_ca.to_csv("processed/california-cases.csv", index=False)