# Other data sources for Capitol arrests

### Import Python tools and Jupyter configuration

In [1]:
# Load the `lab_black` IPython extension for this kernel session.
# NOTE(review): presumably this auto-formats code cells with Black — confirm the
# extension is installed in the environment used to re-run the notebook.
%load_ext lab_black

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re

In [3]:
import altair as alt
import altair_latimes as lat
import numpy as np

In [4]:
# Register the LA Times theme under the name "latimes" and then enable that
# same name. The cell previously enabled "stiles", a name this notebook never
# registers; the recorded output (ThemeRegistry.enable('latimes')) shows the
# theme that was actually intended.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [5]:
# Notebook display settings: show up to 100 columns and 1000 rows, call
# Altair's disable_max_rows(), and stop truncating wide text columns.
# The statement order is kept so the cell still ends on an assignment and
# prints nothing.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
alt.data_transformers.disable_max_rows()
pd.set_option("display.max_colwidth", None)

### Get data from the Program on Extremism at GW

In [6]:
# Download the GW Program on Extremism tracker workbook and write it to
# raw/latest_extremism.xlsx, the path given to --output-document.
# NOTE(review): the URL names a dated file (CapitolHillTracker041321.xlsx), so
# "latest" only reflects whatever this link serves — confirm it is still current.
!wget 'https://extremism.gwu.edu/sites/g/files/zaxdzs2191/f/CapitolHillTracker041321.xlsx' --directory-prefix="raw/" --output-document="raw/latest_extremism.xlsx"

--2021-05-03 08:46:58--  https://extremism.gwu.edu/sites/g/files/zaxdzs2191/f/CapitolHillTracker041321.xlsx
Resolving extremism.gwu.edu (extremism.gwu.edu)... 2606:4700::6812:925, 2606:4700::6812:825, 104.18.8.37, ...
Connecting to extremism.gwu.edu (extremism.gwu.edu)|2606:4700::6812:925|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 22985 (22K) [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
Saving to: ‘raw/latest_extremism.xlsx’


2021-05-03 08:46:59 (380 KB/s) - ‘raw/latest_extremism.xlsx’ saved [22985/22985]



In [7]:
# Load the downloaded tracker workbook into a DataFrame.
df = pd.read_excel(io="raw/latest_extremism.xlsx")

In [8]:
# Tidy the column labels: trim and lowercase them, then apply the literal
# substitutions in order (space, slash and comma -> "_", colon removed).
tidy_labels = df.columns.str.strip().str.lower()
for old, new in ((" ", "_"), (":", ""), ("/", "_"), (",", "_")):
    tidy_labels = tidy_labels.str.replace(old, new, regex=False)
df.columns = tidy_labels

In [9]:
# Show the first five rows as a sanity check on the load.
df.head(n=5)

Unnamed: 0,name,gender,state,datecharged
0,"ABUAL-RAGHEB, Rasha",F,NJ,2021-01-17
1,"ADAMS, Daniel Page",M,TX,2021-01-15
2,"ADAMS, Howard Berton",M,FL,2021-03-08
3,"ADAMS, Jared Hunter",M,OH,2021-03-08
4,"ALAM, Zachary",M,PA,2021-01-25


In [10]:
# Parse the charge dates, keep only the calendar date, and show missing
# values as empty strings.
charged_on = pd.to_datetime(df["datecharged"])
df["datecharged"] = charged_on.dt.date.fillna("")

In [11]:
# Remove leading/trailing whitespace from the defendant names.
df["name"] = df["name"].str.strip()

In [12]:
# Number of rows in the tracker.
df.shape[0]

374

In [13]:
# Five states with the most rows in the tracker.
df["state"].value_counts().head(5)

TX    37
FL    36
PA    36
NY    27
VA    20
Name: state, dtype: int64

---

### Read data from the [Prosecution Project](https://theprosecutionproject.org/2021/01/14/prosecution-data-capitol-siege-of-january-6-2021/)

In [14]:
# Published-CSV link for the Prosecution Project spreadsheet.
url = (
    "https://docs.google.com/spreadsheets/d/e/"
    "2PACX-1vQ-NJiMr9_MVxsqTSB1sYkzOZSfg59m6ViR7qvjXef3O4txMuWYxh7TlTVcQAxzduCjhLxKP3dlXUhX"
    "/pub?output=csv"
)

In [15]:
# Link to the GW Program on Extremism county-level workbook.
counties_url = (
    "https://extremism.gwu.edu/sites/g/files/zaxdzs2191/f/"
    "GWUPOE_CapitolSiegeCounties_Apr2021.xlsx"
)

In [16]:
!wget 'https://extremism.gwu.edu/sites/g/files/zaxdzs2191/f/GWUPOE_CapitolSiegeCounties_Apr2021.xlsx' --directory-prefix="raw/" --output-document="raw/counties.xlsx"

--2021-05-03 08:46:59--  https://extremism.gwu.edu/sites/g/files/zaxdzs2191/f/GWUPOE_CapitolSiegeCounties_Apr2021.xlsx
Resolving extremism.gwu.edu (extremism.gwu.edu)... 2606:4700::6812:925, 2606:4700::6812:825, 104.18.8.37, ...
Connecting to extremism.gwu.edu (extremism.gwu.edu)|2606:4700::6812:925|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 25921 (25K) [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
Saving to: ‘raw/counties.xlsx’


2021-05-03 08:47:00 (375 KB/s) - ‘raw/counties.xlsx’ saved [25921/25921]



In [17]:
# Read the Prosecution Project sheet straight from its published CSV link.
# NOTE(review): the dtype key is lowercase "age" while parse_dates uses the
# capitalised "Date" — confirm the raw header spelling so the dtype is
# actually applied.
cases = pd.read_csv(
    url,
    dtype={"age": float},
    parse_dates=["Date"],
)

In [18]:
# Load the county workbook, requesting text dtype for the two ID columns.
counties = pd.read_excel(
    "raw/counties.xlsx",
    dtype=dict(stateid=str, countyid=str),
)

In [19]:
# Tidy the county column labels: trim, lowercase, then apply each literal
# substitution in insertion order (space, slash and hyphen -> "_", colon removed).
replacements = {" ": "_", ":": "", "/": "_", "-": "_"}
county_labels = counties.columns.str.strip().str.lower()
for target, substitute in replacements.items():
    county_labels = county_labels.str.replace(target, substitute, regex=False)
counties.columns = county_labels

In [20]:
# Tidy the case column labels one step at a time: trim and lowercase, then
# turn spaces, slashes and hyphens into underscores and drop colons.
case_labels = cases.columns.str.strip().str.lower()
case_labels = case_labels.str.replace(" ", "_", regex=False)
case_labels = case_labels.str.replace(":", "", regex=False)
case_labels = case_labels.str.replace("/", "_", regex=False)
case_labels = case_labels.str.replace("-", "_", regex=False)
cases.columns = case_labels

In [21]:
# Discard rows in which every column is missing.
cases = cases.dropna(axis="index", how="all")

In [22]:
# List the cleaned column labels (DataFrame.keys() returns the columns Index).
cases.keys()

Index(['date', 'date_descriptor', 'case_id', 'group_identifier',
       'full_legal_name', 'first_name', 'family_name', 'other_names_aliases',
       'co_offenders', 'name_of_case', 'jurisdiction', 'location_country',
       'location_state', 'location_city', 'group_affiliation', 'charges',
       'additional_details', 'age', 'gender', 'racial_ethnic_group',
       'short_narrative'],
      dtype='object')

In [23]:
# Upper-case the charge text.
charges_upper = cases["charges"].str.upper()
cases["charges"] = charges_upper

### Federal cases

In [24]:
# Keep only rows whose jurisdiction is exactly "Federal", as an independent copy.
is_federal = cases["jurisdiction"].eq("Federal")
fed_cases = cases.loc[is_federal].copy()

In [25]:
# Number of federal rows.
fed_cases.shape[0]

410

In [26]:
# Peek at the date, name and narrative fields of the first five federal rows.
preview_cols = ["date", "date_descriptor", "full_legal_name", "short_narrative"]
fed_cases.loc[:, preview_cols].head()

Unnamed: 0,date,date_descriptor,full_legal_name,short_narrative
1,01/06/2021,Crime/attack,Kash Lee Kelly,"Kash Lee Kelly, a ""Trump-loving former Chicago-area Latin Kings gangster"" has been charged for ""his alleged, and extensively documented, trespassing into the U.S. Capitol on January 6....Drawing on widely-publicized photos and Kelly's own social media accounts, federal agents have charged the northwestern Indiana resident with unlawfully entering the seat of government with fellow Trump supporters...The Department of Justice indicted Kelly and 11 other members of the Latin Kings in 2017 for running a drug-dealing ring, although he avoided the racketeering charges brought against his peers. According to the Northwest Indiana Times, Kelly was convicted and had been awaiting sentencing when he traveled to D.C.—and said in an interview he believed he was allowed to make the trip. (Buzzfeed News)"""
57,01/07/2021,Indictment,Lonnie Leroy Coffman,"Lonnie Leroy Coffman, 70, of Falkville, Alabama was arrested after USCP Bomb Squad members subsequently searched a vehicle registered to Coffman vehicle and secured one black handgun, one M4 Carbine assault rifle along with rifle magazines loaded with ammunition, and components for the construction of eleven “Molotov Cocktails” in the form of mason jars filled with ignitable substances, rags, and lighters. Coffman was subsequently searched and found to have on his person a 9mm Smith & Wesson handgun, a 22-caliber derringer style handgun, and two sets of vehicle keys that matched the truck. In his truck was a notepad with contact details for Senator Ted Cruz, Sean Hannity, and a list of ""good"" people and ""bad"" people, including a judge."
58,01/07/2021,Complaint,Anthime Joseph Gionet,"January 2021, Anthime Joseph Gionet, 33, was arrested and charged with Knowingly Entering or Remaining in any Restricted Building or Grounds Without Lawful Authority; and Violent Entry and Disorderly Conduct on Capitol Grounds. Gionet is known to be a far-right media personality nicknamed “Baked Alaska” known for livestreaming himself participating in illegal activity. While in the Capitol building he carried out a 27-minute livestream video. Gionet has pending misdemeanor assault charges in Arizona."
79,01/08/2021,Complaint,Derrick Evans,"On January 6th 2021 West Virginia Delegate Derrick Evans livestreamed footage of himself entering the Capitol building in Washington DC. Evans, a newly elected state representative from West Virginia, reportedly deleted the video soon after the siege ended, but it soon re-emerged on Reddit. Days later, Evans resigned from his office in the House of Delegates."
80,01/08/2021,Complaint,Adam Christian Johnson,"On Jan. 6, 2021, Adam Johnson illegally entered the United States Capitol during the Capitol Riots and removed the Speaker of the House’s lectern from where it had been stored on the House side of the Capitol building. He is pictured in a photo that has been spread on the internet carrying the lectern."


In [27]:
# Federal rows whose narrative mentions "California"; rows with a missing
# narrative count as non-matches.
mentions_california = fed_cases["short_narrative"].str.contains(
    "California", na=False
)
fed_cases.loc[
    mentions_california,
    ["date", "date_descriptor", "full_legal_name", "short_narrative"],
]

Unnamed: 0,date,date_descriptor,full_legal_name,short_narrative
152,02/03/2021,Indictment,Jorge Aaron Riley,"Jorge Riley was charged in Washington, DC after he was seen in photos and video inside the Capitol during the insurrection on Jan. 6. On LinkedIn, he is titled the 'corresponding secretary at California Republican Assembly.' Riley is charged with obstructing an official proceeding, illegally entering a restricted building and disorderly conduct at the Capitol for allegedly entering Pelosi’s office."
183,02/09/2021,Indictment,Valerie Elaine Ehrke,"Valerie Elaine Ehrke’s was arrested, ""after a friend they told about video footage on the California woman’s Facebook page from inside the Capitol on January 6 contacted the FBI. The footage, which is described in a complaint charging Ehrke with trespassing and disorderly conduct offenses, shows “a group of people entering the U.S. Capitol building with a caption reading, ‘We made it inside, right before they shoved us all out. I took off when I felt pepper spray in my throat! Lol.’” In a Jan. 13 interview with FBI agents, Ehrke admitted going to the Capitol riot because “she wanted to be part of the crowd.” (Buzzfeed News)"""
187,02/09/2021,Indictment,Jacob Lewis,"According to prosecutors, Jacob Lewis, a California-based gym owner, flew from CA to DC ""to attack the Capitol...[and] stop the certification of the electoral college."" He posted videos of himself on Instagram from inside the Capitol. On 1/11/21 FBI received a telephone tip identifying Lewis from the defendant's friend. Lewis had previously asked this individual to get him ammunition after showing him pictures of guns he claimed to own. Lewis admitted entering the Capitol to interrogators be claims he was ""escorted"" by police and not prevented from entering. Lewis is shown in the court records wearing a red Trump winter hat."
257,02/26/2021,Indictment,Christian Alexander Secor,"""Eleven different tipsters identified Secor to the FBI, providing copies of photos and videos of him inside the Senate chamber and sitting in the chair of the presiding officer on the dais, the agency stated in court documents. In the photos, the UCLA student is wearing a red Make America Great Again hat and carrying an America First flag. The FBI said Secor is a self-identified Groyper, a network of ""alt right figures who are vocal supporters of white supremacist and 'America First' podcaster Nick Fuentes. The FBI stated a tipster reported Secor founded the campus organization ""America First Bruins"" and is a member of Bruins Republicans. The FBI stated Secor can be seen in a group of rioters pushing their way into the building. Court documents state a tipster told the FBI that Secor moved back in with his mother after the riot, got rid of his phone and car and bragged that he would not be caught for his involvement. The organization Left Coast Right Watch previously reported that Secor was one of the young men from southern California who chanted and shouted ""America First"" and streamed video of themselves tearing down the silver monolith that appeared in California last April."" (USA Today)"
401,04/01/2021,Indictment,Sean Carlo Cordon,"Brothers Kevin Francisco Cordon, 33, and Sean Carlo Cordon, 35, of California, have both been charged following their participation in the Capitol riot. The Cordon brothers were identified after they were seen in a video posted on a Finnish news site."
407,04/02/2021,Indictment,Kevin Francisco Cordon,"Brothers Kevin Francisco Cordon, 33, and Sean Carlo Cordon, 35, of California, have both been charged following their participation in the Capitol riot. The Cordon brothers were identified after they were seen in a video posted on a Finnish news site."
430,04/08/2021,Indictment,Jeffrey Alexander Smith,"""Following the events on January 6, 2021, the Federal Bureau of Investigation (FBI) received tips at the FBI National Threat Operations Center (NTOC) from individuals who reported that they identified JEFFREY ALEXANDER SMITH, aka ALEX SMITH, as being involved in the events at the Capitol. On January 7, 2021, one individual (“Witness-1”) who identified themselves and who has known SMITH for an extended period of time had discovered a photo of SMITH inside the Capitol on social media which had been posted to Instagram by @homegrownterrorists. The photo depicts SMITH wearing a black jacket and a Trump hat, with a red cell phone in his right hand which is raised in the air. On January 8, 2021, a tip was received by the FBI Tipline from an individual (“Witness- 2”) who grew up with SMITH in Coronado, California, and who texted with SMITH via Instagram after the riots. Witness-2 provided screen shots of the messages which included SMITH writing, “I’m a Patriot”, “I stormed the capital” (sic). In the text, SMITH stated that his purpose was “To send a message that Americans are[n’] t going to take a fraudulent election.” SMITH also wrote to Witness-2, “There is no way in hell I was going to drive 38 hours from San Diego and not walk right through the front of the capital (sic) building.” Witness-2 also observed that SMITH had since deleted his Instagram account...SMITH told the [FBI] agent that he drove 38 hours to Washington, DC, to attend the speech by President Trump on January 6, 2021. SMITH said that his girlfriend traveled by air and met him in Washington, D.C. SMITH told his girlfriend to remain at the hotel instead of getting into all the chaos at the U.S. Capitol. SMITH also identified the hotel he stayed at in Washington, D.C. SMITH admitted that when he approached the U.S. Capitol it was a chaotic scene. He heard people on loudspeakers, he saw tear gas, and heard what he thought were flash bangs going off. 
SMITH admitted he walked into the Capitol on January 6, 2021 and remained in the Capitol for about 30 minutes. SMITH admitted to deleting his Instagram account after he began receiving threats related to his involvement in the Capitol events. (Statement of Facts)"""


### Mean age of all arrestees related to the Capitol siege

In [28]:
# Strip the "#" marker from the age values and convert them to numbers so the
# column can be averaged. Values that still cannot be parsed become NaN rather
# than raising. Casting to str first keeps the cell safe to re-run once the
# column is already numeric.
fed_cases["age"] = pd.to_numeric(
    fed_cases["age"].astype(str).str.replace("#", "", regex=False),
    errors="coerce",
)

In [29]:
# cases["age"] = cases["age"].astype(float)

In [30]:
# cases["age"].mean()

### Number of federal cases so far

In [31]:
# Number of federal rows.
len(fed_cases.index)

410

### Gender counts for those charged

In [32]:
# Percentage share of each gender among the federal rows, rounded to whole
# numbers. `normalize=True` is now passed explicitly; the previous call
# supplied the string "normalize" positionally, which only worked because a
# non-empty string is truthy.
fed_cases["gender"].value_counts(normalize=True).mul(100).round()

Male      87.0
Female    13.0
Name: gender, dtype: float64

### Race/ethnicity of those charged?

In [33]:
# Count the federal rows in each racial/ethnic group and store the tally in a
# two-column frame (racial_ethnic_group, count).
cases_race = (
    fed_cases.groupby("racial_ethnic_group").size().reset_index(name="count")
)

In [34]:
# Largest groups first.
cases_race.sort_values(by="count", ascending=False)

Unnamed: 0,racial_ethnic_group,count
6,White/Caucasian,367
3,Latino/Hispanic,13
5,Unknown,13
2,Black/African/African American,6
4,Middle Eastern/North African,2
0,Asian/South Asian,1
1,Biracial/Multiracial,1


### Counties

In [35]:
# Left-pad the ID strings with zeros: state IDs to 2 characters, county IDs to 5.
for id_column, width in (("stateid", 2), ("countyid", 5)):
    counties[id_column] = counties[id_column].str.zfill(width)

In [36]:
# Remove the `countyid2` column. The result is reassigned instead of using
# inplace=True, and errors="ignore" lets the cell be re-run after the column
# is already gone (the in-place version raised KeyError on a second run).
counties = counties.drop(columns=["countyid2"], errors="ignore")

In [37]:
# Ten counties with the most cases.
counties.sort_values(by="cases", ascending=False).head(n=10)

Unnamed: 0,county,state,stateid,countyid,cases
164,Franklin,OH,39,39049,6
13,Los Angeles,CA,6,6037,6
175,Bucks,PA,42,42017,4
208,Dallas,TX,48,48113,4
31,Brevard,FL,12,12009,4
33,Clay,FL,12,12019,3
38,Marion,FL,12,12083,3
41,Orange,FL,12,12095,3
43,Pinellas,FL,12,12103,3
214,Harris,TX,48,48201,3


In [38]:
# Ten California counties with the most cases.
california_counties = counties.loc[counties["state"].eq("CA")]
california_counties.sort_values(by="cases", ascending=False).head(n=10)

Unnamed: 0,county,state,stateid,countyid,cases
13,Los Angeles,CA,6,6037,6
14,Orange,CA,6,6059,2
16,Riverside,CA,6,6065,2
12,Colusa,CA,6,6011,1
15,Placer,CA,6,6061,1
17,Sacramento,CA,6,6067,1
18,San Bernardino,CA,6,6071,1
19,San Diego,CA,6,6073,1
20,San Francisco,CA,6,6075,1
21,Santa Clara,CA,6,6085,1
