In [2]:
import pandas as pd
import numpy as np
import re 
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import MultipleLocator

In [3]:
tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_serial_killers_in_the_United_States#Identified_serial_killers")

In [4]:
serial_killers = tables[0]
serial_killers

Unnamed: 0,Name,Years active,Proven victims,Possible victims,Status,Notes,Source(s)
0,"Ables, Tony",1970–1990,4,4+,Sentenced to death; commuted to life imprisonment,"Murdered robbery victim in 1970, and at least ...",[4]
1,"Acevedo, Francisco",1989–1996,3,3,Sentenced to 75 years to life,Strangled three prostitutes in New York betwee...,[5]
2,"Adams, Edward James",1920–1921,7,7,Killed by police during shootout,"Murdered seven people, including three policemen",[6]
3,"Agrue, John",1966–1982,3,3+,Died in 2009,Killed his sister-in-law in Illinois; paroled ...,[7]
4,"Albanese, Charles",1980–1981,3,3,Executed 1995,Poisoned family members with arsenic in Fox La...,[8]
...,...,...,...,...,...,...,...
707,"Wooten, Charles",1969–1993,3,3,Sentenced to life imprisonment,Killed two gas station attendants in Fort Wort...,[771]
708,"Wright, Douglas Franklin",1969–1991,7,7+,Executed 1996,First criminal executed by lethal injection in...,[772]
709,"Wuornos, Aileen",1989–1990,7,7,Executed 2002,"Known as the ""Damsel of Death""; shot seven men...",[773]
710,"Yates, Robert Lee",1975–1998,13,18+,Sentenced to death; commuted to life imprisonment,"Targeted women in and around Spokane, Washington",[774]


In [5]:
serial_killers.columns = serial_killers.columns.str.lower()
serial_killers

Unnamed: 0,name,years active,proven victims,possible victims,status,notes,source(s)
0,"Ables, Tony",1970–1990,4,4+,Sentenced to death; commuted to life imprisonment,"Murdered robbery victim in 1970, and at least ...",[4]
1,"Acevedo, Francisco",1989–1996,3,3,Sentenced to 75 years to life,Strangled three prostitutes in New York betwee...,[5]
2,"Adams, Edward James",1920–1921,7,7,Killed by police during shootout,"Murdered seven people, including three policemen",[6]
3,"Agrue, John",1966–1982,3,3+,Died in 2009,Killed his sister-in-law in Illinois; paroled ...,[7]
4,"Albanese, Charles",1980–1981,3,3,Executed 1995,Poisoned family members with arsenic in Fox La...,[8]
...,...,...,...,...,...,...,...
707,"Wooten, Charles",1969–1993,3,3,Sentenced to life imprisonment,Killed two gas station attendants in Fort Wort...,[771]
708,"Wright, Douglas Franklin",1969–1991,7,7+,Executed 1996,First criminal executed by lethal injection in...,[772]
709,"Wuornos, Aileen",1989–1990,7,7,Executed 2002,"Known as the ""Damsel of Death""; shot seven men...",[773]
710,"Yates, Robert Lee",1975–1998,13,18+,Sentenced to death; commuted to life imprisonment,"Targeted women in and around Spokane, Washington",[774]


In [6]:
len(serial_killers)

712

In [7]:
# dataframe of serial killers with the most possible victims (descending order)
possible_victims = serial_killers.sort_values(by=['possible victims'], ascending = False)
possible_victims

Unnamed: 0,name,years active,proven victims,possible victims,status,notes,source(s)
410,"LaLaurie, Delphine",1834,?,?,"Died in Paris, France",New Orleans socialite who tortured and maimed ...,[454]
423,"Little, Samuel",1970–2005,61,93+,Died in prison,"Known as ""The Choke-and-Stroke Killer""; transi...",[470][471]
559,"Ridgway, Gary",1982–1998,49,90+,Sentenced to life imprisonment,"Known as ""The Green River Killer""; targeted se...",[614]
135,"Cook, Anthony",1973–1981,9,9+,Sentenced to life imprisonment,Committed crimes with his brother Nathaniel Cook,[145]
15,"Anderson, Stephen Wayne",1973–1980,9,9+,Executed 2002,"Murdered people in Utah, Nevada, and California",[19][20][21]
...,...,...,...,...,...,...,...
297,"Harris, Michael Darnell",1981–1982,4,10,Sentenced to life imprisonment,"Raped, beat and strangled elderly women across...",[329]
416,"Lewingdon, Gary",1977–1978,10,10,Died in prison,"Together with brother Thaddeus Lewingdon, know...",[462][463]
382,"Kemper, Edmund",1964–1973,10,10,Sentenced to life imprisonment,"Known as ""The Co-Ed Killer""; murdered his gran...",[419]
711,"Zarinsky, Robert",1958–1974,2,10,Died in prison,Preyed on teenage girls in New Jersey,[775]


In [8]:
possible_victims.head(10)

Unnamed: 0,name,years active,proven victims,possible victims,status,notes,source(s)
410,"LaLaurie, Delphine",1834,?,?,"Died in Paris, France",New Orleans socialite who tortured and maimed ...,[454]
423,"Little, Samuel",1970–2005,61,93+,Died in prison,"Known as ""The Choke-and-Stroke Killer""; transi...",[470][471]
559,"Ridgway, Gary",1982–1998,49,90+,Sentenced to life imprisonment,"Known as ""The Green River Killer""; targeted se...",[614]
135,"Cook, Anthony",1973–1981,9,9+,Sentenced to life imprisonment,Committed crimes with his brother Nathaniel Cook,[145]
15,"Anderson, Stephen Wayne",1973–1980,9,9+,Executed 2002,"Murdered people in Utah, Nevada, and California",[19][20][21]
167,"Davis, Girvies",1978–1979,4,9+,Executed 1995,Accomplice of Richard Holman; killed robbery w...,[181]
320,"Hill, Ivan",1979–1994,9,9+,Sentenced to death,"Known as ""The 60 Freeway Killer""; responsible ...",[353]
546,"Rees, Melvin",1957–1959,5,9+,Died in prison,"Known as ""The Sex Beast""",[13][unreliable source?]
519,"Pierce, William",1970–1971,9,9+,Died in prison,One of his victims was the daughter of a South...,[572]
557,"Richards, Stephen D.",1876–1878,9,9+,Executed 1879,"Known as ""The Nebraska Fiend"", Richards murder...",[612]


In [9]:
years_active = serial_killers.sort_values(by='years active')

In [10]:
years_active.head(10)

Unnamed: 0,name,years active,proven victims,possible victims,status,notes,source(s)
448,"Mason, Samuel",1797–1803,20,20+,Killed/Died from injuries received during a sh...,River pirate associated with the Harpe brother...,[503]
295,Harpe Brothers,1797–1804,39,50+,Lynched 1799 (Micajah) Executed 1804 (Wiley),Brothers or cousins; America's first known ser...,[327]
96,"Cannon, Patty",1802–1829,4,25+,Died in prison awaiting trial,Gang leader who kidnapped slaves and free blac...,[103]
274,"Green, Samuel",1817–1821,2,2+,Executed 1822,"Known as ""The Terror of New England""",[308]
410,"LaLaurie, Delphine",1834,?,?,"Died in Paris, France",New Orleans socialite who tortured and maimed ...,[454]
580,"Rulloff, Edward H.",1844–1870,3,5,Executed 1871,"Known as ""The Genius Killer""; Canadian-born do...",[634][635]
288,"Hall, Andreas",1847–1848,3,3,Executed 1849,Prolific thief who killed three people during ...,[321]
311,"Helm, Boone",1850–1864,11,11+,Executed 1864,"Known as ""The Kentucky Cannibal""; mountain man...",[343][344]
201,"Evans, Franklin B.",1850–1872,2,5+,Executed 1874,"Known as ""The Northwood Murderer""; vagrant who...",[226]
705,"Wood, Isaac L.",1855,3,3,Executed 1858,"Poisoned his wife, brother and sister-in-law f...",[769]


In [11]:
unique_values = serial_killers['status'].unique()
# print the unique values
for value in unique_values:
    print(value)

Sentenced to death; commuted to life imprisonment
Sentenced to 75 years to life
Killed by police during shootout
Died in 2009
Executed 1995
nan
Died in prison awaiting execution
Executed 2010
Executed 1942
Died in prison
Sentenced to death
Sentenced to life imprisonment
Executed 2002
Sentenced to 61 years to life in prison
Died in Connecticut Hospital for the Insane
Executed 1999
Committed suicide to avoid arrest
Executed 1984
40 years imprisonment
Died in prison before execution
Executed 1926
Murdered in police custody
Committed suicide prior to execution
Executed 2005
Executed 1951
Unknown
Executed 1920
Executed 1949
Executed 1988
Sentenced to 50 years to life
Sentenced to 40 years in prison
Died while incarcerated at Henry Ford Allegiance Health
Executed 2016
Committed suicide in custody
Executed 1996
Executed 2019
Awaiting sentencing
Killed by intended victims
Executed 1984/1985 (Linwood and James) Sentenced to life imprisonment (Anthony)
Executed 1998
Sentenced to 150 years to lif

In [12]:
# reorganizing 'status' column for all values that include 'executed [year]'
serial_killers['status'] = np.where(serial_killers['status'].str.contains('Executed'), 'Executed', serial_killers['status'])

In [13]:
# calculate the counts for each unique value in the 'Status' column
category_counts = serial_killers['status'].value_counts()

# Print the top 15 values
print(category_counts.head(15))

status
Sentenced to life imprisonment                       168
Executed                                             167
Died in prison                                       107
Sentenced to death                                    69
Sentenced to death; commuted to life imprisonment     20
Died in prison awaiting execution                     18
Committed suicide to avoid arrest                     17
Committed suicide in prison                           10
Committed suicide in custody                           6
Committed suicide prior to execution                   6
Unknown                                                6
Sentenced to 75 years to life                          4
Committed suicide before trial                         4
Released in 2020                                       3
Incarcerated                                           2
Name: count, dtype: int64


In [44]:
# Convert 'Proven victims' column to numeric data type, ignoring errors
serial_killers['proven victims'] = pd.to_numeric(serial_killers['proven victims'], errors='coerce')

# Group by serial killer and calculate the sum of proven victims
killer_victim_counts = serial_killers.groupby('name')['proven victims'].sum()

# Sort the counts in descending order
killer_victim_counts = killer_victim_counts.sort_values(ascending=False)

# Print the sorted counts
print(killer_victim_counts.head(20))

name
Little, Samuel        61.0
Ridgway, Gary         49.0
Harpe Brothers        39.0
Harvey, Donald        37.0
Gacy, John Wayne      33.0
Espinosa, Felipe      32.0
Bundy, Ted            28.0
Corll, Dean           28.0
Corona, Juan          25.0
Gunness, Belle        25.0
Dominique, Ronald     23.0
Bartlett, Polly       22.0
Nelson, Earle         22.0
Stano, Gerald         22.0
Watts, Carl Eugene    22.0
Kearney, Patrick      21.0
Eyler, Larry          21.0
Bonin, William        21.0
Mason, Samuel         20.0
Kokoraleis, Andrew    18.0
Name: proven victims, dtype: float64


In [188]:
# calculating how many of these locations were similar (serial killer hotspots?)
# define a regex pattern to match state names (add more if needed)
state_pattern = r'\b((?:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New Hampshire|New Jersey|New Mexico|New York|North Carolina|North Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Rhode Island|South Carolina|South Dakota|Tennessee|Texas|Utah|Vermont|Virginia|Washington|West Virginia|Wisconsin|Wyoming))\b'

# extract state names from 'Notes' column and calculate the counts
state_counts = serial_killers['notes'].str.extractall(state_pattern, flags=re.IGNORECASE).value_counts()

# print the counts for each state
print(state_counts)

California        65
New York          34
Florida           34
Texas             29
Washington        19
Missouri          18
Ohio              16
Illinois          16
Michigan          15
Oregon            15
New Jersey        13
Pennsylvania      11
Kansas            10
North Carolina    10
Massachusetts     10
Indiana            9
Virginia           9
South Carolina     9
Kentucky           8
Colorado           7
Nevada             7
Utah               7
Georgia            7
Louisiana          6
Connecticut        6
Arizona            5
Maine              5
Nebraska           5
Maryland           5
Tennessee          4
Alaska             4
Oklahoma           4
West Virginia      4
New Mexico         3
Arkansas           3
Rhode Island       3
Wisconsin          3
Alabama            2
North Dakota       2
Montana            2
Minnesota          2
Delaware           2
Wyoming            2
New Hampshire      1
Mississippi        1
Iowa               1
Idaho              1
Hawaii       

Series([], dtype: int64)
