In [None]:
# Used for installation of Levenshtein
import subprocess

def install(name):
    """Install the package ``name`` with pip, into the running interpreter's environment.

    pip is invoked as ``<this interpreter> -m pip`` rather than as whatever
    ``pip`` executable is first on PATH, so the package is installed where the
    following ``import`` will look for it.

    Parameters
    ----------
    name : str
        Package name (or any requirement specifier pip accepts).

    Notes
    -----
    Best effort: a non-zero pip exit status is not raised here; the later
    ``import`` of the package will fail if the installation did not succeed.
    """
    import sys  # local import: only needed to locate the running interpreter

    subprocess.call([sys.executable, '-m', 'pip', 'install', name])
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from scipy.stats import fisher_exact
import statsmodels.api as sm

install("Levenshtein")
import Levenshtein as lev

%matplotlib inline

Setting State abbreviations (used in US heatmap) and setting a list of incumbents

In [None]:
# Full state / territory name -> two-letter postal abbreviation.
us_state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'American Samoa': 'AS', 'Arizona': 'AZ',
    'Arkansas': 'AR', 'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT',
    'Delaware': 'DE', 'District of Columbia': 'DC', 'Florida': 'FL', 'Georgia': 'GA',
    'Guam': 'GU', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
    'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY',
    'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA',
    'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO',
    'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH',
    'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC',
    'North Dakota': 'ND', 'Northern Mariana Islands': 'MP', 'Ohio': 'OH', 'Oklahoma': 'OK',
    'Oregon': 'OR', 'Pennsylvania': 'PA', 'Puerto Rico': 'PR', 'Rhode Island': 'RI',
    'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX',
    'Utah': 'UT', 'Vermont': 'VT', 'Virgin Islands': 'VI', 'Virginia': 'VA',
    'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
}
# These are all house and senate members as of 2017, and all governors whose terms expired in 2018.
# Names are compared against the candidate tables with a fuzzy (Levenshtein) match, so every
# entry must be a single clean string: no embedded line breaks and no mis-decoded characters.
house_inc = [
    'Bradley Byrne', 'Martha Roby', 'Mike Rogers', 'Robert Aderholt', 'Mo Brooks', 'Gary Palmer',
    'Terri Sewell', 'Don Young', "Tom O'Halleran", 'Martha McSally', 'Raúl Grijalva', 'Paul Gosar',
    'Andy Biggs', 'David Schweikert', 'Ruben Gallego', 'Trent Franks', 'Debbie Lesko', 'Kyrsten Sinema',
    'Rick Crawford', 'French Hill', 'Steve Womack', 'Bruce Westerman', 'Doug LaMalfa', 'Jared Huffman',
    'John Garamendi', 'Tom McClintock', 'Mike Thompson', 'Doris Matsui', 'Ami Bera', 'Paul Cook',
    'Jerry McNerney', 'Jeff Denham', 'Mark DeSaulnier', 'Nancy Pelosi', 'Barbara Lee', 'Jackie Speier',
    'Eric Swalwell', 'Jim Costa', 'Ro Khanna', 'Anna Eshoo', 'Zoe Lofgren', 'Jimmy Panetta',
    'David Valadao', 'Devin Nunes', 'Kevin McCarthy', 'Salud Carbajal', 'Steve Knight', 'Julia Brownley',
    'Judy Chu', 'Adam Schiff', 'Tony Cárdenas', 'Brad Sherman', 'Pete Aguilar', 'Grace Napolitano',
    'Ted Lieu', 'Xavier Becerra', 'Jimmy Gomez', 'Norma Torres', 'Raul Ruiz', 'Karen Bass',
    'Linda Sánchez', 'Ed Royce', 'Lucille Roybal-Allard', 'Mark Takano', 'Ken Calvert', 'Maxine Waters',
    'Nanette Barragán', 'Mimi Walters', 'Lou Correa', 'Alan Lowenthal', 'Dana Rohrabacher', 'Darrell Issa',
    'Duncan D. Hunter', 'Juan Vargas', 'Scott Peters', 'Susan Davis', 'Diana DeGette', 'Jared Polis',
    'Scott Tipton', 'Ken Buck', 'Doug Lamborn', 'Mike Coffman', 'Ed Perlmutter', 'John B. Larson',
    'Joe Courtney', 'Rosa DeLauro', 'Jim Himes', 'Elizabeth Esty', 'Lisa Blunt Rochester', 'Matt Gaetz',
    'Neal Dunn', 'Ted Yoho', 'John Rutherford', 'Al Lawson', 'Ron DeSantis', 'Stephanie Murphy',
    'Bill Posey', 'Darren Soto', 'Val Demings', 'Daniel Webster', 'Gus Bilirakis', 'Charlie Crist',
    'Kathy Castor', 'Dennis A. Ross', 'Vern Buchanan', 'Tom Rooney', 'Brian Mast', 'Francis Rooney',
    'Alcee Hastings', 'Lois Frankel', 'Ted Deutch', 'Debbie Wasserman Schultz', 'Frederica Wilson',
    'Mario Díaz-Balart', 'Carlos Curbelo', 'Ileana Ros-Lehtinen', 'Buddy Carter', 'Sanford Bishop',
    'Drew Ferguson', 'Hank Johnson', 'John Lewis', 'Tom Price', 'Karen Handel', 'Rob Woodall',
    'Austin Scott', 'Doug Collins', 'Jody Hice', 'Barry Loudermilk', 'Rick W. Allen', 'David Scott',
    'Tom Graves', 'Colleen Hanabusa', 'Tulsi Gabbard', 'Raúl Labrador', 'Mike Simpson', 'Bobby Rush',
    'Robin Kelly', 'Dan Lipinski', 'Luis Gutiérrez', 'Mike Quigley', 'Peter Roskam', 'Danny K. Davis',
    'Raja Krishnamoorthi', 'Jan Schakowsky', 'Brad Schneider', 'Bill Foster', 'Mike Bost', 'Rodney Davis',
    'Randy Hultgren', 'John Shimkus', 'Adam Kinzinger', 'Cheri Bustos', 'Darin LaHood', 'Pete Visclosky',
    'Jackie Walorski', 'Jim Banks', 'Todd Rokita', 'Susan Brooks', 'Luke Messer', 'André Carson',
    'Larry Bucshon', 'Trey Hollingsworth', 'Rod Blum', 'Dave Loebsack', 'David Young', 'Steve King',
    'Roger Marshall', 'Lynn Jenkins', 'Kevin Yoder', 'Mike Pompeo', 'Ron Estes', 'James Comer',
    'Brett Guthrie', 'John Yarmuth', 'Thomas Massie', 'Hal Rogers', 'Andy Barr', 'Steve Scalise',
    'Cedric Richmond', 'Clay Higgins', 'Mike Johnson', 'Ralph Abraham', 'Garret Graves', 'Chellie Pingree',
    'Bruce Poliquin', 'Andy Harris', 'Dutch Ruppersberger', 'John Sarbanes', 'Anthony G. Brown', 'Steny Hoyer',
    'John Delaney', 'Elijah Cummings', 'Jamie Raskin', 'Richard Neal', 'Jim McGovern', 'Niki Tsongas',
    'Joseph P. Kennedy III', 'Katherine Clark', 'Seth Moulton', 'Mike Capuano', 'Stephen F. Lynch', 'Bill Keating',
    'Jack Bergman', 'Bill Huizenga', 'Justin Amash', 'John Moolenaar', 'Dan Kildee', 'Fred Upton',
    'Tim Walberg', 'Mike Bishop', 'Sander Levin', 'Paul Mitchell', 'Dave Trott', 'Debbie Dingell',
    'John Conyers', 'Brenda Jones', 'Brenda Lawrence', 'Tim Walz', 'Jason Lewis', 'Erik Paulsen',
    'Betty McCollum', 'Keith Ellison', 'Tom Emmer', 'Collin Peterson', 'Rick Nolan', 'Trent Kelly',
    'Bennie Thompson', 'Gregg Harper', 'Steven Palazzo', 'Lacy Clay', 'Ann Wagner', 'Blaine Luetkemeyer',
    'Vicky Hartzler', 'Emanuel Cleaver', 'Sam Graves', 'Billy Long', 'Jason T. Smith', 'Ryan Zinke',
    'Greg Gianforte', 'Jeff Fortenberry', 'Don Bacon', 'Adrian Smith', 'Dina Titus', 'Mark Amodei',
    'Jacky Rosen', 'Ruben Kihuen', 'Carol Shea-Porter', 'Ann McLane Kuster', 'Donald Norcross', 'Frank LoBiondo',
    'Tom MacArthur', 'Chris Smith', 'Josh Gottheimer', 'Frank Pallone', 'Leonard Lance', 'Albio Sires',
    'Bill Pascrell', 'Donald Payne Jr.', 'Rodney Frelinghuysen', 'Bonnie Watson Coleman', 'Michelle Lujan Grisham',
    'Steve Pearce', 'Ben Ray Luján', 'Lee Zeldin', 'Peter T. King', 'Thomas Suozzi', 'Kathleen Rice',
    'Gregory Meeks', 'Grace Meng', 'Nydia Velázquez', 'Hakeem Jeffries', 'Yvette Clarke', 'Jerrold Nadler',
    'Dan Donovan', 'Carolyn Maloney', 'Adriano Espaillat', 'Joseph Crowley', 'José E. Serrano', 'Eliot Engel',
    'Nita Lowey', 'Sean Patrick Maloney', 'John Faso', 'Paul Tonko', 'Elise Stefanik', 'Claudia Tenney',
    'Tom Reed', 'John Katko', 'Louise Slaughter', 'Joseph D. Morelle', 'Brian Higgins', 'Chris Collins',
    'G. K. Butterfield', 'George Holding', 'Walter B. Jones Jr.', 'David Price', 'Virginia Foxx', 'Mark Walker',
    'David Rouzer', 'Richard Hudson', 'Robert Pittenger', 'Patrick McHenry', 'Mark Meadows', 'Alma Adams',
    'Ted Budd', 'Kevin Cramer', 'Steve Chabot', 'Brad Wenstrup', 'Joyce Beatty', 'Jim Jordan',
    'Bob Latta', 'Bill Johnson', 'Bob Gibbs', 'Warren Davidson', 'Marcy Kaptur', 'Mike Turner',
    'Marcia Fudge', 'Pat Tiberi', 'Troy Balderson', 'Tim Ryan', 'David Joyce', 'Steve Stivers',
    'Jim Renacci', 'Jim Bridenstine', 'Kevin Hern', 'Markwayne Mullin', 'Frank Lucas', 'Tom Cole',
    'Steve Russell', 'Suzanne Bonamici', 'Greg Walden', 'Earl Blumenauer', 'Peter DeFazio', 'Kurt Schrader',
    'Bob Brady', 'Dwight Evans', 'Mike Kelly', 'Scott Perry', 'Glenn Thompson', 'Ryan Costello',
    'Pat Meehan', 'Mary Gay Scanlon', 'Brian Fitzpatrick', 'Bill Shuster', 'Tom Marino', 'Lou Barletta',
    'Keith Rothfus', 'Brendan Boyle', 'Michael F. Doyle', 'Charlie Dent', 'Susan Wild', 'Lloyd Smucker',
    'Matt Cartwright', 'Tim Murphy', 'Conor Lamb', 'David Cicilline', 'James Langevin', 'Mark Sanford',
    'Joe Wilson', 'Jeff Duncan', 'Trey Gowdy', 'Mick Mulvaney', 'Ralph Norman', 'Jim Clyburn',
    'Tom Rice', 'Kristi Noem', 'Phil Roe', 'Jimmy Duncan', 'Chuck Fleischmann', 'Scott DesJarlais',
    'Jim Cooper', 'Diane Black', 'Marsha Blackburn', 'David Kustoff', 'Steve Cohen', 'Louie Gohmert',
    'Ted Poe', 'Sam Johnson', 'John Ratcliffe', 'Jeb Hensarling', 'Joe Barton', 'John Culberson',
    'Kevin Brady', 'Al Green', 'Michael McCaul', 'Mike Conaway', 'Kay Granger', 'Mac Thornberry',
    'Randy Weber', 'Vicente Gonzalez', "Beto O'Rourke", 'Bill Flores', 'Sheila Jackson Lee', 'Jodey Arrington',
    'Joaquín Castro', 'Lamar S. Smith', 'Pete Olson', 'Will Hurd', 'Kenny Marchant', 'Roger Williams',
    'Michael Burgess', 'Blake Farenthold', 'Michael Cloud', 'Henry Cuellar', 'Gene Green', 'Eddie Bernice Johnson',
    'John Carter', 'Pete Sessions', 'Marc Veasey', 'Filemon Vela Jr.', 'Lloyd Doggett', 'Brian Babin',
    'Rob Bishop', 'Chris Stewart', 'Jason Chaffetz', 'John Curtis', 'Mia Love', 'Peter Welch',
    'Rob Wittman', 'Scott Taylor', 'Bobby Scott', 'Donald McEachin', 'Tom Garrett Jr.', 'Bob Goodlatte',
    'Dave Brat', 'Don Beyer', 'Morgan Griffith', 'Barbara Comstock', 'Gerry Connolly', 'Suzan DelBene',
    'Rick Larsen', 'Jaime Herrera Beutler', 'Dan Newhouse', 'Cathy McMorris Rodgers', 'Derek Kilmer', 'Pramila Jayapal',
    'Dave Reichert', 'Adam Smith', 'Dennis Heck', 'David McKinley', 'Alex Mooney', 'Evan Jenkins',
    'Paul Ryan', 'Mark Pocan', 'Ron Kind', 'Gwen Moore', 'Jim Sensenbrenner', 'Glenn Grothman',
    'Sean Duffy', 'Mike Gallagher', 'Liz Cheney',
]
# Senators, grouped by state in alphabetical state order.
# The Minnesota group previously began with the scraping artefact ' name=DFLNPL '
# in place of Amy Klobuchar, so she could never be matched as an incumbent.
senate_inc = [
    'Jeff Sessions', 'Luther Strange', 'Doug Jones', 'Richard Shelby', 'Dan Sullivan', 'Lisa Murkowski',
    'Jeff Flake', 'John McCain', 'Jon Kyl', 'Tom Cotton', 'John Boozman', 'Dianne Feinstein',
    'Kamala Harris', 'Cory Gardner', 'Michael Bennet', 'Chris Murphy', 'Richard Blumenthal', 'Tom Carper',
    'Chris Coons', 'Bill Nelson', 'Marco Rubio', 'David Perdue', 'Johnny Isakson', 'Mazie Hirono',
    'Brian Schatz', 'Jim Risch', 'Mike Crapo', 'Dick Durbin', 'Tammy Duckworth', 'Joe Donnelly',
    'Todd Young', 'Joni Ernst', 'Chuck Grassley', 'Pat Roberts', 'Jerry Moran', 'Mitch McConnell',
    'Rand Paul', 'Bill Cassidy', 'John Kennedy', 'Angus King', 'Susan Collins', 'Ben Cardin',
    'Chris Van Hollen', 'Elizabeth Warren', 'Ed Markey', 'Debbie Stabenow', 'Gary Peters', 'Amy Klobuchar',
    'Al Franken', 'Tina Smith', 'Roger Wicker', 'Thad Cochran', 'Cindy Hyde-Smith', 'Claire McCaskill',
    'Roy Blunt', 'Jon Tester', 'Steve Daines', 'Deb Fischer', 'Ben Sasse', 'Dean Heller',
    'Catherine Cortez Masto', 'Jeanne Shaheen', 'Maggie Hassan', 'Bob Menendez', 'Cory Booker', 'Martin Heinrich',
    'Tom Udall', 'Kirsten Gillibrand', 'Chuck Schumer', 'Thom Tillis', 'Richard Burr', 'Heidi Heitkamp',
    'John Hoeven', 'Sherrod Brown', 'Rob Portman', 'Jim Inhofe', 'James Lankford', 'Jeff Merkley',
    'Ron Wyden', 'Bob Casey Jr.', 'Pat Toomey', 'Sheldon Whitehouse', 'Jack Reed', 'Lindsey Graham',
    'Tim Scott', 'Mike Rounds', 'John Thune', 'Bob Corker', 'Lamar Alexander', 'Ted Cruz',
    'John Cornyn', 'Orrin Hatch', 'Mike Lee', 'Bernie Sanders', 'Patrick Leahy', 'Tim Kaine',
    'Mark Warner', 'Maria Cantwell', 'Patty Murray', 'Joe Manchin', 'Shelley Moore Capito', 'Tammy Baldwin',
    'Ron Johnson', 'John Barrasso', 'Mike Enzi',
]
# Governors. Spelling corrected: "Chris Christie" (was "Christ") and "Yutaka" (was "Yukata").
# NOTE(review): the list is short -- confirm it covers every governor intended.
gov_inc = ["Bill Walker", "David Yutaka Ige", "Chris Christie", "Terry McAuliffe"]
inc = house_inc + senate_inc + gov_inc

Reading in Data from tables, and removing candidates who ran uncontested

In [None]:
dem = pd.read_csv("rawdata/dem_candidates.csv")
rep = pd.read_csv("rawdata/rep_candidates.csv", encoding='latin-1')
# Remove candidates who ran uncontested.
# .copy() makes each filtered table an independent frame; later cells add columns
# to `dem` and `rep`, which would otherwise be writes into a slice of the raw frame
# (pandas SettingWithCopyWarning).
dem = dem[dem["Primary %"] != 100].copy()
rep = rep[rep["Primary %"] != 100].copy()
# Get census state population estimates
state_pops = pd.read_excel("https://www2.census.gov/programs-surveys/popest/tables/2010-2019/state/totals/nst-est2019-01.xlsx")
# The sheet's first column is labelled with this descriptive header text.
name_col = 'table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)'
# Rows 8..58 are kept -- presumably the 50 states plus DC; TODO confirm against the sheet.
# NOTE(review): confirm which estimate year "Unnamed: 11" holds.
state_pops = state_pops[8:59][[name_col, "Unnamed: 11"]]
state_names = state_pops[name_col].to_numpy()
state_pops = state_pops["Unnamed: 11"].to_numpy()
# Names in these rows carry a leading dot (see the header text), dropped with [1:].
pops_dict = {
    us_state_abbrev[name[1:]]: population
    for name, population in zip(state_names, state_pops)
}

The Republican table does not include partisan leans, so we look them up by district in the Democratic table

In [None]:
# Lookup table: district -> partisan lean, built from the Democratic table.
# If a district appears on several rows, the last row's lean is the one kept.
districts = dem["District"].to_numpy()
leans = dem["Partisan Lean"].to_numpy()
district_lean = dict(zip(districts, leans))

In [None]:
rep_districts = rep["District"].to_numpy()
# Districts that never appear in the Democratic table get a real missing value
# (np.nan) instead of the string "NaN", so pandas recognises them as missing.
rep_leans = [district_lean.get(district, np.nan) for district in rep_districts]
rep["Partisan Lean"] = rep_leans

Use our list of incumbents to determine whether a candidate was an incumbent

In [None]:
# Minimum Levenshtein ratio for a candidate name to count as the same person as a
# listed incumbent; below 1.0 so small spelling differences/errors still match.
INCUMBENT_MATCH_THRESHOLD = 0.9


def flag_incumbents(candidates, incumbents, threshold=INCUMBENT_MATCH_THRESHOLD):
    """Label each candidate "Yes" or "No" for incumbency by fuzzy name matching.

    Parameters
    ----------
    candidates : iterable of str
        Candidate names, in table order.
    incumbents : iterable of str
        Known incumbent names.
    threshold : float
        A candidate is an incumbent when ``lev.ratio`` against at least one
        incumbent name is strictly greater than this value.

    Returns
    -------
    list of str
        One "Yes"/"No" entry per candidate, in the same order as ``candidates``.
    """
    return [
        "Yes" if any(lev.ratio(name, known) > threshold for known in incumbents) else "No"
        for name in candidates
    ]


rep["Incumbent?"] = flag_incumbents(rep["Candidate"], inc)
dem["Incumbent?"] = flag_incumbents(dem["Candidate"], inc)

Build tables of endorsed and unendorsed candidates for each party

In [None]:
# Endorsement columns that count as a "prominent" endorsement for each party.
# Note: "Warren Endorsed? " really does end with a space in the source table.
DEM_ENDORSEMENT_COLS = [
    "Biden Endorsed?", "Warren Endorsed? ", "Sanders Endorsed?", "Emily Endorsed?",
    "Our Revolution Endorsed?", "Justice Dems Endorsed?", "PCCC Endorsed?",
    "Indivisible Endorsed?", "WFP Endorsed?", "VoteVets Endorsed?", "Guns Sense Candidate?",
]
REP_ENDORSEMENT_COLS = [
    "Trump Endorsed?", "Bannon Endorsed?", "Great America Endorsed?", "NRA Endorsed?",
    "Right to Life Endorsed?", "Susan B. Anthony Endorsed?", "Club for Growth Endorsed?",
    "Koch Support?", "House Freedom Support?", "Tea Party Endorsed?",
    "Main Street Endorsed?", "Chamber Endorsed?",
]

# Boolean mask per candidate: True when at least one listed column equals "Yes".
prominent_dem = dem[DEM_ENDORSEMENT_COLS].eq("Yes").any(axis=1)
# .copy(): these subsets get a "State" column added in the heat-map cells, so they
# must be independent frames rather than slices of `dem` / `rep`.
prominent_endorsed_dem = dem[prominent_dem].copy()
unendorsed_dem = dem[~prominent_dem].copy()
num_endorsed_dem = len(prominent_endorsed_dem)

prominent_rep = rep[REP_ENDORSEMENT_COLS].eq("Yes").any(axis=1)
prominent_endorsed_rep = rep[prominent_rep].copy()
unendorsed_rep = rep[~prominent_rep].copy()
num_endorsed_rep = len(prominent_endorsed_rep)

Add a simple column that indicates whether a candidate received at least one endorsement

In [None]:
# prominent_dem / prominent_rep are the boolean "was endorsed" masks set in the cell above;
# translate them row by row into "Yes"/"No" labels.
dem["Endorsed?"] = np.where(prominent_dem, "Yes", "No")
rep["Endorsed?"] = np.where(prominent_rep, "Yes", "No")

# Naive Difference in Means ATE Computation for Endorsement

In [None]:
# For Republicans: share of primary winners among endorsed vs. unendorsed candidates
primary_result_endorsed_rep = prominent_endorsed_rep["Won Primary"]
primary_result_unendorsed_rep = unendorsed_rep["Won Primary"]

endorsed_wins_rep = sum(primary_result_endorsed_rep.eq("Yes"))
unendorsed_wins_rep = sum(primary_result_unendorsed_rep.eq("Yes"))
proportion_endorsed_rep = endorsed_wins_rep / len(primary_result_endorsed_rep)
proportion_unendorsed_rep = unendorsed_wins_rep / len(primary_result_unendorsed_rep)

# Naive effect estimate: simple difference between the two win rates
diff_rep = proportion_endorsed_rep - proportion_unendorsed_rep

rep_bar_labels = ["Proportion Won Endorsed Rep", "Proportion Won Unendorsed Rep"]
rep_bar_heights = [proportion_endorsed_rep, proportion_unendorsed_rep]
plt.bar(rep_bar_labels, rep_bar_heights, color=["red", "lightcoral"])

In [None]:
# For Democrats: share of primary winners among endorsed vs. unendorsed candidates
primary_result_endorsed_dem = prominent_endorsed_dem["Won Primary"]
primary_result_unendorsed_dem = unendorsed_dem["Won Primary"]

endorsed_wins_dem = sum(primary_result_endorsed_dem.eq("Yes"))
unendorsed_wins_dem = sum(primary_result_unendorsed_dem.eq("Yes"))
proportion_endorsed_dem = endorsed_wins_dem / len(primary_result_endorsed_dem)
proportion_unendorsed_dem = unendorsed_wins_dem / len(primary_result_unendorsed_dem)

# Naive effect estimate: simple difference between the two win rates
diff_dem = proportion_endorsed_dem - proportion_unendorsed_dem

dem_bar_labels = ["Proportion Won Endorsed Dem", "Proportion Won Unendorsed Dem"]
dem_bar_heights = [proportion_endorsed_dem, proportion_unendorsed_dem]
plt.bar(dem_bar_labels, dem_bar_heights, color=["blue", "cornflowerblue"])

In [None]:
# Side-by-side bars of the naive endorsed-minus-unendorsed difference, one bar per party
naive_diff_labels = ["Naive Endorsed Difference Dem", "Naive Endorsed Difference Rep"]
naive_diff_values = [diff_dem, diff_rep]
plt.bar(naive_diff_labels, naive_diff_values, color=["blue", "red"])

# Logistic Regression for Endorsement to Primary Victory

In [None]:
# For Democrats
# Outcome: 1 if the candidate won the primary, else 0.
# Missing Data in the Won Primary is No, checked manually -- only that column is filled.
# .map + .astype(int) gives a numeric column on every pandas version (chained
# .replace() can leave object dtype, which statsmodels rejects) and fails with a
# clear error if a value other than "Yes"/"No" is present.
yes_no_to_int = {"No": 0, "Yes": 1}
Y = dem["Won Primary"].fillna("No").map(yes_no_to_int).astype(int)
# Treatment indicator: 1 if the candidate received at least one prominent endorsement.
Z = dem["Endorsed?"].map(yes_no_to_int).astype(int)
Z = sm.add_constant(Z)  # add the intercept column
model = sm.Logit(Y, Z).fit()
print(model.summary())

In [None]:
# For Republicans
# Outcome: 1 if the candidate won the primary, else 0.
# Missing Data in the Won Primary is No, checked manually -- only that column is filled.
# .map + .astype(int) gives a numeric column on every pandas version (chained
# .replace() can leave object dtype, which statsmodels rejects) and fails with a
# clear error if a value other than "Yes"/"No" is present.
yes_no_to_int = {"No": 0, "Yes": 1}
Y = rep["Won Primary"].fillna("No").map(yes_no_to_int).astype(int)
# Treatment indicator: 1 if the candidate received at least one prominent endorsement.
Z = rep["Endorsed?"].map(yes_no_to_int).astype(int)
Z = sm.add_constant(Z)  # add the intercept column
model = sm.Logit(Y, Z).fit()
print(model.summary())

# Total number of endorsed candidates for each party

In [None]:
# One bar per party: number of candidates with at least one prominent endorsement
endorsed_count_labels = ["Endorsed Candidates Dem", "Endorsed Candidates Rep"]
endorsed_count_values = [num_endorsed_dem, num_endorsed_rep]
plt.bar(endorsed_count_labels, endorsed_count_values, color=["blue", "red"])

# Heat Maps for where Endorsements are located

In [None]:
# For Democrats
# List of all state names
state_names = ["Alaska", "Alabama", "Arkansas", "American Samoa", "Arizona", "California", "Colorado", "Connecticut", "Delaware", "Florida", "Georgia", "Guam", "Hawaii", "Iowa", "Idaho", "Illinois", "Indiana", "Kansas", "Kentucky", "Louisiana", "Massachusetts", "Maryland", "Maine", "Michigan", "Minnesota", "Missouri", "Mississippi", "Montana", "North Carolina", "North Dakota", "Nebraska", "New Hampshire", "New Jersey", "New Mexico", "Nevada", "New York", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Puerto Rico", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Virginia", "Virgin Islands", "Vermont", "Washington", "Wisconsin", "West Virginia", "Wyoming"]

# Districts are matched to a state by substring. Longer names are tried first so
# that e.g. a "West Virginia" district is not captured by the substring "Virginia".
names_longest_first = sorted(state_names, key=len, reverse=True)
dem_state_codes = []
for district in prominent_endorsed_dem["District"]:
    matched = next((name for name in names_longest_first if name in district), None)
    # Always append exactly one entry per row (None when no state is recognised)
    # so the list stays aligned with the table.
    dem_state_codes.append(us_state_abbrev[matched] if matched is not None else None)

# Operating on the endorsed candidate table. .assign returns a new frame, so this
# does not write into a filtered slice of `dem`.
prominent_endorsed_dem = prominent_endorsed_dem.assign(State=dem_state_codes)
# Group by state, get count (rows whose State is missing are dropped by groupby)
state_count_dem = prominent_endorsed_dem.groupby("State")["Candidate"].count()

# Endorsed candidates per 100,000 residents. States or territories without a
# population entry in pops_dict are skipped instead of raising KeyError.
state_counts = {
    state: count / pops_dict[state] * 100000
    for state, count in state_count_dem.items()
    if state in pops_dict
}

new_df = pd.DataFrame({
    "State": list(state_counts.keys()),
    "Endorsements per 100 Thousand": list(state_counts.values()),
})


fig = px.choropleth(new_df,
                    locations="State",
                    color="Endorsements per 100 Thousand",
                    hover_name="State",
                    locationmode = 'USA-states')
fig.update_layout(
    title_text = 'Democrat Endorsement Locations',
    geo_scope='usa',
)
fig.show()



In [None]:
#For Republicans
# Uses state_names, the list of state names defined in the Democratic heat-map cell.
# Districts are matched to a state by substring. Longer names are tried first so
# that e.g. a "West Virginia" district is not captured by the substring "Virginia".
names_longest_first = sorted(state_names, key=len, reverse=True)
rep_state_codes = []
for district in prominent_endorsed_rep["District"]:
    matched = next((name for name in names_longest_first if name in district), None)
    # Always append exactly one entry per row (None when no state is recognised)
    # so the list stays aligned with the table.
    rep_state_codes.append(us_state_abbrev[matched] if matched is not None else None)

# Operating on the endorsed candidate table. .assign returns a new frame, so this
# does not write into a filtered slice of `rep`.
prominent_endorsed_rep = prominent_endorsed_rep.assign(State=rep_state_codes)
# Group by state, get count (rows whose State is missing are dropped by groupby)
state_count_rep = prominent_endorsed_rep.groupby("State")["Candidate"].count()

# Endorsed candidates per 100,000 residents. States or territories without a
# population entry in pops_dict are skipped instead of raising KeyError.
state_counts = {
    state: count / pops_dict[state] * 100000
    for state, count in state_count_rep.items()
    if state in pops_dict
}

new_df = pd.DataFrame({
    "State": list(state_counts.keys()),
    "Endorsements per 100 Thousand": list(state_counts.values()),
})

fig = px.choropleth(new_df,
                    locations="State",
                    color="Endorsements per 100 Thousand",
                    hover_name="State",
                    locationmode = 'USA-states')
fig.update_layout(
    title_text = 'Republican Endorsement Locations',
    geo_scope='usa',
)
fig.show()

# Graph Partisan Lean by Primary Vote %, to see if there's correlation

In [None]:
# Pool both parties: partisan lean on x, primary vote share on y.
# float() also turns any non-float entries (e.g. numeric strings) into floats.
dem_lean = list(map(float, dem["Partisan Lean"].tolist()))
rep_lean = list(map(float, rep["Partisan Lean"].tolist()))
leans = dem_lean + rep_lean

dem_percent = list(map(float, dem["Primary %"].tolist()))
rep_percent = list(map(float, rep["Primary %"].tolist()))
percents = dem_percent + rep_percent

plt.scatter(leans, percents)
plt.xlabel("Partisan Lean")
plt.ylabel("Primary Vote % Recieved")

# Cross tabulation of Incumbency and Primary Victory

In [None]:
# Missing values are treated as "No" in both tables before cross-tabulating.
dem_filled = dem.fillna("No")
rep_filled = rep.fillna("No")

# Incumbency (rows) against primary outcome (columns), one table per party
dem_inc_cross = pd.crosstab(dem_filled["Incumbent?"], dem_filled["Won Primary"])
rep_inc_cross = pd.crosstab(rep_filled["Incumbent?"], rep_filled["Won Primary"])

# Fisher's exact test on each table; only the p-value is kept
p_dem = fisher_exact(dem_inc_cross)[1]
p_rep = fisher_exact(rep_inc_cross)[1]

print(dem_inc_cross)
print(rep_inc_cross)
print(p_dem, p_rep)

Crosstabulation of non-incumbents for each party and Primary Victory

In [None]:
# Keep only non-incumbents in each party (missing values count as "No")
dem_filled = dem.fillna("No")
non_incumbent_dem = dem_filled[dem_filled["Incumbent?"] == "No"]
rep_filled = rep.fillna("No")
non_incumbent_rep = rep_filled[rep_filled["Incumbent?"] == "No"]

# One party label per remaining row, Democrats first, then Republicans
dem_array = ["Dem"] * len(non_incumbent_dem)
rep_array = ["Rep"] * len(non_incumbent_rep)
party_array = dem_array + rep_array # Array that indicates if a row is a dem or a republican

# Stack the two parties' 'Won Primary' values in the same order as the labels
won_primary_combined = non_incumbent_dem["Won Primary"].tolist() + non_incumbent_rep["Won Primary"].tolist()
new_crosstab = pd.DataFrame({"Party": party_array, "Won Primary": won_primary_combined})

crossed = pd.crosstab(new_crosstab["Party"], new_crosstab["Won Primary"])
print(crossed)
p_val_inc = fisher_exact(crossed)[1]
print(p_val_inc)

# Cross Tabulation for Party Support and Won Primary

In [None]:
# For Democrats: party support (rows) against primary outcome (columns)
dem_filled = dem.fillna("No")
dem_support_cross = pd.crosstab(dem_filled["Party Support?"], dem_filled["Won Primary"])
print(dem_support_cross)
dem_odds_ratio, p_dem = fisher_exact(dem_support_cross)

# For Republicans: same table, using the Republican support column
rep_filled = rep.fillna("No")
rep_support_cross = pd.crosstab(rep_filled["Rep Party Support?"], rep_filled["Won Primary"])
print(rep_support_cross)
rep_odds_ratio, p_rep = fisher_exact(rep_support_cross)

print(p_dem, p_rep)

Cross Tabulation for Candidates with No Party Support by Primary Victory

In [None]:
# Keep only candidates without party support in each party (missing values count as "No")
dem_filled = dem.fillna("No")
non_support_dem = dem_filled[dem_filled["Party Support?"] == "No"]
rep_filled = rep.fillna("No")
non_support_rep = rep_filled[rep_filled["Rep Party Support?"] == "No"]

# One party label per remaining row, Democrats first, then Republicans
dem_array = ["Dem"] * len(non_support_dem)
rep_array = ["Rep"] * len(non_support_rep)
party_array = dem_array + rep_array

# Stack the two parties' 'Won Primary' values in the same order as the labels
won_primary_combined = non_support_dem["Won Primary"].tolist() + non_support_rep["Won Primary"].tolist()
new_crosstab = pd.DataFrame({"Party": party_array, "Won Primary": won_primary_combined})

no_support_cross = pd.crosstab(new_crosstab["Party"], new_crosstab["Won Primary"])
print(no_support_cross)
p_no_support = fisher_exact(no_support_cross)[1]
print(p_no_support)

Get metrics for Endorsements and Party Support correlation

In [None]:
# For Dems: among party-supported candidates, the share that won and the share that were endorsed
dem_party_supported = dem[dem["Party Support?"] == "Yes"]
n_dem_supported = len(dem_party_supported)

dem_supported_wins = int(dem_party_supported["Won Primary"].eq("Yes").sum())
val_dem = dem_supported_wins / n_dem_supported
print(val_dem)

dem_supported_endorsed = int(dem_party_supported["Endorsed?"].eq("Yes").sum())
print(dem_supported_endorsed / n_dem_supported)

In [None]:
# For Reps: among party-supported candidates, the share that won and the share that were endorsed
rep_party_supported = rep[rep["Rep Party Support?"] == "Yes"]
n_rep_supported = len(rep_party_supported)

rep_supported_wins = int(rep_party_supported["Won Primary"].eq("Yes").sum())
val_rep = rep_supported_wins / n_rep_supported
print(val_rep)

rep_supported_endorsed = int(rep_party_supported["Endorsed?"].eq("Yes").sum())
print(rep_supported_endorsed / n_rep_supported)

# Get Victory Proportions for Candidates who were not party supported and who were not incumbents

In [None]:
# For Dems Endorsed and Unendorsed
# Endorsed candidates that are neither party-supported nor incumbents, and their win rate
endorsed_dem_keep = prominent_endorsed_dem["Party Support?"].ne("Yes") & prominent_endorsed_dem["Incumbent?"].ne("Yes")
dem_endorsed_no_party = prominent_endorsed_dem[endorsed_dem_keep]
dem_endorsed_wins = int(dem_endorsed_no_party["Won Primary"].eq("Yes").sum())
dem_endorsed_val_no_party = dem_endorsed_wins / len(dem_endorsed_no_party)

# Same restriction and win rate for unendorsed candidates
unendorsed_dem_keep = unendorsed_dem["Party Support?"].ne("Yes") & unendorsed_dem["Incumbent?"].ne("Yes")
dem_unendorsed_no_party = unendorsed_dem[unendorsed_dem_keep]
dem_unendorsed_wins = int(dem_unendorsed_no_party["Won Primary"].eq("Yes").sum())
dem_unendorsed_val_no_party = dem_unendorsed_wins / len(dem_unendorsed_no_party)

In [None]:
# For Reps Endorsed and Unendorsed
# Endorsed candidates that are neither party-supported nor incumbents, and their win rate
endorsed_rep_keep = prominent_endorsed_rep["Rep Party Support?"].ne("Yes") & prominent_endorsed_rep["Incumbent?"].ne("Yes")
rep_endorsed_no_party = prominent_endorsed_rep[endorsed_rep_keep]
rep_endorsed_wins = int(rep_endorsed_no_party["Won Primary"].eq("Yes").sum())
rep_endorsed_val_no_party = rep_endorsed_wins / len(rep_endorsed_no_party)

# Same restriction and win rate for unendorsed candidates
unendorsed_rep_keep = unendorsed_rep["Rep Party Support?"].ne("Yes") & unendorsed_rep["Incumbent?"].ne("Yes")
rep_unendorsed_no_party = unendorsed_rep[unendorsed_rep_keep]
rep_unendorsed_wins = int(rep_unendorsed_no_party["Won Primary"].eq("Yes").sum())
rep_unendorsed_val_no_party = rep_unendorsed_wins / len(rep_unendorsed_no_party)

In [None]:
# Get total proportion, without considering Endorsements:
# win rate among candidates that are neither party-supported nor incumbents
dem_keep = dem["Party Support?"].ne("Yes") & dem["Incumbent?"].ne("Yes")
dem_party_unsupported = dem[dem_keep]
dem_unsupported_wins = int(dem_party_unsupported["Won Primary"].eq("Yes").sum())
val_dem = dem_unsupported_wins / len(dem_party_unsupported)

rep_keep = rep["Rep Party Support?"].ne("Yes") & rep["Incumbent?"].ne("Yes")
rep_party_unsupported = rep[rep_keep]
rep_unsupported_wins = int(rep_party_unsupported["Won Primary"].eq("Yes").sum())
val_rep = rep_unsupported_wins / len(rep_party_unsupported)

Plot proportions side by side

In [None]:
# Set the font size before the figure is built so that it applies to this plot's
# text on the first run, rather than only to figures created afterwards.
plt.rcParams.update({'font.size': 15})

f, ax = plt.subplots(figsize=(20, 10))

# Win rates among candidates without party support who are not incumbents:
# overall, endorsed and unendorsed, for each party. Drawn on the axes created above.
ax.bar(["Dem Won", "End Dem Won", "Unend Dem Won", "Rep Won", "End Rep Won", "Unend Rep Won"],
       [val_dem, dem_endorsed_val_no_party, dem_unendorsed_val_no_party, val_rep, rep_endorsed_val_no_party, rep_unendorsed_val_no_party],
       color=["blue", "cornflowerblue", "royalblue", "red", "lightcoral", "firebrick"])
plt.show()

# Use Logistic Regression with Confounders and Endorsed

In [None]:
# For Dems
# Note: despite its name, this frame holds ALL Democratic candidates, recoded Yes -> 1, No -> 0.
clean_dem_party_unsupported = dem.fillna(0).replace("Yes", 1).replace("No", 0) # Again we can fillna(0) perfectly fine, manually checked
# Explicit numeric casts: .replace() on text columns can leave object dtype, which
# statsmodels cannot fit; the cast also fails loudly if a value other than Yes/No remains.
regress_on = clean_dem_party_unsupported[['Party Support?', 'Endorsed?', "Incumbent?"]].astype(float)

vals = clean_dem_party_unsupported["Won Primary"].astype(int)
regress_on = sm.add_constant(regress_on)  # add the intercept column
model = sm.Logit(vals, regress_on).fit()
print(model.summary())

In [None]:
# For Reps
# Note: despite its name, this frame holds ALL Republican candidates, recoded Yes -> 1, No -> 0.
clean_rep_party_unsupported = rep.fillna(0).replace("Yes", 1).replace("No", 0) # Again we can fillna(0) perfectly fine, manually checked
# Explicit numeric casts: .replace() on text columns can leave object dtype, which
# statsmodels cannot fit; the cast also fails loudly if a value other than Yes/No remains.
regress_on = clean_rep_party_unsupported[['Rep Party Support?', "Endorsed?", "Incumbent?"]].astype(float)

vals = clean_rep_party_unsupported["Won Primary"].astype(int)
regress_on = sm.add_constant(regress_on)  # add the intercept column
model = sm.Logit(vals, regress_on).fit()
print(model.summary())