# 2022 Senate elections

In [1]:
from pathlib import Path

import pandas as pd

In [2]:
# Wikipedia article whose HTML tables are read in the next step.
url = "https://en.wikipedia.org/wiki/2022_United_States_Senate_elections"

In [3]:
# Parse the HTML tables found at `url` into a list of DataFrames,
# taking row 0 of each table as its column header.
page = pd.read_html(io=url, header=0)

In [4]:
# Take the table at position 9 of the scraped list as the working frame.
# NOTE(review): this position depends on the live page layout — confirm it
# still points at the table with the 'Constituency' / ratings columns.
src = pd.DataFrame(data=page[9])

In [5]:
# Keep only the four columns used downstream. The explicit .copy() makes
# `src` an independent frame, so the in-place rename / column assignment /
# drop steps that follow do not operate on a slice of the scraped table
# (which is what triggers pandas' SettingWithCopyWarning).
src = src[['Constituency', 'Incumbent', 'Incumbent.1', '2022 election ratings']].copy()

In [6]:
# Swap the scraped headers for short snake_case column names.
column_names = {
    'Constituency': 'state',
    'Incumbent': 'incumbent',
    'Incumbent.1': 'last_election',
    '2022 election ratings': 'cook_rating',
}
src.rename(columns=column_names, inplace=True)

In [7]:
# Break `last_election` on single spaces into three pieces. Only the middle
# piece is kept as `party`; 'trash' and 'trashy' are throwaway columns that
# get dropped a few cells later.
last_election_parts = src['last_election'].str.split(' ', expand=True)
src[['trash', 'party', 'trashy']] = last_election_parts

In [8]:
# Remove the rows labelled 0 and 35 from the frame.
# NOTE(review): presumably non-state header/footer rows of the scraped
# table — confirm against the raw table.
src.drop(index=[0, 35], inplace=True)

In [9]:
# Discard the two throwaway columns produced by the space split.
src.drop(columns=['trash', 'trashy'], inplace=True)

In [10]:
# Preview the first five rows.
src.head(5)

Unnamed: 0,state,incumbent,last_election,cook_rating,party
1,Alabama,Richard Shelby(retiring),64.0% R,Solid R,R
2,Alaska,Lisa Murkowski,44.4% R,Solid R,R
3,Arizona,Mark Kelly,51.2% D(2020 special)[d],Tossup,D(2020
4,Arkansas,John Boozman,59.8% R,Solid R,R
5,California,Alex Padilla,Appointed(2021)[e],Solid D,


In [11]:
# Two manual party corrections, both resolving to 'D':
#   * rows whose party token came out of the split as 'D(2020'
#   * Alex Padilla, whose `last_election` text yields no party token
needs_party_d = (src['party'] == 'D(2020') | (src['incumbent'] == 'Alex Padilla')
src.loc[needs_party_d, 'party'] = 'D'

In [12]:
# Strip the literal annotation strings from `last_election`, one after
# another and in this order (plain-text matching, not regex).
for annotation in ('(2020 special)[f]', '(2020 special)[d]', 'Appointed(2021)[e]'):
    src['last_election'] = src['last_election'].str.replace(annotation, "", regex=False)

# Remove the "(retiring)" marker from incumbent names.
incumbent_names = src['incumbent']
src['incumbent'] = incumbent_names.str.replace('(retiring)', "", regex=False)

In [13]:
# Show up to 100 rows to eyeball the cleaned columns.
src.head(n=100)

Unnamed: 0,state,incumbent,last_election,cook_rating,party
1,Alabama,Richard Shelby,64.0% R,Solid R,R
2,Alaska,Lisa Murkowski,44.4% R,Solid R,R
3,Arizona,Mark Kelly,51.2% D,Tossup,D
4,Arkansas,John Boozman,59.8% R,Solid R,R
5,California,Alex Padilla,,Solid D,D
6,Colorado,Michael Bennet,50.0% D,Solid D,D
7,Connecticut,Richard Blumenthal,63.2% D,Solid D,D
8,Florida,Marco Rubio,52.0% R,Lean R,R
9,Georgia,Raphael Warnock,51.0% D,Tossup,D
10,Hawaii,Brian Schatz,73.6% D,Solid D,D


In [14]:
# Continue on an independent deep copy so `src` stays as cleaned above.
df = src.copy(deep=True)

In [15]:
# Split the cleaned `last_election` text on a space into its two pieces:
# the percentage token and the party token.
margin_and_party = df['last_election'].str.split(' ', expand=True)
df[['last_election_winner_margin', 'last_election_winner_party']] = margin_and_party

In [16]:
# Alex Padilla's row has no last-election result; store an empty string as
# the winner party for that row.
padilla_rows = df['incumbent'] == 'Alex Padilla'
df.loc[padilla_rows, 'last_election_winner_party'] = ''

In [17]:
# The combined text column is no longer needed once it has been split.
df.drop(columns=['last_election'], inplace=True)

In [18]:
# Rename `party` so it is clearly the incumbent's party, as opposed to
# `last_election_winner_party`.
party_rename = {'party': 'incumbent_party'}
df.rename(columns=party_rename, inplace=True)

In [19]:
# Drop the literal '%' sign; the column remains text after this step.
margin_text = df['last_election_winner_margin']
df['last_election_winner_margin'] = margin_text.str.replace('%', "", regex=False)

In [20]:
# Count how many rows fall under each incumbent party.
df['incumbent_party'].value_counts()

R    20
D    14
Name: incumbent_party, dtype: int64

In [21]:
# Preview the first five rows of the reshaped frame.
df.head(5)

Unnamed: 0,state,incumbent,cook_rating,incumbent_party,last_election_winner_margin,last_election_winner_party
1,Alabama,Richard Shelby,Solid R,R,64.0,R
2,Alaska,Lisa Murkowski,Solid R,R,44.4,R
3,Arizona,Mark Kelly,Tossup,D,51.2,D
4,Arkansas,John Boozman,Solid R,R,59.8,R
5,California,Alex Padilla,Solid D,D,,


---

In [22]:
# Full state name -> abbreviated state name. The values double as the keys
# of `ap_states_postal`, so every value here must exist as a key there.
#
# Each state is paired with its own abbreviation; Idaho, Iowa and Maine
# (like Alaska, Hawaii, Ohio, Texas and Utah) are spelled out in full.
#
# NOTE(review): "Wyo" is intentionally left without a trailing period
# because `ap_states_postal` is keyed on "Wyo"; change both together.
ap_states = {
    "Alabama": "Ala.",
    "Alaska": "Alaska",
    "Arizona": "Ariz.",
    "Arkansas": "Ark.",
    "California": "Calif.",
    "Colorado": "Colo.",
    "Connecticut": "Conn.",
    "Delaware": "Del.",
    "Florida": "Fla.",
    "Georgia": "Ga.",
    "Hawaii": "Hawaii",
    "Idaho": "Idaho",
    "Illinois": "Ill.",
    "Indiana": "Ind.",
    "Iowa": "Iowa",
    "Kansas": "Kan.",
    "Kentucky": "Ky.",
    "Louisiana": "La.",
    "Maine": "Maine",
    "Maryland": "Md.",
    "Massachusetts": "Mass.",
    "Michigan": "Mich.",
    "Minnesota": "Minn.",
    "Mississippi": "Miss.",
    "Missouri": "Mo.",
    "Montana": "Mont.",
    "Nebraska": "Neb.",
    "Nevada": "Nev.",
    "New Hampshire": "N.H.",
    "New Jersey": "N.J.",
    "New Mexico": "N.M.",
    "New York": "N.Y.",
    "North Carolina": "N.C.",
    "North Dakota": "N.D.",
    "Ohio": "Ohio",
    "Oklahoma": "Okla.",
    "Oregon": "Ore.",
    "Pennsylvania": "Pa.",
    "Rhode Island": "R.I.",
    "South Carolina": "S.C.",
    "South Dakota": "S.D.",
    "Tennessee": "Tenn.",
    "Texas": "Texas",
    "Utah": "Utah",
    "Vermont": "Vt.",
    "Virginia": "Va.",
    "Washington": "Wash.",
    "West Virginia": "W.Va.",
    "Wisconsin": "Wis.",
    "Wyoming": "Wyo",
}

In [23]:
# Abbreviated state name (the values of `ap_states`) -> two-letter postal
# code. Every value produced by `ap_states` needs a key here, otherwise the
# chained .map() leaves `state_postal` empty for that state.
ap_states_postal = {
    "Ala.": "AL",
    "Alaska": "AK",
    "Ariz.": "AZ",
    "Ark.": "AR",
    "Calif.": "CA",
    "Colo.": "CO",
    "Conn.": "CT",
    "Del.": "DE",
    "Fla.": "FL",
    "Ga.": "GA",
    "Hawaii": "HI",
    "Iowa": "IA",
    "Idaho": "ID",
    "Ill.": "IL",
    "Ind.": "IN",
    "Kan.": "KS",
    "Ky.": "KY",
    "La.": "LA",
    "Md.": "MD",
    "Mass.": "MA",
    "Maine": "ME",
    "Mich.": "MI",
    "Minn.": "MN",
    "Miss.": "MS",
    "Mo.": "MO",
    "Mont.": "MT",
    "Neb.": "NE",
    "Nev.": "NV",
    "N.H.": "NH",
    "N.J.": "NJ",
    "N.M.": "NM",
    "N.Y.": "NY",
    "N.C.": "NC",
    "N.D.": "ND",
    "Ohio": "OH",
    "Okla.": "OK",
    "Ore.": "OR",
    "Pa.": "PA",
    "R.I.": "RI",
    "S.C.": "SC",
    "S.D.": "SD",
    "Texas": "TX",
    "Tenn.": "TN",
    # Utah was missing, which left its postal code empty after mapping.
    "Utah": "UT",
    "Vt.": "VT",
    "Va.": "VA",
    "Wash.": "WA",
    "W.Va.": "WV",
    "Wis.": "WI",
    # "Wyo" (no period) is the spelling `ap_states` emits; "Wyo." is accepted
    # as well so either form resolves.
    "Wyo": "WY",
    "Wyo.": "WY",
}

In [24]:
df["state_ap"] = df["state"].map(ap_states)
df["state_postal"] = df["state_ap"].map(ap_states_postal)

In [25]:
# Preview the first five rows with the two new state columns.
df.head(5)

Unnamed: 0,state,incumbent,cook_rating,incumbent_party,last_election_winner_margin,last_election_winner_party,state_ap,state_postal
1,Alabama,Richard Shelby,Solid R,R,64.0,R,Ala.,AL
2,Alaska,Lisa Murkowski,Solid R,R,44.4,R,Alaska,AK
3,Arizona,Mark Kelly,Tossup,D,51.2,D,Ariz.,AZ
4,Arkansas,John Boozman,Solid R,R,59.8,R,Ark.,AR
5,California,Alex Padilla,Solid D,D,,,Calif.,CA


---

### Export

In [26]:
# Write the final table to CSV without the index column. The output folder
# is created first (no error if it already exists) so the export does not
# fail when `data/processed/` is missing.
output_path = Path('data/processed/2022_senate_elections.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)