In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
import pandas as pd
from pandas.errors import SettingWithCopyWarning
# Silence pandas' SettingWithCopyWarning for the whole notebook.
# NOTE(review): this hides the warning rather than removing its cause. A later
# cell adds a column to meta_data_senate, which is a boolean-filtered selection
# of meta_data -- the pattern this warning exists to flag. Prefer an explicit
# .copy() (or .assign) there over muting the warning.
warnings.simplefilter(action='ignore', category=(SettingWithCopyWarning))

# House PVI

In [2]:
# Source (House): Cook Political Report PVI workbook, sheet '119 (25-26)'
# (presumably the 119th Congress, 2025-26 -- confirm against the workbook).
house_pvi=pd.read_excel('Cook PVI 1997-2025.xlsx', sheet_name='119 (25-26)')

In [3]:
# Peek at the raw sheet: 'Party' is a one-letter code and '2025 Cook PVI' is a
# single "<party>+<number>" string (e.g. "R+27") that still needs splitting.
house_pvi.head(3)

Unnamed: 0,State,Number,Member,Party,2025 Cook PVI
0,Alabama,1,Barry Moore,R,R+27
1,Alabama,2,Shomari Figures,D,D+5
2,Alabama,3,Mike Rogers,R,R+23


In [None]:
#fix the "EVEN" pvi labels so they can be split like every other value
def fix_evens(df, column_name):
  """Replace every "EVEN" entry in ``column_name`` with "N+0", in place.

  "EVEN" contains no "+", so it cannot be split into a party letter and a
  number the way "R+27" can; "N+0" gives it the same "<letter>+<number>" shape.

  Args:
    df: DataFrame to modify. It is mutated and also returned.
    column_name: Name of the column holding the PVI labels.

  Returns:
    The same ``df`` object that was passed in.
  """
  # Positional boolean mask: only rows whose own value is exactly "EVEN" are
  # rewritten. A row-by-row df.loc[label, ...] write would also overwrite every
  # other row sharing that index label when labels repeat.
  is_even = (df[column_name] == "EVEN").to_numpy()
  if is_even.any():
    df.loc[is_even, column_name] = "N+0"
  return df

# Rewrites "EVEN" labels in house_pvi in place; the function returns the same
# frame, which is what this cell displays as its last expression.
fix_evens(house_pvi,'2025 Cook PVI')

In [None]:
# Break each "<party>+<number>" label at its first "+" into two new columns.
# The number part is still text at this point (str.split returns strings).
house_pvi[['2025 Cook PVI Party', '2025 Cook PVI Number']] = (
    house_pvi['2025 Cook PVI'].str.split("+", n=1, expand=True)
)

# True where the member's party letter is not the letter of the district's lean.
house_pvi['PVI Party Difference'] = house_pvi['Party'].ne(house_pvi['2025 Cook PVI Party'])
house_pvi.head(3)

In [None]:
#where PVI Party Difference = True, change PVI Number to 0
def fix_different_party(df):
  """Set '2025 Cook PVI Number' to 0 on rows flagged as a party mismatch, in place.

  A row is flagged when its 'PVI Party Difference' value equals True.

  Args:
    df: DataFrame with a 'PVI Party Difference' column. It is mutated and
      also returned.

  Returns:
    The same ``df`` object that was passed in.
  """
  # Positional boolean mask: only the flagged rows themselves are zeroed. A
  # row-by-row df.loc[label, ...] write would also zero every other row sharing
  # that index label when labels repeat.
  mismatch = df['PVI Party Difference'].eq(True).to_numpy()
  if mismatch.any():
    df.loc[mismatch, '2025 Cook PVI Number'] = 0
  return df

# Zeroes the PVI number in house_pvi (in place) wherever the member's party
# differs from the district's lean; the returned frame is displayed.
fix_different_party(house_pvi)

In [None]:
# Load the congressional roster (the base table the PVI values get joined onto).
# Cells below read its 'State', 'District' and 'Chamber' columns.
meta_data=pd.read_csv('119th Congress.csv')

In [None]:
# Preview the roster; its 'State' and 'District' columns feed the
# 'State_District' join key built further down.
meta_data.head(3)

In [None]:
# Preview house_pvi again, now with the split PVI columns and the
# 'PVI Party Difference' flag, next to the roster preview.
house_pvi.head(3)

In [None]:

# Full state name -> two-letter postal abbreviation, for the 50 states plus DC.
# Names missing from this dict (territories, for example) come out of
# Series.map as NaN.
state_abbreviations = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
    'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA',
    'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
    'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
    'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO',
    'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
    'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
    'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC',
    'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT',
    'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY', 'District of Columbia':'DC'

}

# Add a 'State_Abbr' column to both frames (the roster and the House PVI table)
# so each can build a district key from the same abbreviation.
meta_data['State_Abbr'] = meta_data['State'].map(state_abbreviations)
house_pvi['State_Abbr']=house_pvi['State'].map(state_abbreviations)

In [None]:
def _state_district(row, district_col):
    """Return the 'ST-NN' join key for one row.

    The state part is ``str(row['State_Abbr'])``. The district part is the
    value in ``district_col`` zero-padded to two digits when its text form is
    numeric (a single decimal point is tolerated, so 1, '1' and 1.0 all give
    '01'). Anything else (NaN, 'At Large', ...) becomes the literal 'AL'.
    """
    state_part = str(row['State_Abbr'])
    raw_district = row[district_col]
    looks_numeric = str(raw_district).replace('.', '', 1).isdigit()
    if looks_numeric:
        # float() first so text such as '1.0' parses before truncating to int
        district_part = "{:02d}".format(int(float(raw_district)))
    else:
        district_part = str('AL')
    return state_part + '-' + district_part


# Build the same 'State_District' key on both frames so they can be merged on it.
meta_data['State_District'] = meta_data.apply(_state_district, axis=1, district_col='District')

house_pvi['State_District'] = house_pvi.apply(_state_district, axis=1, district_col='Number')

In [None]:
# Left-join the House PVI table onto the roster by district key, keeping every
# roster row. Columns present in both frames (e.g. 'Party') come back with
# _x (roster) / _y (PVI table) suffixes.
temp_source_pvi = meta_data.merge(house_pvi, on='State_District', how='left')

In [None]:
# Keep only the House rows of the merged roster.
source_pvi_house = temp_source_pvi.loc[temp_source_pvi['Chamber'].eq("House")]
source_pvi_house.head()

In [None]:
# House rows that came out of the left merge without a PVI value.
# Expected by the author: only the 3 delegates from DC, Puerto Rico, and the
# Virgin Islands -- anything else means a district key failed to match.
empty_pvi_rows = source_pvi_house.loc[source_pvi_house['2025 Cook PVI Number'].isna()]
empty_pvi_rows

In [None]:
#select only relevant columns
# Built straight from temp_source_pvi rather than from source_pvi_house itself,
# so the cell can be re-run: the previous form overwrote its own input and a
# second run failed with KeyError on 'Party_x' (already renamed to 'Party').
source_pvi_house = (
    temp_source_pvi
    .loc[temp_source_pvi['Chamber'] == "House",
         ['Name', 'Chamber', 'bioguide_id', 'Party_x',
          '2025 Cook PVI Party',
          '2025 Cook PVI Number', 'PVI Party Difference']]
    # 'Party_x' is the roster's party column after the merge suffixing.
    .rename(columns={'Party_x': 'Party'})
)

In [None]:
# Preview the trimmed House table before it is written to disk.
source_pvi_house.head(3)

In [None]:
# Persist the House table.
# NOTE(review): to_csv writes the DataFrame index as an unnamed first column by
# default, and this index is whatever row labels survived the merge + House
# filter. Confirm downstream readers expect that column (else pass index=False).
source_pvi_house.to_csv('house_pvi.csv')

# Senate

In [4]:
# State-level PVI table used for senators (one row per state, keyed by 'State').
senate_pvi=pd.read_csv('2025 PVI States.csv')

In [5]:
# Peek at the columns: '2025 PVI' is the rounded label (e.g. "R+15") and
# 'Raw PVI' the unrounded one (e.g. "R+14.81"), which is what gets split below.
senate_pvi.head(3)

Unnamed: 0,State,2025 PVI,Raw PVI,Rank (D to R)
0,Alabama,R+15,R+14.81,44
1,Alaska,R+6,R+6.46,32
2,Arizona,R+2,R+2.06,27


In [6]:
# Break each unrounded "<party>+<number>" label ('Raw PVI') at its first "+"
# into a party letter and a magnitude. The magnitude is still text here.
senate_pvi[['2025 Cook PVI Party', '2025 Cook PVI Number']] = (
    senate_pvi['Raw PVI'].str.split("+", n=1, expand=True)
)


senate_pvi.head(3)

Unnamed: 0,State,2025 PVI,Raw PVI,Rank (D to R),2025 Cook PVI Party,2025 Cook PVI Number
0,Alabama,R+15,R+14.81,44,R,14.81
1,Alaska,R+6,R+6.46,32,R,6.46
2,Arizona,R+2,R+2.06,27,R,2.06


In [7]:
#load congressional base data and keep only the senators
meta_data=pd.read_csv('119th Congress.csv')
# .copy() makes meta_data_senate an independent frame, so adding a column to it
# in a later cell is a plain assignment rather than a write onto a filtered
# selection of meta_data (the pattern SettingWithCopyWarning flags).
meta_data_senate=meta_data[meta_data['Chamber']=="Senate"].copy()

In [8]:
# Preview the senator rows: 'Party' holds the full party name here (not a
# one-letter code), and the row labels are carried over from meta_data.
meta_data_senate.head(3)

Unnamed: 0,Name,Chamber,bioguide_id,State,District,Party
7,Katie Boyd Britt,Senate,B001319,Alabama,,Republican
8,Tommy Tuberville,Senate,T000278,Alabama,,Republican
10,Lisa Murkowski,Senate,M001153,Alaska,,Republican


In [12]:
# One-letter party code (first character of 'Party'), comparable with the PVI
# party letter. .assign returns a new frame instead of writing a column onto
# the filtered meta_data_senate selection, and is safe to re-run.
meta_data_senate = meta_data_senate.assign(Party_Letter=meta_data_senate['Party'].str[:1])

In [13]:
# Check the new 'Party_Letter' column against the full 'Party' names.
meta_data_senate.head(5)

Unnamed: 0,Name,Chamber,bioguide_id,State,District,Party,Party_Letter
7,Katie Boyd Britt,Senate,B001319,Alabama,,Republican,R
8,Tommy Tuberville,Senate,T000278,Alabama,,Republican,R
10,Lisa Murkowski,Senate,M001153,Alaska,,Republican,R
11,Dan Sullivan,Senate,S001198,Alaska,,Republican,R
21,Ruben Gallego,Senate,G000574,Arizona,,Democratic,D


In [14]:
# Attach each state's PVI row to its senators; how='left' keeps every senator
# even if a state has no PVI entry.
temp_senate_pvi = meta_data_senate.merge(senate_pvi, on='State', how='left')
temp_senate_pvi.head()

Unnamed: 0,Name,Chamber,bioguide_id,State,District,Party,Party_Letter,2025 PVI,Raw PVI,Rank (D to R),2025 Cook PVI Party,2025 Cook PVI Number
0,Katie Boyd Britt,Senate,B001319,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81
1,Tommy Tuberville,Senate,T000278,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81
2,Lisa Murkowski,Senate,M001153,Alaska,,Republican,R,R+6,R+6.46,32,R,6.46
3,Dan Sullivan,Senate,S001198,Alaska,,Republican,R,R+6,R+6.46,32,R,6.46
4,Ruben Gallego,Senate,G000574,Arizona,,Democratic,D,R+2,R+2.06,27,R,2.06


In [15]:
# Flag senators whose party letter is not the letter of their state's PVI lean.
# NOTE(review): 'Party_Letter' is just the first character of 'Party', so any
# party whose name does not start with the PVI letter (an independent, if the
# roster has any) is always flagged -- confirm that is intended.
temp_senate_pvi['PVI Party Difference'] = temp_senate_pvi['Party_Letter'].ne(
    temp_senate_pvi['2025 Cook PVI Party']
)
temp_senate_pvi.head(3)

Unnamed: 0,Name,Chamber,bioguide_id,State,District,Party,Party_Letter,2025 PVI,Raw PVI,Rank (D to R),2025 Cook PVI Party,2025 Cook PVI Number,PVI Party Difference
0,Katie Boyd Britt,Senate,B001319,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81,False
1,Tommy Tuberville,Senate,T000278,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81,False
2,Lisa Murkowski,Senate,M001153,Alaska,,Republican,R,R+6,R+6.46,32,R,6.46,False


In [16]:
# Number of flagged senators (each True counts as 1); the saved output shows 13.
temp_senate_pvi['PVI Party Difference'].sum()

13

In [17]:
#where PVI Party Difference = True, change PVI Number to 0
def fix_different_party(df):
  """Set '2025 Cook PVI Number' to 0 on rows flagged as a party mismatch, in place.

  A row is flagged when its 'PVI Party Difference' value equals True.

  Args:
    df: DataFrame with a 'PVI Party Difference' column. It is mutated and
      also returned.

  Returns:
    The same ``df`` object that was passed in.
  """
  # Positional boolean mask: only the flagged rows themselves are zeroed. A
  # row-by-row df.loc[label, ...] write would also zero every other row sharing
  # that index label when labels repeat.
  mismatch = df['PVI Party Difference'].eq(True).to_numpy()
  if mismatch.any():
    df.loc[mismatch, '2025 Cook PVI Number'] = 0
  return df

# Zeroes the PVI number in temp_senate_pvi (in place) for senators whose party
# differs from their state's lean; the returned frame is displayed.
fix_different_party(temp_senate_pvi)

Unnamed: 0,Name,Chamber,bioguide_id,State,District,Party,Party_Letter,2025 PVI,Raw PVI,Rank (D to R),2025 Cook PVI Party,2025 Cook PVI Number,PVI Party Difference
0,Katie Boyd Britt,Senate,B001319,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81,False
1,Tommy Tuberville,Senate,T000278,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81,False
2,Lisa Murkowski,Senate,M001153,Alaska,,Republican,R,R+6,R+6.46,32,R,6.46,False
3,Dan Sullivan,Senate,S001198,Alaska,,Republican,R,R+6,R+6.46,32,R,6.46,False
4,Ruben Gallego,Senate,G000574,Arizona,,Democratic,D,R+2,R+2.06,27,R,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,James C. Justice,Senate,J000312,West Virginia,,Republican,R,R+21,R+20.96,50,R,20.96,False
96,Tammy Baldwin,Senate,B001230,Wisconsin,,Democratic,D,EVEN,R+0.25,22,R,0,True
97,Ron Johnson,Senate,J000293,Wisconsin,,Republican,R,EVEN,R+0.25,22,R,0.25,False
98,John Barrasso,Senate,B001261,Wyoming,,Republican,R,R+23,R+23.23,51,R,23.23,False


In [18]:
# Keep only the member identifiers and the (possibly zeroed) PVI number.
source_pvi_senate = temp_senate_pvi.loc[
    :, ['Name', 'Chamber', 'bioguide_id', '2025 Cook PVI Number']
]

source_pvi_senate.head(3)

Unnamed: 0,Name,Chamber,bioguide_id,2025 Cook PVI Number
0,Katie Boyd Britt,Senate,B001319,14.81
1,Tommy Tuberville,Senate,T000278,14.81
2,Lisa Murkowski,Senate,M001153,6.46


In [19]:
# Persist the Senate table.
# NOTE(review): to_csv writes the DataFrame index as an unnamed first column by
# default. Confirm downstream readers expect it (else pass index=False).
source_pvi_senate.to_csv('senate_pvi.csv')