In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
import pandas as pd
from pandas.errors import SettingWithCopyWarning
# Silence pandas' SettingWithCopyWarning for the whole notebook: later cells add
# columns to DataFrames obtained by boolean filtering, which triggers that warning.
warnings.simplefilter(action='ignore', category=(SettingWithCopyWarning))

# House PVI

In [2]:
#Source M House: Cook Political PVI
# Read only the '119 (25-26)' sheet of the Cook PVI workbook into house_pvi.
house_pvi=pd.read_excel('Cook PVI 1997-2025.xlsx', sheet_name='119 (25-26)')

In [3]:
# Preview the first three rows of the loaded PVI sheet.
house_pvi.head(3)

Unnamed: 0,State,Number,Member,Party,2025 Cook PVI
0,Alabama,1,Barry Moore,R,R+27
1,Alabama,2,Shomari Figures,D,D+5
2,Alabama,3,Mike Rogers,R,R+23


In [4]:
# Break the combined PVI label (e.g. "R+27") at the first "+" into the leaning
# party and the magnitude; the magnitude is still a string after the split.
pvi_split_columns = ['2025 Cook PVI Party', '2025 Cook PVI Number']
pvi_split_parts = house_pvi['2025 Cook PVI'].str.split("+", n=1, expand=True)
house_pvi[pvi_split_columns] = pvi_split_parts

house_pvi.head(3)

Unnamed: 0,State,Number,Member,Party,2025 Cook PVI,2025 Cook PVI Party,2025 Cook PVI Number
0,Alabama,1,Barry Moore,R,R+27,R,27
1,Alabama,2,Shomari Figures,D,D+5,D,5
2,Alabama,3,Mike Rogers,R,R+23,R,23


In [5]:
#load congressional base data
meta_data=pd.read_csv('119th Congress.csv')

# Map full state names to their two-letter abbreviations (plus DC).
# Names missing from this dictionary map to NaN in the 'State_Abbr' columns below.
state_abbreviations = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
    'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA',
    'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
    'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
    'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO',
    'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
    'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
    'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC',
    'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT',
    'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY', 'District of Columbia':'DC'
}


def _state_district_key(state_abbr, district):
    """Build the '<ABBR>-<NN>' key used to join members to PVI rows.

    Parameters
    ----------
    state_abbr : object
        State abbreviation; it is passed through ``str()``, so a missing
        abbreviation (NaN) becomes the literal text ``'nan'``.
    district : object
        District identifier. Numeric-looking values (``1``, ``'1'``, ``1.0``)
        are zero-padded to two digits; anything else (``'At Large'``, ``'AL'``,
        NaN) is treated as an at-large seat.

    Returns
    -------
    str
        For example ``'AL-01'`` or ``'WY-AL'``.
    """
    # Allow a single decimal point so float-typed values such as 1.0 still count as numeric.
    if str(district).replace('.', '', 1).isdigit():
        district_code = "{:02d}".format(int(float(district)))  # float first, then int
    else:
        district_code = 'AL'  # at-large districts
    return str(state_abbr) + '-' + district_code


# Add a 'State_Abbr' column to both the congressional metadata and the PVI table
meta_data['State_Abbr'] = meta_data['State'].map(state_abbreviations)
house_pvi['State_Abbr'] = house_pvi['State'].map(state_abbreviations)

# Create the 'State_District' join key on both frames with the same helper,
# so the two sides of the merge are guaranteed to be formatted identically.
meta_data['State_District'] = [
    _state_district_key(abbr, district)
    for abbr, district in zip(meta_data['State_Abbr'], meta_data['District'])
]
house_pvi['State_District'] = [
    _state_district_key(abbr, number)
    for abbr, number in zip(house_pvi['State_Abbr'], house_pvi['Number'])
]

# Left join keeps every row of meta_data; rows without a PVI match get NaN PVI columns.
# Columns present in both frames receive the default _x (meta_data) / _y (house_pvi) suffixes.
temp_source_pvi = pd.merge(meta_data, house_pvi, on='State_District', how='left')

# Keep House rows only and restore the metadata party column name.
source_pvi_house = (
    temp_source_pvi[temp_source_pvi['Chamber'] == "House"]
    .rename(columns={'Party_x': 'Party'})
)
source_pvi_house.head()

Unnamed: 0,Name,Chamber,bioguide_id,State_x,District,Party,State_Abbr_x,State_District,State_y,Number,Member,Party_y,2025 Cook PVI,2025 Cook PVI Party,2025 Cook PVI Number,State_Abbr_y
0,Barry Moore,House,M001212,Alabama,1,Republican,AL,AL-01,Alabama,1,Barry Moore,R,R+27,R,27,AL
1,Shomari Figures,House,F000481,Alabama,2,Democratic,AL,AL-02,Alabama,2,Shomari Figures,D,D+5,D,5,AL
2,Mike D. Rogers,House,R000575,Alabama,3,Republican,AL,AL-03,Alabama,3,Mike Rogers,R,R+23,R,23,AL
3,Robert B. Aderholt,House,A000055,Alabama,4,Republican,AL,AL-04,Alabama,4,Robert Aderholt,R,R+33,R,33,AL
4,Dale W. Strong,House,S001220,Alabama,5,Republican,AL,AL-05,Alabama,5,Dale Strong,R,R+15,R,15,AL


In [6]:
# Flag rows where the member's party letter from the PVI sheet (Party_y)
# is not the same as the party the district's PVI leans toward.
pvi_sheet_party = source_pvi_house['Party_y']
pvi_lean_party = source_pvi_house['2025 Cook PVI Party']
source_pvi_house['PVI Party Difference'] = pvi_sheet_party.ne(pvi_lean_party)
source_pvi_house.head()

Unnamed: 0,Name,Chamber,bioguide_id,State_x,District,Party,State_Abbr_x,State_District,State_y,Number,Member,Party_y,2025 Cook PVI,2025 Cook PVI Party,2025 Cook PVI Number,State_Abbr_y,PVI Party Difference
0,Barry Moore,House,M001212,Alabama,1,Republican,AL,AL-01,Alabama,1,Barry Moore,R,R+27,R,27,AL,False
1,Shomari Figures,House,F000481,Alabama,2,Democratic,AL,AL-02,Alabama,2,Shomari Figures,D,D+5,D,5,AL,False
2,Mike D. Rogers,House,R000575,Alabama,3,Republican,AL,AL-03,Alabama,3,Mike Rogers,R,R+23,R,23,AL,False
3,Robert B. Aderholt,House,A000055,Alabama,4,Republican,AL,AL-04,Alabama,4,Robert Aderholt,R,R+33,R,33,AL,False
4,Dale W. Strong,House,S001220,Alabama,5,Republican,AL,AL-05,Alabama,5,Dale Strong,R,R+15,R,15,AL,False


In [7]:
# Count the flagged House rows (summing booleans counts the True values).
source_pvi_house['PVI Party Difference'].sum()


22

In [8]:
#where PVI Party Difference = True, change PVI Number to 0
def fix_different_party(df):
  """Zero out the PVI number on rows flagged with a party difference.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain the columns 'PVI Party Difference' (boolean flag) and
      '2025 Cook PVI Number'.

  Returns
  -------
  pandas.DataFrame
      The same object that was passed in; it is modified in place.

  Raises
  ------
  KeyError
      If 'PVI Party Difference' is missing from ``df``.
  """
  # Use a positional boolean mask so exactly the flagged rows are changed,
  # even when index labels repeat (label-based per-row assignment would
  # overwrite every row that shares a flagged row's label).
  flagged = (df['PVI Party Difference'] == True).to_numpy()
  df.loc[flagged, '2025 Cook PVI Number'] = 0 # Assign N + 0
  return df

# Modifies source_pvi_house in place; the returned frame is this cell's displayed output.
fix_different_party(source_pvi_house)

Unnamed: 0,Name,Chamber,bioguide_id,State_x,District,Party,State_Abbr_x,State_District,State_y,Number,Member,Party_y,2025 Cook PVI,2025 Cook PVI Party,2025 Cook PVI Number,State_Abbr_y,PVI Party Difference
0,Barry Moore,House,M001212,Alabama,1,Republican,AL,AL-01,Alabama,1,Barry Moore,R,R+27,R,27,AL,False
1,Shomari Figures,House,F000481,Alabama,2,Democratic,AL,AL-02,Alabama,2,Shomari Figures,D,D+5,D,5,AL,False
2,Mike D. Rogers,House,R000575,Alabama,3,Republican,AL,AL-03,Alabama,3,Mike Rogers,R,R+23,R,23,AL,False
3,Robert B. Aderholt,House,A000055,Alabama,4,Republican,AL,AL-04,Alabama,4,Robert Aderholt,R,R+33,R,33,AL,False
4,Dale W. Strong,House,S001220,Alabama,5,Republican,AL,AL-05,Alabama,5,Dale Strong,R,R+15,R,15,AL,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
528,Glenn Grothman,House,G000576,Wisconsin,6,Republican,WI,WI-06,Wisconsin,6,Glenn Grothman,R,R+8,R,8,WI,False
529,Thomas P. Tiffany,House,T000165,Wisconsin,7,Republican,WI,WI-07,Wisconsin,7,Tom Tiffany,R,R+11,R,11,WI,False
530,Tony Wied,House,W000829,Wisconsin,8,Republican,WI,WI-08,Wisconsin,8,Tony Wied,R,R+8,R,8,WI,False
533,Harriet M. Hageman,House,H001096,Wyoming,At Large,Republican,WY,WY-AL,Wyoming,AL,Harriet Hageman,R,R+23,R,23,WY,False


In [9]:
# Sanity check: every flagged row (evens / party difference) should now carry
# a PVI number of 0, so the total over those rows must be 0.
is_flagged = source_pvi_house['PVI Party Difference'].eq(True)
temp = source_pvi_house.loc[is_flagged]
temp['2025 Cook PVI Number'].sum()

0

In [10]:
# Show the names of the members flagged True for PVI Party Difference
# (temp holds only the flagged rows, selected in the previous cell).
temp['Name']

17              Juan Ciscomani
41                   Adam Gray
90                  Gabe Evans
103      Eleanor Holmes Norton
216            Jared F. Golden
246                Tom Barrett
247     Kristen McDonald Rivet
286                  Don Bacon
306             Thomas H. Kean
315               Gabe Vasquez
321           Thomas R. Suozzi
335             Michael Lawler
347            Donald G. Davis
374               Marcy Kaptur
378        Emilia Strong Sykes
398       Brian K. Fitzpatrick
417            Pablo Hernández
471              Henry Cuellar
477           Vicente Gonzalez
493         Stacey E. Plaskett
495        Jennifer A. Kiggans
509    Marie Gluesenkamp Perez
Name: Name, dtype: object

In [11]:
# Narrow the House table down to the columns that are exported
house_output_columns = [
    'Name', 'Chamber', 'bioguide_id', 'Party',
    '2025 Cook PVI Party',
    '2025 Cook PVI Number',
    'PVI Party Difference',
]
source_pvi_house = source_pvi_house[house_output_columns]
# Write the result to disk (the row index goes out as the first CSV column)
source_pvi_house.to_csv('house_pvi.csv')

# Senate

In [12]:
# Load the state-level PVI table used for the Senate and preview its first three rows.
senate_pvi=pd.read_csv('2025 PVI States.csv')
senate_pvi.head(3)

Unnamed: 0,State,2025 PVI,Raw PVI,Rank (D to R)
0,Alabama,R+15,R+14.81,44
1,Alaska,R+6,R+6.46,32
2,Arizona,R+2,R+2.06,27


In [13]:
# Break the unrounded 'Raw PVI' label (e.g. "R+14.81") at the first "+" into the
# leaning party and the magnitude; the magnitude is still a string after the split.
pvi_split_columns = ['2025 Cook PVI Party', '2025 Cook PVI Number']
raw_pvi_parts = senate_pvi['Raw PVI'].str.split("+", n=1, expand=True)
senate_pvi[pvi_split_columns] = raw_pvi_parts

senate_pvi.head(3)

Unnamed: 0,State,2025 PVI,Raw PVI,Rank (D to R),2025 Cook PVI Party,2025 Cook PVI Number
0,Alabama,R+15,R+14.81,44,R,14.81
1,Alaska,R+6,R+6.46,32,R,6.46
2,Arizona,R+2,R+2.06,27,R,2.06


In [14]:
#load congressional base data
# Re-read the file so the Senate section starts from the unmodified metadata
# (the House section added helper columns to its own meta_data frame).
meta_data=pd.read_csv('119th Congress.csv')

# Take an explicit copy of the Senate rows: a column is added on the next line,
# and assigning onto a bare boolean-filtered selection is the chained-assignment
# pattern pandas warns about (SettingWithCopyWarning).
meta_data_senate=meta_data[meta_data['Chamber']=="Senate"].copy()

# First character of the party name (e.g. 'Republican' -> 'R'), so it can be
# compared with the single-letter party in the PVI data.
meta_data_senate['Party_Letter']=meta_data_senate['Party'].str[:1]

meta_data_senate.head(5)

Unnamed: 0,Name,Chamber,bioguide_id,State,District,Party,Party_Letter
7,Katie Boyd Britt,Senate,B001319,Alabama,,Republican,R
8,Tommy Tuberville,Senate,T000278,Alabama,,Republican,R
10,Lisa Murkowski,Senate,M001153,Alaska,,Republican,R
11,Dan Sullivan,Senate,S001198,Alaska,,Republican,R
21,Ruben Gallego,Senate,G000574,Arizona,,Democratic,D


In [15]:
# Attach each state's PVI row to its senators; the left join keeps every senator
# even when the state has no entry in the PVI table.
temp_senate_pvi = meta_data_senate.merge(senate_pvi, on='State', how='left')
temp_senate_pvi.head()

Unnamed: 0,Name,Chamber,bioguide_id,State,District,Party,Party_Letter,2025 PVI,Raw PVI,Rank (D to R),2025 Cook PVI Party,2025 Cook PVI Number
0,Katie Boyd Britt,Senate,B001319,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81
1,Tommy Tuberville,Senate,T000278,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81
2,Lisa Murkowski,Senate,M001153,Alaska,,Republican,R,R+6,R+6.46,32,R,6.46
3,Dan Sullivan,Senate,S001198,Alaska,,Republican,R,R+6,R+6.46,32,R,6.46
4,Ruben Gallego,Senate,G000574,Arizona,,Democratic,D,R+2,R+2.06,27,R,2.06


In [16]:
# Flag senators whose party letter is not the party their state's PVI leans toward
senator_party = temp_senate_pvi['Party_Letter']
state_lean_party = temp_senate_pvi['2025 Cook PVI Party']
temp_senate_pvi['PVI Party Difference'] = senator_party.ne(state_lean_party)
temp_senate_pvi.head(3)

Unnamed: 0,Name,Chamber,bioguide_id,State,District,Party,Party_Letter,2025 PVI,Raw PVI,Rank (D to R),2025 Cook PVI Party,2025 Cook PVI Number,PVI Party Difference
0,Katie Boyd Britt,Senate,B001319,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81,False
1,Tommy Tuberville,Senate,T000278,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81,False
2,Lisa Murkowski,Senate,M001153,Alaska,,Republican,R,R+6,R+6.46,32,R,6.46,False


In [17]:
# Count the flagged Senate rows (summing booleans counts the True values).
temp_senate_pvi['PVI Party Difference'].sum()

13

In [18]:
# Show the names of the senators flagged True for PVI Party Difference.
temp_senate_pvi[temp_senate_pvi['PVI Party Difference']==True]['Name']

4              Ruben Gallego
5                 Mark Kelly
18                Jon Ossoff
19        Raphael G. Warnock
36          Susan M. Collins
37        Angus S. King, Jr.
42            Gary C. Peters
43            Elissa Slotkin
54    Catherine Cortez Masto
55               Jacky Rosen
74            John Fetterman
88           Bernard Sanders
96             Tammy Baldwin
Name: Name, dtype: object

In [19]:
#where PVI Party Difference = True, change PVI Number to 0
def fix_different_party(df):
  """Zero out the PVI number on rows flagged with a party difference.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain the columns 'PVI Party Difference' (boolean flag) and
      '2025 Cook PVI Number'.

  Returns
  -------
  pandas.DataFrame
      The same object that was passed in; it is modified in place.

  Raises
  ------
  KeyError
      If 'PVI Party Difference' is missing from ``df``.
  """
  # Use a positional boolean mask so exactly the flagged rows are changed,
  # even when index labels repeat (label-based per-row assignment would
  # overwrite every row that shares a flagged row's label).
  flagged = (df['PVI Party Difference'] == True).to_numpy()
  df.loc[flagged, '2025 Cook PVI Number'] = 0 # Assign N + 0
  return df

# Modifies temp_senate_pvi in place; the returned frame is this cell's displayed output.
fix_different_party(temp_senate_pvi)

Unnamed: 0,Name,Chamber,bioguide_id,State,District,Party,Party_Letter,2025 PVI,Raw PVI,Rank (D to R),2025 Cook PVI Party,2025 Cook PVI Number,PVI Party Difference
0,Katie Boyd Britt,Senate,B001319,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81,False
1,Tommy Tuberville,Senate,T000278,Alabama,,Republican,R,R+15,R+14.81,44,R,14.81,False
2,Lisa Murkowski,Senate,M001153,Alaska,,Republican,R,R+6,R+6.46,32,R,6.46,False
3,Dan Sullivan,Senate,S001198,Alaska,,Republican,R,R+6,R+6.46,32,R,6.46,False
4,Ruben Gallego,Senate,G000574,Arizona,,Democratic,D,R+2,R+2.06,27,R,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,James C. Justice,Senate,J000312,West Virginia,,Republican,R,R+21,R+20.96,50,R,20.96,False
96,Tammy Baldwin,Senate,B001230,Wisconsin,,Democratic,D,EVEN,R+0.25,22,R,0,True
97,Ron Johnson,Senate,J000293,Wisconsin,,Republican,R,EVEN,R+0.25,22,R,0.25,False
98,John Barrasso,Senate,B001261,Wyoming,,Republican,R,R+23,R+23.23,51,R,23.23,False


In [20]:
# Narrow the Senate table down to the identifier columns plus the PVI number
senate_output_columns = ['Name', 'Chamber', 'bioguide_id', '2025 Cook PVI Number']
source_pvi_senate = temp_senate_pvi[senate_output_columns]

source_pvi_senate.head(3)

Unnamed: 0,Name,Chamber,bioguide_id,2025 Cook PVI Number
0,Katie Boyd Britt,Senate,B001319,14.81
1,Tommy Tuberville,Senate,T000278,14.81
2,Lisa Murkowski,Senate,M001153,6.46


In [21]:
# Save the Senate table; index=False is not passed, so the row index is written
# as the first (unnamed) CSV column.
source_pvi_senate.to_csv('senate_pvi.csv')