In [1]:
import pandas as pd
import numpy as np
import re


In [2]:
#Raw communications file downloaded from APP: every communication APP has run
#through its scoring algorithm for the year 2025.
#low_memory=False makes pandas parse the file in one pass instead of in chunks.
df = pd.read_csv(
    '2025-04-24 app comm raw.csv',
    low_memory=False,
)

#119th Congress bioguide-id file (cleaned by clean_119 in the next cell)
df_119 = pd.read_csv('119th_congress_bioguideids.csv')

In [3]:
#function to clean 119_congress_bioguideids file
def clean_119(df):
  """Turn the raw 119th-Congress bioguide sheet into a bioguide_id/full_name lookup.

  The raw sheet keeps its real column labels in the first data row, and the
  label of the bioguide-id column is missing (NaN); both are repaired here.

  Parameters
  ----------
  df : pandas.DataFrame
      Raw frame whose row 0 holds the column labels. Once relabelled it must
      contain a 'Name' column. Each name is parsed as 'Last, First ...' and any
      text from the first ' - ' onwards is discarded.

  Returns
  -------
  pandas.DataFrame
      A new frame with exactly the columns ['bioguide_id', 'full_name'], where
      full_name is '<first word after the comma> <text before the comma>'.
      The frame passed in is not modified.

  Raises
  ------
  IndexError
      If a name contains no comma.
  """
  #row 0 carries the real column labels; the NaN label is the bioguide id column
  labels = ['bioguide_id' if pd.isna(col) else col for col in df.iloc[0]]

  #use the frame that was passed in (not the notebook-level df_119) and build a
  #new frame, so the caller's column labels are never overwritten
  members = df.iloc[1:].reset_index(drop=True)
  members.columns = labels

  def _first_last(raw_name):
    #'Last, First Middle' -> 'First Last' (only the first given-name token is kept)
    pieces = raw_name.split(',')
    given = pieces[1].strip().split()[:1]
    return ' '.join(given) + ' ' + pieces[0].strip()

  #create full_name column from Name column, ignoring everything after ' - '
  name_only = members['Name'].str.split(' - ').str[0]
  members['full_name'] = name_only.apply(_first_last)

  #only include the bioguide_id and full_name columns
  return members[['bioguide_id', 'full_name']]

#build the bioguide_id / full_name lookup from the raw sheet and display it
df_bioguide = df_119.pipe(clean_119)
df_bioguide

Unnamed: 0,bioguide_id,full_name
0,A000370,Alma Adams
1,A000055,Robert Aderholt
2,A000371,Pete Aguilar
3,A000379,Mark Alford
4,A000372,Rick Allen
...,...,...
533,W000800,Peter Welch
534,W000802,Sheldon Whitehouse
535,W000437,Roger Wicker
536,W000779,Ron Wyden


In [4]:
def clean_communications_app(df):
  """Aggregate the raw APP communications into one summary row per bioguide_id.

  Parameters
  ----------
  df : pandas.DataFrame
      Raw communications, one row per communication. Must contain the columns
      'bioguide_id', 'attack_personal', 'outcome_bipartisanship', 'policy',
      'first_name' and 'last_name'. The frame is not modified.

  Returns
  -------
  pandas.DataFrame
      One row per bioguide_id (sorted by bioguide_id) with the columns
      bioguide_id, full_name, communication_count, the three summed score
      columns, and a '<score>_pct' column for each score
      (sum / communication_count * 100).
  """
  score_cols = ['attack_personal', 'outcome_bipartisanship', 'policy']

  #filter only necessary columns; .copy() keeps the caller's frame untouched
  df = df[['bioguide_id'] + score_cols + ['first_name', 'last_name']].copy()

  #count the rows of each bioguide_id to get the total communication count
  df['communication_count'] = df.groupby('bioguide_id')['bioguide_id'].transform('count')

  #create full name column by combining first_name and last_name
  df['full_name'] = df['first_name'] + ' ' + df['last_name']

  #sum the score columns by bioguide_id only. full_name is deliberately NOT a
  #grouping key: a row with a missing name would be dropped from the sums while
  #still being counted, and a spelling variant would split one member into
  #several rows. 'first' keeps the first non-null name seen for the member.
  agg_spec = {'full_name': 'first', 'communication_count': 'first'}
  agg_spec.update({col: 'sum' for col in score_cols})
  df = df.groupby('bioguide_id', as_index=False).agg(agg_spec)

  #create pct columns
  for col in score_cols:
    df[col + '_pct'] = df[col] / df['communication_count'] * 100
  return df

#aggregate the raw communications per member (the result no longer carries the
#separate first_name / last_name columns), then order rows by bioguide_id
df_comm_app_2025 = (
    clean_communications_app(df)
    .sort_values(by='bioguide_id', ascending=True)
    .reset_index(drop=True)
)


In [5]:
#export the per-member summary to csv, leaving out the row index
df_comm_app_2025.to_csv(
    'app_communications_2025_04_24.csv',
    index=False,
)