In [147]:
import pandas as pd
import networkx as nx
import plotly.graph_objs as go
import os

In [130]:
def consolidate_data(members_df, votes_df, filterOverabundance):
    """Join roll-call votes with member metadata and normalise the labels.

    Steps performed:
      1. Keep only the vote / member columns used downstream.
      2. Keep only members whose ``party_code`` is 100 or 200 and relabel
         those codes as ``'Democrat'`` / ``'Republican'``.
      3. Inner-join votes to members on ``icpsr``.
      4. Relabel ``cast_code`` 1-3 as ``'Yes'`` and 4-6 as ``'No'``; rows with
         any other cast code are dropped.
      5. Optionally drop every roll call in which more than 90% of the
         remaining votes are ``'Yes'`` or more than 90% are ``'No'``.

    Parameters
    ----------
    members_df : pandas.DataFrame
        Must contain ``icpsr``, ``bioname``, ``party_code``,
        ``nominate_dim1`` and ``nominate_dim2``.
    votes_df : pandas.DataFrame
        Must contain ``congress``, ``chamber``, ``rollnumber``, ``icpsr``
        and ``cast_code``.
    filterOverabundance : bool
        When truthy, lopsided roll calls (step 5) are removed.

    Returns
    -------
    pandas.DataFrame
        One row per retained (roll call, member) vote. The result may be
        empty, e.g. when every roll call is lopsided.

    Raises
    ------
    KeyError
        If either input frame lacks one of the required columns.

    Notes
    -----
    Roll calls are identified by ``rollnumber`` alone, so the inputs are
    expected to cover a single congress and chamber.
    """
    votes_columns = ["congress", "chamber", "rollnumber", "icpsr", "cast_code"]
    members_columns = ["icpsr", "bioname", "party_code", "nominate_dim1", "nominate_dim2"]
    party_labels = {100: 'Democrat', 200: 'Republican'}
    cast_code_replacements = {1: 'Yes', 2: 'Yes', 3: 'Yes', 4: 'No', 5: 'No', 6: 'No'}
    valid_cast_codes = ['Yes', 'No']
    lopsided_share = 0.9

    votes_selected = votes_df[votes_columns]
    members_selected = members_df[members_columns]

    # Only the two parties that have a label are kept.
    members_selected = members_selected[members_selected['party_code'].isin(list(party_labels))]

    # Inner join: votes cast by members that were filtered out disappear here.
    merged_df = pd.merge(votes_selected, members_selected, on='icpsr')

    merged_df['party_code'] = merged_df['party_code'].replace(party_labels)
    merged_df['cast_code'] = merged_df['cast_code'].replace(cast_code_replacements)

    # Cast codes without a replacement are still numeric and are dropped here.
    filtered_cleaned_df = merged_df[merged_df['cast_code'].isin(valid_cast_codes)]

    if not filterOverabundance:
        return filtered_cleaned_df

    # Per-roll-call share of each outcome. Computing the shares from boolean
    # masks (rather than value_counts().unstack()) keeps both outcomes defined
    # even when one of them never occurs in the data, which previously raised
    # a KeyError on the missing 'Yes' / 'No' column.
    rollnumbers = filtered_cleaned_df['rollnumber']
    yes_share = filtered_cleaned_df['cast_code'].eq('Yes').groupby(rollnumbers).mean()
    no_share = filtered_cleaned_df['cast_code'].eq('No').groupby(rollnumbers).mean()

    is_lopsided = (yes_share > lopsided_share) | (no_share > lopsided_share)
    bills_to_remove = is_lopsided[is_lopsided].index

    return filtered_cleaned_df[~rollnumbers.isin(bills_to_remove)]


In [145]:
def plotCollaborationNetwork(filtered_cleaned_df, cpc_percent, within_party_percent, dem_color, rep_color, bg_color, border_color, output_path=None):
    """Build a co-voting network of members and write it as an HTML figure.

    Each member (``icpsr``) is a node placed at
    (``nominate_dim1``, ``nominate_dim2``). Two members share an edge whose
    weight is the number of roll calls on which they cast the same vote. Edges
    are then pruned: a cross-party edge survives only if its weight is at
    least ``cpc_percent`` times the number of distinct roll calls, a
    same-party edge only if it reaches ``within_party_percent`` times that
    number. Node size grows with the number of surviving cross-party edges.

    Parameters
    ----------
    filtered_cleaned_df : pandas.DataFrame
        Needs the columns ``congress``, ``rollnumber``, ``icpsr``,
        ``cast_code``, ``party_code`` (``'Democrat'`` / ``'Republican'``),
        ``nominate_dim1``, ``nominate_dim2`` and ``bioname``. An optional
        ``chamber`` column selects the title and default file name.
    cpc_percent : float
        Fraction of roll calls two members of different parties must agree on.
    within_party_percent : float
        Fraction of roll calls two members of the same party must agree on.
    dem_color, rep_color : str
        Marker colours for the two parties.
    bg_color : str
        Paper background colour of the figure.
    border_color : str
        Marker outline colour.
    output_path : str, optional
        Where to write the HTML file. Defaults to
        ``Graphs/Senate_Stuff/graph_<chamber>_<congress>.html``.

    Returns
    -------
    None
        The figure is written to disk; nothing is returned.

    Raises
    ------
    IndexError
        If ``filtered_cleaned_df`` has no rows.
    KeyError
        If a required column is missing or a party label is neither
        ``'Democrat'`` nor ``'Republican'``.
    """
    congress_number = filtered_cleaned_df['congress'].iloc[0]

    # The title and the file name used to be hard-coded to different chambers
    # ("House of Representatives" vs. "graph_senate_"), so House and Senate
    # plots of the same congress overwrote each other and Senate plots were
    # mislabeled. Derive both from the data when a chamber column is present.
    # NOTE(review): assumes chamber values look like 'House' / 'Senate' -
    # confirm against the input files.
    chamber_title, chamber_slug = 'House of Representatives', 'senate'  # legacy fallback
    if 'chamber' in filtered_cleaned_df.columns:
        chamber_value = str(filtered_cleaned_df['chamber'].iloc[0]).strip()
        chamber_key = chamber_value.lower()
        if chamber_key.startswith('h'):
            chamber_title, chamber_slug = 'House of Representatives', 'house'
        elif chamber_key.startswith('s'):
            chamber_title, chamber_slug = 'Senate', 'senate'
        elif chamber_value:
            chamber_title, chamber_slug = chamber_value, chamber_key.replace(' ', '_')

    roll_call_count = len(filtered_cleaned_df['rollnumber'].unique())
    cross_party_threshold = roll_call_count * cpc_percent
    within_party_threshold = roll_call_count * within_party_percent

    # Create a graph
    G = nx.Graph()

    # One node per member. The frame has one row per vote, so iterate over a
    # de-duplicated view instead of re-adding each member for every vote.
    # Assumes a member's party, position and name are the same on all rows.
    member_columns = ['icpsr', 'party_code', 'nominate_dim1', 'nominate_dim2', 'bioname']
    members = filtered_cleaned_df[member_columns].drop_duplicates(subset='icpsr')
    for member in members.itertuples(index=False):
        G.add_node(member.icpsr,
                   party=member.party_code,
                   pos=(member.nominate_dim1, member.nominate_dim2),
                   bioname=member.bioname)

    # Every pair of members inside the same (roll call, vote) group agreed on
    # that roll call: bump the weight of their edge by one.
    for _, group in filtered_cleaned_df.groupby(['rollnumber', 'cast_code']):
        icpsr_list = group['icpsr'].tolist()
        for i in range(len(icpsr_list)):
            for j in range(i + 1, len(icpsr_list)):
                first, second = icpsr_list[i], icpsr_list[j]
                if G.has_edge(first, second):
                    G[first][second]['weight'] += 1
                else:
                    G.add_edge(first, second, weight=1)

    # Drop edges that fall below the threshold that applies to their kind.
    # Collected first because edges cannot be removed while iterating them.
    edges_to_remove = []
    for u, v, weight in G.edges(data='weight'):
        same_party = G.nodes[u]['party'] == G.nodes[v]['party']
        threshold = within_party_threshold if same_party else cross_party_threshold
        if weight < threshold:
            edges_to_remove.append((u, v))
    G.remove_edges_from(edges_to_remove)

    # Per-node display attributes, all in G.nodes order.
    party_colors = {'Democrat': dem_color, 'Republican': rep_color}
    node_colors = [party_colors[G.nodes[n]['party']] for n in G.nodes]
    positions = {n: G.nodes[n]['pos'] for n in G.nodes}
    node_x = [positions[n][0] for n in G.nodes]
    node_y = [positions[n][1] for n in G.nodes]
    node_text = [G.nodes[n]['bioname'] for n in G.nodes]

    # Node size reflects how many cross-party edges survived the pruning.
    cross_party_edges_count = {n: 0 for n in G.nodes}
    for u, v in G.edges():
        if G.nodes[u]['party'] != G.nodes[v]['party']:
            cross_party_edges_count[u] += 1
            cross_party_edges_count[v] += 1

    node_sizes = [10 + 0.2 * cross_party_edges_count[n] for n in G.nodes]

    # All edges are drawn by a single line trace; a None entry after each
    # pair of endpoints breaks the line between consecutive edges.
    edge_x = []
    edge_y = []
    for u, v in G.edges():
        x0, y0 = positions[u]
        x1, y1 = positions[v]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    # Create edge traces
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.05, color='grey'),
        hoverinfo='none',
        mode='lines')

    # Create node traces
    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        text=node_text,
        marker=dict(
            showscale=False,
            color=node_colors,
            size=node_sizes,
            line=dict(color=border_color, width=2)))

    # Create the figure. The title font is set through title.font because the
    # older layout.titlefont shorthand is deprecated and rejected by recent
    # plotly releases.
    fig = go.Figure(data=[edge_trace, node_trace],
                    layout=go.Layout(
                        title=dict(
                            text=f'{chamber_title} Voting Similarity - {congress_number}',
                            font=dict(size=16)),
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20, l=5, r=5, t=40),
                        annotations=[dict(
                            text="Hover over nodes to see names",
                            showarrow=False,
                            xref="paper", yref="paper",
                            x=0.005, y=-0.002)],
                        xaxis=dict(showgrid=False, zeroline=False),
                        yaxis=dict(showgrid=False, zeroline=False),
                        paper_bgcolor=bg_color))

    if output_path is None:
        output_path = f'Graphs/Senate_Stuff/graph_{chamber_slug}_{congress_number}.html'

    # write_html does not create missing directories.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fig.write_html(output_path)
    return

In [144]:
# Directories holding the House data, one per congress, named by ordinal
# (e.g. '82nd'). Each is expected to contain H<NNN>_members.csv and
# H<NNN>_votes.csv, where <NNN> is the congress number padded to three digits.
congress_dirs = ['82nd', '95th', '102nd', '112th', '118th']

# Build and save one collaboration network per congress.
for congress in congress_dirs:
    # Drop the two-letter ordinal suffix ('nd', 'th', ...) and left-pad with
    # zeros to three digits; zfill also handles one-digit congresses, which
    # the previous single-zero padding did not.
    congress_number = congress[:-2].zfill(3)
    members_path = os.path.join(congress, f'H{congress_number}_members.csv')
    votes_path = os.path.join(congress, f'H{congress_number}_votes.csv')

    # Read the CSV files
    members_df = pd.read_csv(members_path)
    votes_df = pd.read_csv(votes_path)

    # Consolidate data, discarding lopsided roll calls
    merged_df = consolidate_data(members_df, votes_df, True)

    # Plot collaboration network
    plotCollaborationNetwork(
        merged_df,
        cpc_percent=0.5,
        within_party_percent=0.75,
        dem_color='#003862',
        rep_color='#C00000',
        bg_color='#ffffff',
        border_color='#000000',
    )

In [148]:
# Senate, 118th Congress: load the member and vote tables.
senate_118_members_df = pd.read_csv('Graphs/Senate_Stuff/S118_members.csv')
senate_118_votes_df = pd.read_csv('Graphs/Senate_Stuff/S118_votes.csv')

# Join votes to members and drop lopsided roll calls.
merged_senate_118_df = consolidate_data(
    members_df=senate_118_members_df,
    votes_df=senate_118_votes_df,
    filterOverabundance=True,
)

# Build the co-voting network and write it out as HTML.
plotCollaborationNetwork(
    merged_senate_118_df,
    cpc_percent=0.5,
    within_party_percent=0.75,
    dem_color='#003862',
    rep_color='#C00000',
    bg_color='#ffffff',
    border_color='#000000',
)

In [151]:
# Senate, 82nd Congress: load the member and vote tables.
senate_82_members_df = pd.read_csv('Graphs/Senate_Stuff/S082_members.csv')
senate_82_votes_df = pd.read_csv('Graphs/Senate_Stuff/S082_votes.csv')

# Join votes to members and drop lopsided roll calls.
merged_senate_82_df = consolidate_data(
    members_df=senate_82_members_df,
    votes_df=senate_82_votes_df,
    filterOverabundance=True,
)

# Build the co-voting network and write it out as HTML.
plotCollaborationNetwork(
    merged_senate_82_df,
    cpc_percent=0.5,
    within_party_percent=0.75,
    dem_color='#003862',
    rep_color='#C00000',
    bg_color='#ffffff',
    border_color='#000000',
)