#### Create a graph of contributions between PACs ####

Plots fund transfers between PACs recorded in "pac_other22.csv", labeling the top recipient committees with names looked up in "cmtes22.csv".

In [20]:
def make_plot_contrib_pacs22(df_pac_other22, df_cmtes22, party=None, nrows=1e100):
    """Draw a directed graph of PAC-to-PAC transfers and return it.

    Each distinct (filerid, recipid) pair becomes an edge from the filer to
    the recipient. Nodes are sized by in-degree, nodes and edges are colored
    by the ``party`` value found on the filer's rows, and the 20 nodes with
    the highest in-degree are labeled with their ``pacshort`` name.

    Args:
        df_pac_other22: DataFrame with at least the columns ``filerid``,
            ``recipid`` and ``party``. It is not modified.
        df_cmtes22: DataFrame with at least the columns ``cmteid`` and
            ``pacshort``; used only to look up node labels.
        party: If not None, keep only rows whose normalized party equals this
            value (e.g. ``"D"`` or ``"R"``). Missing/blank parties are
            normalized to ``"X"``.
        nrows: Maximum number of filtered rows to use when building edges.
            Values at or above the number of available rows (including the
            default) mean "use everything".

    Returns:
        The ``networkx.DiGraph`` that was drawn. Each edge carries a
        ``weight`` attribute equal to the number of rows for that pair.
    """
    # Take an explicit copy so the party normalization below neither warns
    # (SettingWithCopyWarning) nor writes through to the caller's frame.
    pairs = df_pac_other22[['filerid', 'recipid', 'party']].drop_duplicates().copy()

    # Placeholder 'X' for a missing or blank party.
    pairs['party'] = pairs['party'].fillna('X').str.strip().replace('', 'X').fillna('X')

    # Drop self-transfers and rows with a missing/blank endpoint.
    not_self = pairs['filerid'] != pairs['recipid']
    has_filer = pairs['filerid'].notna() & (pairs['filerid'] != '')
    has_recip = pairs['recipid'].notna() & (pairs['recipid'] != '')
    pairs = pairs[not_self & has_filer & has_recip]

    # Filter by party, if desired. "D" or "R"
    if party is not None:
        pairs = pairs[pairs['party'] == party]

    # The default nrows is a huge float, which is not a valid slice bound for
    # pandas; treat anything >= the row count as "no limit" and slice
    # positionally with an int otherwise.
    if nrows is not None and nrows < len(pairs):
        pairs = pairs.iloc[:int(nrows)]

    # Edge weight = number of rows per (filer, recipient) pair.
    # NOTE(review): an earlier comment says this should be the dollar amount;
    # no amount column is referenced here, so the row count is kept.
    df_edges = pairs.groupby(['filerid', 'recipid']).size().reset_index(name='weight')

    # Make graph with weighted edges.
    G = nx.from_pandas_edgelist(
        df_edges,
        source='filerid',
        target='recipid',
        edge_attr='weight',
        create_using=nx.DiGraph(),
    )
    in_degrees = dict(G.in_degree())
    nodes_sorted = sorted(G.nodes, key=in_degrees.get, reverse=True)

    # Degree-based node size.
    node_sizes_sorted = [in_degrees[node] * 10 for node in nodes_sorted]

    # Labels for the top 20 nodes by in-degree. Nodes without a usable
    # (string) pacshort are left unlabeled instead of raising.
    label_dict = dict(zip(df_cmtes22['cmteid'], df_cmtes22['pacshort']))
    top_n_labels = {}
    for node in nodes_sorted[:20]:
        short_name = label_dict.get(node)
        if isinstance(short_name, str):
            top_n_labels[node] = split_label(short_name.upper())

    # Party color per filer, keyed by the string form of filerid. Built from
    # local Series so the input frame is left untouched; for repeated filer
    # ids the last row wins.
    color_map = {'D': 'blue', 'R': 'red', 'G': 'green', 'I': 'brown', 'X': 'gray'}
    filer_keys = df_pac_other22['filerid'].fillna('X').astype(str)
    filer_colors = [color_map.get(p, 'gray') for p in df_pac_other22['party']]
    node_colors = dict(zip(filer_keys, filer_colors))
    node_color_list_sorted = [node_colors.get(str(node), 'gray') for node in nodes_sorted]

    # Edges take the color of their source (filer) node; width scales with weight.
    edge_colors = [node_colors.get(str(source), 'gray') for source, _ in G.edges]
    edge_widths = [G[u][v]['weight'] * 0.1 for u, v in G.edges]

    # Draw. A single default spring layout is used for all three layers.
    plt.figure(figsize=(10, 10))
    pos = nx.spring_layout(G)
    nx.draw_networkx_edges(G, pos, width=edge_widths, edge_color=edge_colors)
    nx.draw_networkx_nodes(
        G,
        pos,
        nodelist=nodes_sorted,
        node_color=node_color_list_sorted,
        node_size=node_sizes_sorted,
        edgecolors='white',
        linewidths=1,
    )
    nx.draw_networkx_labels(
        G,
        pos,
        labels=top_n_labels,
        font_size=5,
        font_color="black",
        bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.4'),
    )
    plt.show()

    return G