### Bonus Questions:

If you complete all of the above, you can attempt these challenging bonus questions.

Open Secrets also gives a detailed breakdown of contributions by source. For example, for Tennessee's second district, this is located at https://www.opensecrets.org/races/candidates?cycle=2020&id=TN02&spec=N

Scrape these pages to get information on contributions by source. See if you can find anything interesting in terms of the source of contributions. Some examples to get you started:
* What does the overall distribution of funding sources look like?
* Is there any detectable difference in contribution sources between Democratic and Republican candidates?
* Do the funding sources for either the winning candidate or incumbent candidate differ from the other candidates?

In [None]:
import pandas as pd
import warnings
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import plotly.express as px
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Select, HoverTool
from bokeh.io import show, output_notebook
from bokeh.transform import factor_cmap
from bokeh.layouts import row, column
from bokeh.models import FactorRange
from colorcet import glasbey
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 100)

In [None]:
# Load the three CSV tables used by the rest of the notebook.
all_contributions_by_source = pd.read_csv(
    "Write_Data_Here/all_contributions_by_source.csv"
)
top_twenty_sources = pd.read_csv(
    "Write_Data_Here/top_twenty_sources.csv"
)
all_districts = pd.read_csv(
    "Write_Data_Here/all_districts.csv"
)

# Print the 25 contributors that occur most often in top_twenty_sources.
contributor_frequency = top_twenty_sources['Contributor'].value_counts()
print(contributor_frequency.nlargest(25))

In [None]:
#%matplotlib
# Sum of 'Total' per (Contributor, Party) for the ten contributors that appear
# most often in top_twenty_sources. Rows are contributors, columns are parties;
# combinations with no data become 0.
most_frequent_contributors = (
    top_twenty_sources['Contributor'].value_counts().nlargest(10).index
)
three_d_bar_chart_data = (
    pd.pivot_table(
        top_twenty_sources.loc[
            top_twenty_sources['Contributor'].isin(most_frequent_contributors)
        ],
        values='Total',
        index='Contributor',
        columns='Party',
        aggfunc='sum',
    ).fillna(0).astype('int64')
)

# Take the grid size from the data instead of assuming 10 contributors x 4 parties.
n_contributors, n_parties = three_d_bar_chart_data.shape

fig = plt.figure(figsize=(15, 12))
ax1 = fig.add_subplot(111, projection='3d')

# Bar anchor positions: x is the party (column) position, y is the contributor
# (row) position. After ravel() x varies fastest, i.e. the bars are listed
# contributor by contributor.
x3, y3 = np.meshgrid(np.arange(n_parties), np.arange(n_contributors))
x3, y3 = x3.ravel(), y3.ravel()

# Bar heights must be flattened in the same order as the positions above.
# Row-major flattening of the table (one contributor row after another)
# gives exactly that, so bar k gets the value at row y3[k], column x3[k].
dz = three_d_bar_chart_data.to_numpy().ravel()

dx = 0.5
dy = 0.5
z3 = np.zeros_like(dz)  # every bar starts at z = 0

# One colour per party column; cycle if there are more parties than colours.
color_dict = {0: 'b', 1: 'y', 2: 'r', 3: 'g'}
colormap = [color_dict[i % len(color_dict)] for i in x3]

ax1.bar3d(x3, y3, z3, dx, dy, dz, alpha=0.8, color=colormap, shade=True)

ax1.set_zlabel('Sum of Contribution')

ticksx = np.arange(n_parties)
plt.xticks(ticksx, three_d_bar_chart_data.columns)

# Label the rows from the pivot table's own index so that each tick names the
# contributor actually plotted at that position (the index is sorted by
# pivot_table, so a hand-written label list can silently fall out of order).
ticksy = np.arange(n_contributors)
plt.yticks(ticksy, three_d_bar_chart_data.index)
plt.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Rank the contributors by their Democrat / Republican totals (at most ten rows
# each) and give every row an angular 'Width' in degrees proportional to its
# share of that party's total, so the widths of one plot add up to 360.
# .copy() makes each frame independent before a column is added to it, and the
# column sum is computed once rather than once per row.
first_plot = (
    three_d_bar_chart_data.sort_values('Democrat', ascending=False).head(10).copy()
)
first_plot['Width'] = first_plot['Democrat'] / first_plot['Democrat'].sum() * 360

second_plot = (
    three_d_bar_chart_data.sort_values('Republican', ascending=False).head(10).copy()
)
second_plot['Width'] = second_plot['Republican'] / second_plot['Republican'].sum() * 360

# Two side-by-side polar subplots, one per party.
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "polar"}, {"type": "polar"}]])

def find_theta(i, series):
    """Return the centre angle for the polar bar whose angular width is ``i``.

    The widths are laid out one after another around the circle; the centre of
    a bar is the running total of "half of this width plus half of the
    preceding width" over every position up to and including the bar's own.
    For position 0 the preceding width wraps around to the last element, so
    all angles are shifted by half of the final width (the bars still sit
    edge to edge).

    Args:
        i: Width value to locate. The first element of ``series`` equal to
            ``i`` determines the position, so equal widths share one angle.
        series: Ordered bar widths; a pandas Series (any index) or any other
            iterable of numbers.

    Returns:
        The accumulated centre angle, in the same unit as the widths.

    Raises:
        ValueError: If ``i`` does not occur in ``series``.
    """
    # Work on a plain list: indexing is then always positional, independent of
    # the Series' index labels, and the lookup below is done only once.
    widths = list(series)
    last_position = widths.index(i)

    theta = 0
    for position in range(last_position + 1):
        # position - 1 is -1 for the first bar, i.e. the last width (wrap-around).
        theta += widths[position] / 2 + widths[position - 1] / 2
    return theta

# Add one polar bar trace per party: bar length is the party total, bar width
# is the precomputed 'Width', and the bar centre comes from find_theta.
polar_panels = [
    (first_plot, 'Democrat', 1),
    (second_plot, 'Republican', 2),
]
for panel_data, party_column, subplot_col in polar_panels:
    panel_widths = panel_data['Width']
    bar_centres = [find_theta(w, panel_widths) for w in panel_widths]
    fig.add_trace(
        go.Barpolar(
            theta=bar_centres,
            width=panel_widths,
            r=panel_data[party_column],
        ),
        row=1,
        col=subplot_col,
    )

# Kept as the final expression of the cell: its return value is what the
# notebook displays while the explicit fig.show() below stays commented out.
fig.update_layout(
    title_text="Breakdown of Top Funding Sources by Major Party",
    height=600,
    width=800,
)

#fig.show()

In [None]:
# What does the overall distribution of funding sources look like?
# One box per "Type of Contribution", summarising the "Amount" column.
fig, ax = plt.subplots(figsize=(15, 10))
sns.boxplot(
    x="Type of Contribution",
    y="Amount",
    data=all_contributions_by_source,
    ax=ax,
)
# Tilt the category labels so neighbouring names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_ylabel('Amount in $ Millions')
plt.show()

In [None]:
def bkapp(doc):
    """Build the interactive top-contributors bar chart and add it to ``doc``.

    For one member of Congress at a time, the chart draws a vertical bar per
    ``Contributor`` row of ``Write_Data_Here/top_twenty_sources.csv`` with the
    row's ``Total`` as height (rows containing missing values are dropped).
    Two dropdowns drive it: the first chooses "Senator" or "Representative"
    and refills the second with the matching names; the second chooses the
    member whose contributors are plotted.

    Args:
        doc: Document object; the finished layout is registered on it with
            ``doc.add_root``.
    """
    # Member shown first for each chamber when that chamber is selected.
    default_member = {'Senator': 'Bernie Sanders',
                      'Representative': 'Alexandria Ocasio-Cortez'}
    initial_chamber = 'Senator'
    initial_member = default_member[initial_chamber]

    top_twenty_sources = pd.read_csv("Write_Data_Here/top_twenty_sources.csv").dropna()

    def rows_for(name):
        """Return the rows of top_twenty_sources whose 'Name' equals ``name``."""
        return top_twenty_sources[top_twenty_sources['Name'] == name]

    def members_of(chamber):
        """Return the unique names whose chamber column equals ``chamber``."""
        in_chamber = top_twenty_sources['Senator or Representative'] == chamber
        return top_twenty_sources['Name'].loc[in_chamber].unique().tolist()

    # Create ColumnDataSource.
    initial_rows = rows_for(initial_member)
    source = ColumnDataSource(initial_rows)

    # Create hovertool.
    hover = HoverTool(tooltips=[("Contributor", "@Contributor"),
                                ("Total", "@Total")],
                      mode='vline')

    # Create figure for vbars. x_range must be set to initial value.
    top_twenty_plot = figure(
        title='Top Contributors to %s' % initial_member,
        x_range=FactorRange(factors=initial_rows['Contributor'].tolist()),
        x_axis_label='Contributor',
        y_axis_label='Total',
        width=650,
    )

    # Create actual vbars and color map (one palette colour per contributor).
    top_twenty_plot_two = top_twenty_plot.vbar(
        x='Contributor',
        top='Total',
        width=0.9,
        source=source,
        fill_color=factor_cmap('Contributor',
                               palette=glasbey,
                               factors=top_twenty_plot.x_range.factors),
    )

    # Add hovertool to plot.
    top_twenty_plot.add_tools(hover)

    # Adjust xticks to have room.
    top_twenty_plot.xaxis.major_label_orientation = 1.25

    def select1_change(attrname, old, new):
        """Refill the member dropdown to match the chosen chamber."""
        # Anything other than 'Senator' is treated as 'Representative'.
        chamber = 'Senator' if select1.value == 'Senator' else 'Representative'
        select2.options = members_of(chamber)
        select2.value = default_member[chamber]

    def update_plot(attrname, old, new):
        """Redraw bars, x factors, colours and title for the selected member."""
        member_rows = rows_for(select2.value)
        source.data = ColumnDataSource.from_df(member_rows)
        top_twenty_plot.x_range.factors = member_rows['Contributor'].tolist()
        top_twenty_plot_two.glyph.fill_color = factor_cmap(
            'Contributor',
            palette=glasbey,
            factors=top_twenty_plot.x_range.factors,
        )
        top_twenty_plot.title.text = 'Top Contributors to %s' % select2.value

    # Create dropdown boxes. The member list starts out restricted to the
    # initially selected chamber, matching what select1_change produces later.
    select1 = Select(title="Senator or Representative",
                     value=initial_chamber,
                     options=["Senator", "Representative"])

    select2 = Select(title="Member of Congress",
                     value=initial_member,
                     options=members_of(initial_chamber))

    # Change values on change in dropdown box.
    select1.on_change('value', select1_change)
    select2.on_change('value', update_plot)

    # Bokeh specific display language: dropdowns stacked to the left of the plot.
    layout = row(column(select1, select2), top_twenty_plot)
    doc.add_root(layout)

# Activates widget bar plot of top twenty contributors that updates based on member of congress.
# output_notebook() is called first so that show() renders into the notebook;
# show() receives the bkapp function itself (not a finished figure), and bkapp
# adds its layout to the document it is handed.
output_notebook()
show(bkapp)