In [1]:
import numpy as np
import pandas as pd 
import os
import requests
import datetime as dt
import pickle
import gc
from math import sin, cos, sqrt, atan2, radians
from sklearn.preprocessing import StandardScaler
from sklearn import cluster
import seaborn as sns
import matplotlib.pyplot as plt
import plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.offline.init_notebook_mode(connected=True)
%matplotlib inline

In [2]:
# Load the source tables from pickle files, using paths relative to the
# current working directory.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
schools= pd.read_pickle('schools.pkl')
donors= pd.read_pickle('donors.pkl')
projects= pd.read_pickle('projects.pkl')
donations= pd.read_pickle('donations.pkl')
resources= pd.read_pickle('resources.pkl')
teachers= pd.read_pickle('teachers.pkl')

# Donor feature matrices; not referenced by the functions defined in the cells below.
donorFeatureMatrixNoAdj = pd.read_pickle('donorFeatureMatrixNoAdj.pkl') # prior to 2018
donorFeatureMatrix = pd.read_pickle('donorFeatureMatrix.pkl') # prior to 2018

In [3]:
def merge_data(idList):
    '''
    Build one wide dataframe for the requested donors.

    The donations of the given donors are selected first, then every
    lookup table is left-joined onto them in a fixed order.

    Arguments: idList - list of 'Donor ID' values to keep
    Returns: dataframe with the donation rows plus the joined columns
    '''
    # (table, join key) pairs, applied in this order. 'School ID' and
    # 'Teacher ID' are not donation keys, so the projects join must come
    # before the schools and teachers joins.
    # NOTE(review): if `resources` holds several rows per 'Project ID', this
    # join repeats donation rows and inflates any later sum of
    # 'Donation Amount' - confirm the table is one row per project.
    joinPlan = (
        (donors, 'Donor ID'),
        (projects, 'Project ID'),
        (resources, 'Project ID'),
        (schools, 'School ID'),
        (teachers, 'Teacher ID'),
    )

    keepRows = donations['Donor ID'].isin(idList)
    merged = donations[keepRows].reset_index(drop = True)
    for lookupTable, joinKey in joinPlan:
        merged = merged.merge(lookupTable, on = joinKey, how = 'left')

    return merged


def summarize_by_city(df):
    '''
    Calculate donation amount in each city and scale to be used in the plot.

    Arguments:
        df - dataframe with at least the columns 'Donation Amount',
             'School City', 'School_Lon', 'School_Lat', 'Donor City',
             'Donor_Lat' and 'Donor_Lon'. It is not modified.
    Returns:
        chartData - one row per 'School City', sorted by 'cityTotal'
                    descending, with a hover 'text' column and an ordered
                    categorical 'group' column (percentile bucket)
        scale     - median of 'cityTotal', used to size the bubbles
    '''
    # Bucket labels in ascending order of donation total. The texts are kept
    # as-is because get_chart_index looks the buckets up by exact label.
    groupLabels = ['Bottom 50%','11-50%', 'Top 2-10%', 'Top 1%', 'Top 0.01%']

    # Aggregate on the groupby result instead of writing a 'cityTotal'
    # column into the caller's dataframe.
    byCity = df.groupby('School City')
    chartData = byCity.first()
    chartData['cityTotal'] = byCity['Donation Amount'].sum()
    chartData = chartData.sort_values(by = 'cityTotal', ascending=False).reset_index()
    chartData = chartData[['School City','cityTotal', 'School_Lon', 'School_Lat', 'Donor City', 'Donor_Lat', 'Donor_Lon']].copy(deep = True)
    chartData['text'] = chartData['School City'] + ': $' + chartData['cityTotal'].apply('{0:,.0f}'.format).astype(str)

    # define cuts for plot
    top01 = chartData['cityTotal'].quantile(0.999)
    top1 = chartData['cityTotal'].quantile(0.99)
    top10 = chartData['cityTotal'].quantile(0.9)
    top50 = chartData['cityTotal'].quantile(0.5)
    topMax = chartData['cityTotal'].max()+1
    edges = [-0.1, top50, top10, top1, top01, topMax]

    # With few cities several quantiles coincide, and pd.cut rejects
    # repeated bin edges. Drop each zero-width bucket together with its label.
    binEdges = [edges[0]]
    binLabels = []
    for upperEdge, label in zip(edges[1:], groupLabels):
        if upperEdge > binEdges[-1]:
            binEdges.append(upperEdge)
            binLabels.append(label)

    # bin donation (right-closed intervals, so a value equal to an edge
    # falls in the lower bucket)
    grouped = pd.cut(chartData['cityTotal'], bins = binEdges, right = True, labels = binLabels)
    # Always expose all five categories, even those that were dropped above.
    chartData['group'] = grouped.cat.set_categories(groupLabels, ordered = True)

    # calculate scale
    scale = chartData['cityTotal'].median()
    return chartData, scale

def get_chart_index(chartData):
    '''
    Since the bubble map has various groupings according to the percentile of the donation, this function
    identifies the row numbers of the data to plot in each grouping as well as the colors for each group.

    Arguments:
        chartData - dataframe with a 'group' column. The row ranges are only
                    meaningful if the rows are ordered from the top group
                    down to the bottom group.
    Returns:
        limits - list of (start, stop) integer row ranges, one per non-empty
                 group, ordered from the top group to the bottom group
        colors - one color per entry in limits, followed by 'lightgrey'
    '''
    groupOrder = ['Top 0.01%', 'Top 1%', 'Top 2-10%', '11-50%', 'Bottom 50%']
    palette = ["rgb(0,116,217)","rgb(255,65,54)","rgb(133,20,75)","rgb(255,133,27)", "rgb(232,255,184)"]

    # fill_value keeps the counts integral when a label is absent from the
    # data; without it reindex inserts NaN and turns the counts into floats.
    counts = chartData['group'].value_counts().reindex(index = groupOrder, fill_value = 0)
    # Not all datasets have value in every group, so only non-empty groups are kept
    counts = counts.loc[counts > 0]

    # get colors: the first len(counts) palette entries, by position
    colors = palette[0:len(counts)]
    colors.append('lightgrey')

    # get limits: running (start, stop) row ranges. Iterating the values
    # avoids integer lookups on a label-indexed Series, and int() guarantees
    # plain Python ints that are valid slice bounds.
    limits = []
    start = 0
    for count in counts:
        stop = start + int(count)
        limits.append((start, stop))
        start = stop

    return limits, colors

def prepare_cities_donation(chartData, scale, limits, colors):
    '''
    Build one plotly 'scattergeo' trace per row range in `limits`.
    These traces cover the cities that the donor donated to.

    Arguments:
        chartData - dataframe with 'School_Lon', 'School_Lat', 'text',
                    'cityTotal' and 'group' columns
        scale     - divisor applied to 'cityTotal' to get the bubble size
        limits    - sequence of (start, stop) row ranges into chartData
        colors    - sequence of marker colors, indexed like `limits`
    Returns: list of trace dictionaries, in the order of `limits`
    '''
    def _build_trace(position, bounds):
        # Rows belonging to this group; the trace is named after the group
        # of its first row.
        rows = chartData[bounds[0]:bounds[1]]
        return {
            'type': 'scattergeo',
            'locationmode': 'USA-states',
            'lon': rows['School_Lon'],
            'lat': rows['School_Lat'],
            'text': rows['text'],
            'marker': {
                'size': rows['cityTotal']/scale,
                'color': colors[position],
                'line': {'width': 0.5, 'color': 'rgb(40,40,40)'},
                'sizemode': 'area',
            },
            'name': rows.iloc[0]['group'],
        }

    return [_build_trace(position, bounds) for position, bounds in enumerate(limits)]

def donor_city_total(chartData):
    '''
    Look up how much was donated to schools in the donor's home city.

    The home city is the 'Donor City' value at index label 0 of chartData.

    Returns a tuple (donorCityTotal, donorTotal, donorHome):
        - when a 'School City' equals the home city, donorTotal is the
          matching 'cityTotal' rows (a Series) and donorCityTotal is the same
          Series formatted as strings with thousands separators
        - otherwise donorCityTotal is the string '0' and donorTotal is 0
    '''
    donorHome = chartData['Donor City'][0]
    isHomeCity = chartData['School City'] == donorHome

    # Guard clause: nothing was donated in the home city.
    if isHomeCity.sum() <= 0:
        return '0', 0, donorHome

    donorTotal = chartData[isHomeCity]['cityTotal']
    donorCityTotal = donorTotal.apply('{0:,.0f}'.format).astype(str)
    return donorCityTotal, donorTotal, donorHome

def prepare_donor_donation(chartData, donorTotal, donorCityTotal, scale, donorHome):
    '''
    Build the plotly 'scattergeo' trace for the donation in the donor's
    home town.

    Arguments:
        chartData      - dataframe with 'Donor_Lon' and 'Donor_Lat' columns
        donorTotal     - amount donated in the home city (numeric or Series)
        donorCityTotal - the same amount formatted as text (str or Series)
        scale          - divisor applied to donorTotal to get the bubble size
        donorHome      - name of the donor's home city
    Returns: trace dictionary named "Donor's City"
    '''
    # Series.max() skips missing coordinates. The builtin max() compares
    # element by element, so its result depended on where a NaN happened to
    # sit in the column.
    # NOTE(review): lon and lat are maximised independently, so rows with
    # different donor coordinates can yield a point that matches no donor -
    # confirm callers pass a single donor location.
    donorLon = chartData['Donor_Lon'].max()
    donorLat = chartData['Donor_Lat'].max()

    donorCity = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = donorLon,
        lat = donorLat,
        text = "Donor's City: " + donorHome + " $" + donorCityTotal,
        marker = dict(
            size = donorTotal/scale,
            color = 'rgb(40,40,40)',
            line = dict(width=0.5, color='rgb(40,40,40)'),
            sizemode = 'area'),
        name = "Donor's City")
    return donorCity

def plot_bubble_map(idList):
    '''
    Plot the donation bubble map for a list of donors.

    Runs the preparation steps in order (merge, per-city summary, group
    limits, traces), then hands the figure to plotly's offline iplot.

    Arguments: idList - list of 'Donor ID' values
    Returns: whatever py.offline.iplot returns
    '''
    # Data preparation
    merged = merge_data(idList)
    chartData, scale = summarize_by_city(merged)
    limits, colors = get_chart_index(chartData)

    # One trace per donation group, plus one for the donor's home city
    traces = prepare_cities_donation(chartData, scale, limits, colors)
    donorCityTotal, donorTotal, donorHome = donor_city_total(chartData)
    homeTrace = prepare_donor_donation(chartData, donorTotal, donorCityTotal, scale, donorHome)
    traces.append(homeTrace)

    # Map styling: US-only albers projection, grey land, white borders
    borderColor = "rgb(255, 255, 255)"
    geoSettings = {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showland': True,
        'landcolor': 'rgb(217, 217, 217)',
        'subunitwidth': 1,
        'countrywidth': 1,
        'subunitcolor': borderColor,
        'countrycolor': borderColor,
    }
    layout = {
        'title': 'Donation by City<br>(Click legend to toggle traces)',
        'showlegend': True,
        'geo': geoSettings,
    }
    figure = {'data': traces, 'layout': layout}

    return py.offline.iplot(figure, validate=False, filename='donation map')

In [4]:
# Example run: draw the donation bubble map for a single donor.
# plot_bubble_map accepts a list, so more 'Donor ID' values can be added here.
idList = ['4416745560343f14a74dedcda4ec03b0']
plot_bubble_map(idList)