In [None]:
import pandas as pd
import shapefile
import shutil
from pathlib import Path
from collections import defaultdict
from matplotlib import pyplot as plt
# import folium
# import numpy as np

########################################################################################################################

# ---- Input locations -------------------------------------------------------
# Folder holding the input data; the two prompts below ask for the CSV name and
# for the (optional) column that flags rows to be excluded.
Datapath = Path('Input_Data')
filename = input("Input csv file name with the extension >> ")
exclude_col = input("Name of the exclude column to filter rows. Enter 'None' if no such column is present >> ")
# An earlier revision hard-coded the CSV instead of prompting for it:
# contributions_csv = Datapath.joinpath('BCU Contributions - exclusions updated.csv')
zipcodes_shp = Datapath / 'zipcodes_MA_map/zipcodes_extract'

# ---- Output locations ------------------------------------------------------
# Base path (no extension) of the processed shapefile
Outputpath = Path('Output')
output_shp = Outputpath / 'Shapefile/output_shp'

# ---- Analysis parameters ---------------------------------------------------
# first and last calendar year covered by the analysis (both inclusive)
start_year = 2017
end_year = 2020

# Dollar limits: amounts <= min_limit or >= max_limit are kept in separate
# outlier collections instead of the qualified donations.
min_limit = 0
max_limit = 1000000

# ---- CSV column headers ----------------------------------------------------
amount = 'Amount'
date = 'Date Received'
donor_id_header = 'VANID'
zipcode_header = 'Home Zip/Postal'
# Header of the exclude column, or None when the user answered 'None' at the prompt
# (a fixed value such as exclude = 'Exclude' was used before the prompt existed).
exclude = None if exclude_col == 'None' else exclude_col

# Zipcodes in the Boston area that should appear on the map. The literal contains a
# few repeats, so it goes through a set first; the final list holds each zipcode once,
# as a digit string without a leading zero (e.g. '2132').
_boston_zip_numbers = set((
    2132, 2128, 2136, 2130, 2135, 2131, 2124, 2138, 2467, 2125, 2127,
    2135, 2445, 2119, 2122, 2145, 2129, 2134, 2446, 2144, 2140, 2121,
    2118, 2210, 2116, 2115, 2215, 2120, 2141, 2467, 2114, 2142, 2111,
    2110, 2108, 2109, 2163, 2113, 2139, 2143, 2126,
))
zips_to_map = list(map(str, _boston_zip_numbers))

########################################################################################################################

# read contributions CSV file as a pandas dataframe
# The file name typed at the prompt is looked up inside the input data folder
# (an absolute path typed at the prompt is used as-is by joinpath).
contributions_csv = Datapath.joinpath(filename)
df = pd.read_csv(contributions_csv, parse_dates=[date])

def _new_donation_ledger():
    """Return an empty, auto-creating mapping: year -> zipcode -> donor id -> list of amounts."""
    return defaultdict(lambda: defaultdict(lambda: defaultdict(list)))


# One independent ledger per category a contribution row can fall into.
excluded_donations = _new_donation_ledger()
small_donations = _new_donation_ledger()
large_donations = _new_donation_ledger()
qualified_donations = _new_donation_ledger()


def _normalize_zip(raw_zip):
    """Return a zipcode key in the same form used by ``zips_to_map``.

    ``zips_to_map`` and the shapefile lookup both use digit strings without
    leading zeros (e.g. ``'2132'``). The CSV value may arrive as an int, a
    float, or a string such as ``'02132'`` or ``'02132-1234'``, so it is
    reduced to that form here. Missing values become ``'Unknown'``; text that
    is not a numeric zipcode (e.g. a foreign postal code) is returned stripped
    but otherwise unchanged.
    """
    if pd.isna(raw_zip):
        return 'Unknown'
    text = str(raw_zip).strip()
    head = text.split('-')[0].strip()  # drop a ZIP+4 suffix
    if head.endswith('.0'):            # float-typed column, e.g. 2132.0
        head = head[:-2]
    if head.isdigit():
        return str(int(head))          # also removes leading zeros
    return text or 'Unknown'


# Sort every contribution row into exactly one ledger, keyed by
# year -> zipcode -> donor id. Precedence: excluded rows first, then amounts at or
# below min_limit, then amounts at or above max_limit; everything else is qualified.
for index, row in df.iterrows():
    _amt = row[amount]
    _yr = row[date].year
    _id = row[donor_id_header]
    _zip = _normalize_zip(row[zipcode_header])
    if (exclude is not None) and (row[exclude] == 1):
        excluded_donations[_yr][_zip][_id].append(_amt)
    elif _amt <= min_limit:
        small_donations[_yr][_zip][_id].append(_amt)
    elif _amt >= max_limit:
        large_donations[_yr][_zip][_id].append(_amt)
    else:
        qualified_donations[_yr][_zip][_id].append(_amt)

# Per-year roll-ups of the qualified donations, each keyed first by year and then
# by zipcode: distinct donors, number of donations, and total dollars.
num_donors = {}
num_donations = {}
total_amount = {}

for yr in range(start_year, end_year + 1):
    num_donations[yr] = defaultdict(int)
    num_donors[yr] = defaultdict(int)
    total_amount[yr] = defaultdict(int)
    # qualified_donations[yr][zip] maps donor id -> list of that donor's amounts
    for _zip, amounts_by_donor in qualified_donations[yr].items():
        num_donors[yr][_zip] = len(amounts_by_donor)
        num_donations[yr][_zip] = sum(len(amts) for amts in amounts_by_donor.values())
        total_amount[yr][_zip] = sum(sum(amts) for amts in amounts_by_donor.values())

########################################################################################################################

# Build the output shapefile: every shape whose zipcode is in zips_to_map is copied
# over with its original attributes plus three extra columns per analysis year
# (number of donors, number of donations, total amount).
# Reader and Writer are used as context managers so both are closed even when a
# record fails part-way through.

# zipcode (str) -> value of the shapefile's 'Name' attribute, for the selected zipcodes
neighborhoods = dict()

with shapefile.Reader(str(zipcodes_shp)) as r, shapefile.Writer(str(output_shp)) as w:
    # Reader.fields starts with pyshp's DeletionFlag entry, which must not be
    # passed on to the writer.
    w.fields = r.fields[1:]
    for yr in range(start_year, end_year + 1):
        w.field('{0}donors'.format(yr), 'N', 6)
        w.field('{0}donati'.format(yr), 'N', 6)
        w.field('{0}amount'.format(yr), 'F', 12, decimal=2)

    for sr in r.iterShapeRecords():
        _zip = str(int(sr.record['POSTCODE']))
        if _zip not in zips_to_map:
            continue
        neighborhoods[_zip] = sr.record['Name']
        # same column order as the w.field() calls above; the per-year tables are
        # defaultdicts, so zipcodes without donations contribute zeros
        new_fields = []
        for yr in range(start_year, end_year + 1):
            new_fields += [num_donors[yr][_zip], num_donations[yr][_zip], total_amount[yr][_zip]]
        to_add = list(sr.record) + new_fields
        w.record(*to_add)
        w.shape(sr.shape)

# carry the source projection file over unchanged so the output keeps its CRS
shutil.copy(str(zipcodes_shp.with_suffix('.prj')), str(output_shp.with_suffix('.prj')))

########################################################################################################################

# Annual Report
# One CSV per year listing, for each neighborhood, the number of qualified donors and
# the total amount, followed by an "Other or Unknown Zip Code" row and a row for the
# excluded contributions. Finally the whole Output folder is zipped.
summary_tables_path = Outputpath.joinpath("Annual_Reports/")
summary_tables_path.mkdir(parents=True, exist_ok=True)

for yr in range(start_year, end_year + 1):
    csv_to_save = summary_tables_path.joinpath("{}_donations_by_neighborhood.csv".format(yr))
    donations_dict = qualified_donations[yr]

    # NOTE: these two names stay bound to the last year's figures after the loop;
    # the pie-chart cell further down reads them.
    neighborhood_donors = defaultdict(int)
    neighborhood_amount = defaultdict(int)
    for _zip, amounts_by_donor in donations_dict.items():
        if _zip in zips_to_map:
            # a mapped zipcode that was not found in the shapefile has no
            # neighborhood name; count it under 'Other' instead of raising KeyError
            place = neighborhoods.get(_zip, 'Other')
        else:
            place = 'Other'
        neighborhood_donors[place] += len(amounts_by_donor)
        neighborhood_amount[place] += sum(sum(v) for v in amounts_by_donor.values())

    # rows are ordered by donor count, largest first; 'Other' always goes last
    to_write = "Neighborhood,# Donors,Amount\n"
    for k in sorted(neighborhood_donors, key=neighborhood_donors.get, reverse=True):
        if k != 'Other':
            to_write += "{0},{1},{2}\n".format(k, neighborhood_donors[k], neighborhood_amount[k])
    to_write += "Other or Unknown Zip Code,{0},{1}\n".format(neighborhood_donors['Other'], neighborhood_amount['Other'])

    donations_dict = excluded_donations[yr]
    excl_donors = sum(len(by_donor) for by_donor in donations_dict.values())
    excl_amt = sum(sum(sum(v) for v in by_donor.values()) for by_donor in donations_dict.values())
    to_write += "Excluded Contributions,{0},{1}".format(excl_donors, excl_amt)

    with open(str(csv_to_save), 'w') as f:
        f.write(to_write)

shutil.make_archive('output_files', 'zip', 'Output')

In [None]:
# Two pie charts (donor counts and dollar totals) for the Boston neighborhoods.
# NOTE: neighborhood_donors / neighborhood_amount are the variables left behind by the
# annual-report loop, i.e. they hold the figures of the last year that loop processed.
non_boston = ['BROOKLINE', 'CAMBRIDGE', 'SOMERVILLE', 'CHESTNUT HILL']
# sorted so that slice order (and therefore slice colour) is the same on every run
labels = sorted((x for x in set(neighborhoods.values()) if x not in non_boston), key=str)
boston_neighborhoods = labels

fig = plt.figure()

# upper half of the left column: number of donors
sizes = [neighborhood_donors[x] for x in boston_neighborhoods]
ax1 = fig.add_axes([0, 0.5, .5, .5], aspect=1)  # aspect=1 keeps the pie circular
ax1.pie(sizes, labeldistance=None)
ax1.set_title('Number of donors by location')

# lower half of the left column: total amount
sizes = [neighborhood_amount[x] for x in boston_neighborhoods]
ax2 = fig.add_axes([0, 0, .5, .5], aspect=1)
patches, texts = ax2.pie(sizes, labeldistance=None)
ax2.set_title('Total donation amount by location')

# plt.legend(patches, labels, bbox_to_anchor=[0,0,0.5,1])
plt.show()

In [None]:
# Print the totals over all zipcodes for every analysis year: three lines per year.
_totals_to_print = (
    ("Number of donors in {} = {}", num_donors),
    ("Number of donations in {} = {}", num_donations),
    ("Total amount in {} = {}", total_amount),
)
for yr in range(start_year, end_year + 1):
    for _template, _per_year in _totals_to_print:
        print(_template.format(yr, sum(_per_year[yr].values())))

In [None]:
# create a folium map for zipcode boundaries
# NOTE(review): this cell cannot run on a fresh kernel as written:
#   * `import folium` is commented out in the import cell, so `folium` is undefined;
#   * `zipcodes_json` is not assigned in any of the cells above - presumably it should
#     point at a GeoJSON export of the zipcode boundaries; confirm and define it.
# def style_fcn(x):
#     return {'lineColor': '#00FFFFFF'}
# style_function=style_fcn

# base map centred on [42.34, -71.10] with the 'cartodbpositron' tiles and a scale bar
m = folium.Map(location=[42.34, -71.10], tiles='cartodbpositron', zoom_start=11.2, control_scale=True)
# overlay the GeoJSON referenced by zipcodes_json on the base map
folium.GeoJson(str(zipcodes_json)).add_to(m)
# bare last expression so the notebook displays the map
m