# GBH Earmarks Python Workbook

This is a workbook which has all data calculations & transforming for earmarks.

In [4]:
# Imports

import pandas as pd
# import geopandas as gpd
import matplotlib.pyplot as plt
import openpyxl # for spreadsheets
import matplotlib.font_manager as fm
import numpy as np
import re

In [5]:
# Options to support viewing of more items in console

pd.set_option('display.max_rows', 500)
pd.options.display.float_format = '{:20,.5f}'.format

In [7]:
# Load data

earmarks = pd.read_csv("data/earmarks.csv")
# town_shapefiles = gpd.read_file("data/town_shapefile.shp")
amendment_proposers = pd.read_excel("data/amendments.xlsx")
earmarks.head()
amendment_proposers.head()

Unnamed: 0,Amendment No.,Title,Sponsor,Subject,Consolidated Amendment,Status
0,1,New England Center for Children,"Gregoire, Danielle W. (HOU)",Education,B,Consolidated
1,2,Carbon Monoxide Detectors in Schools,"Gregoire, Danielle W. (HOU)",Education,B,Consolidated
2,3,Medical OnSite Academy for medical personnel\n...,"Garry, Colleen M. (HOU)",Health and Human Services,B,Consolidated
3,4,Broadband Access in the City of Marlborough,"Gregoire, Danielle W. (HOU)",Infrastructure,C,Consolidated
4,5,Permitting Software in the City of Marlborough,"Gregoire, Danielle W. (HOU)",Infrastructure,C,Consolidated


In [17]:
# Population

population = pd.read_csv("data/ma_census/population.csv", header=1)
population = population.rename(columns={"Geographic Area Name": "city", " !!Total:": "population"}, errors="raise")
# print(population.head())
population = population.loc[:, ["city", "population"]] # Comment this line out if you want less condensed statistics for population, including racial distribution

new_names = {}
for c in population["city"]:
    if type(c) == int or c == "County subdivisions not defined":
        continue
    if c == "Massachusetts":
        new_names[c] = "Massachusetts"
    else:
        new_names[c] = re.sub(r" (Town)? ?(town)? ?(city)?, \w* County, Massachusetts$", "", c) # Replaces "Massachusetts"
population = population.replace(new_names)
population = population.loc[~population["city"].isin(["County subdivisions not defined"])]
population.sort_values(by="city")

# I eventually just cleaned cities manually for those that had "Town" in them because it's just frustrating

population.to_csv(path_or_buf="out/cities.csv")
population

Unnamed: 0,city,population
0,"County subdivisions not defined, Barnstable Co...",0
1,Barnstable,48916
2,Bourne,20452
3,Brewster,10318
4,Chatham,6594
5,Dennis,14674
6,Eastham,5752
7,Falmouth,32517
8,Harwich,13440
9,Mashpee,15060


In [8]:
# Basic statistics

median = earmarks["Amount"].median()
mean = earmarks["Amount"].mean()
stddev = earmarks["Amount"].std()
print(mean, stddev) # stddev)

402090.382387022 2103267.934430257


In [9]:
# Agency cleanup -- removes the agency description and compresses down to the standard 3 word letter.

def agency_code(row):
    return row["Administering State Agency"][:3]

earmarks["Agency"] = earmarks.apply (lambda row: agency_code(row), axis=1)
earmarks["Administering State Agency"].value_counts()
earmarks
# TODO: group by agency

Unnamed: 0,Earmark Description,Location,Amount,Administering State Agency,State Agency Contact,Account Number,Earmark Language,Agency
0,54th Massachusetts Reenactors and Historical S...,Boston,25000,MMP - Massachusetts Marketing Partnership,Phyllis.cahaly@mass.gov,70021530,"provided further that not less than $25,000 sh...",MMP
1,A Street Pier Boat Ramp Rebuilding,Hull,150000,DCR - Department Of Conservation And Recreation,max.j.tassinari@mass.gov,15992031,"provided further, that not less than $150,000 ...",DCR
2,Abigail Adams Historical Society,Weymouth,25000,ANF - Executive Office For Administration And ...,daniel.shark@mass.gov,15992058,"provided further, that not less than $25,000 s...",ANF
3,Abington Housing Authority Upgrades,Abington,85000,DHCD - Dept Of Housing And Community Development,alain.fabo@mass.gov,70049318,"provided further, that not less than $85,000 s...",DHC
4,Abington Utility Task Vehicle,Abington,28000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,15992043,"provided further, that not less than $28,000 s...",HED
...,...,...,...,...,...,...,...,...
858,Wrentham Public Water Supply,Wrentham,150000,DEP - Department Of Environmental Protection,steven.mccurdy@mass.gov,20000015,"provided further, that not less than $150,000 ...",DEP
859,Youth Community Center in Georgetown,Georgetown,25000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,70100013,"provided further, that not less than $25,000 s...",HED
860,Youth Development in Lawrence,Lawrence,200000,LWD - Executive Office Of Labor And Workforce ...,sheila.l.tunney2@mass.gov,70100013,"provided further, that not less than $200,000 ...",LWD
861,YWCA Funding,Statewide,4500000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,15992047,"and provided further, that not less than $4,50...",HED


In [7]:
mpc = earmarks["Location"].value_counts().rename_axis("Location").reset_index(name="counts")
mpc = pd.merge(mpc, earmarks.groupby("Location").sum()["Amount"], on="Location").sort_values(by="Amount", ascending=False)
mpc

Unnamed: 0,Location,counts,Amount
2,Statewide,27,91655000
0,Boston,106,57670000
211,Norfolk County,1,50000000
26,Framingham,6,14050000
6,Lynn,12,5150000
243,Middlesex County,1,5000000
168,Western Mass,1,5000000
1,Springfield,33,4415000
5,Cape Cod,13,4335000
3,Worcester,16,4250000


In [8]:
# Towns with income/inequality -- many towns around Greater Boston and the mill towns out towards the Merrimack River have minority or income distribution inequalities. Are they getting money as these people were affected greatly by COVID-19? What projects did these places have?

# Determined neighborhoods were using the "Environmental
inequality_towns = ["Lawrence", "Lowell", "Haverhill", "Quincy", "Fitchburg", "Brockton", "Chelsea", "Lynn",
                 "New Bedford", "Fall River", "Dedham", "Mattapan", "Roxbury", "Revere", "Southbridge", "Leominster",
                 "Framingham", "Randolph", "Salem", "Milford", "Malden"]
dt_projects = earmarks.loc[earmarks["Location"].isin(inequality_towns)]
diversity = mpc.loc[mpc["Location"].isin(inequality_towns)]
# print(diversity, sum(diversity["Amount"]) / sum(mpc["Amount"]))
# print(dt_projects)

# Including Western towns?
inequality_towns +=  ["Springfield", "Worcester", "Holyoke"]

diversity = mpc.loc[mpc["Location"].isin(inequality_towns)]
dt_projects = earmarks.loc[earmarks["Location"].isin(inequality_towns)].sort_values(by="Location")
print(sum(diversity["Amount"]) / sum(mpc["Amount"]), "of the allocated money going to less-fortunate towns,\n",  len(dt_projects) / len(earmarks), "of the projects being pursued") # How much money is going to these towns' projects?
dt_projects

0.14256319811875368 of the allocated money going to less-fortunate towns,
 0.20046349942062572 of the projects being pursued


Unnamed: 0,Earmark Description,Location,Amount,Administering State Agency,State Agency Contact,Account Number,Earmark Language
182,Council on Aging,Brockton,1000000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,15992043,"provided further, that not less than $1,000,00..."
564,One Family Workforce Pilot Project,Brockton,510000,LWD - Executive Office Of Labor And Workforce ...,sheila.l.tunney2@mass.gov,70030102,"provided further, that not less than $510,000 ..."
744,"The Charity Guild, Inc.",Brockton,100000,MDAR - Department Of Agricultural Resources,cullen.roberts2@mass.gov,15992050,"provided further, that not less than $100,000 ..."
235,East Middle School Soccer Field Improvements,Brockton,100000,DCR - Department Of Conservation And Recreation,max.j.tassinari@mass.gov,15992031,"provided further, that not less than $100,000 ..."
330,Haitian Community Partners in Brockton,Brockton,100000,EHS - Executive Office Of Health And Human Ser...,Thomas.Statuto@mass.gov,40000013,"provided further, that $100,000 shall be expen..."
404,Kennedy Elementary School,Brockton,250000,DESE - Department Of Elementary & Secondary Ed...,jjou@mass.gov,70100015,"provided further, that not less than $250,000 ..."
101,"Brockton Neighborhood Health Center, Inc",Brockton,75000,EHS - Executive Office Of Health And Human Ser...,Thomas.Statuto@mass.gov,40000013,"provided further, that not less than $75,000 s..."
127,Cape Verdean Association of Brockton,Brockton,100000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,15992043,"provided further, that not less than $100,000 ..."
413,Latin Women’s Association Food Distribution Pr...,Brockton,25000,MDAR - Department Of Agricultural Resources,cullen.roberts2@mass.gov,15992050,"provided further, that not less than $25,000 s..."
747,The Neighborhood Developers in Chelsea,Chelsea,50000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,70022023,"provided further, that not less than $50,000 s..."


In [2]:
# On the other half of the coin, what about places with wealth, like the Metrowest or North Shore?
wealthy_towns = ["Newton", "Brookline", "Cambridge", "Somerville", "Wellesley", "Concord", "Lexington", "Winchester", "Sudbury", "Hopkinton", "Bedford", "Acton", "Holliston", "Reading", "Wilmington", "Gloucester", "Arlington"]
wealth_mpc = mpc.loc[mpc["Location"].isin(wealthy_towns)]
wt_projects = earmarks.loc[earmarks["Location"].isin(wealthy_towns)].sort_values(by="Location")

print(sum(wealth_mpc["Amount"]) / sum(mpc["Amount"]), "of allocated money to wealthy towns\n", len(wt_projects) / len(earmarks), "of projects being pursued")

wt_projects
# It does look like they're being relatively allocated correctly for the most part. Good responsibility!

NameError: name 'mpc' is not defined

# Keyword & Category Matching
The following cells below are all keywords to identify category matching. While there's a good list of keywords that will easily match below, I just wanted to give a quick rationale/things that you might find in these categories.

### Categories
#### Essentials
- Health: Anything relating to COVID-19, healthcare, or protection related to COVID-19, like HVAC installation and testing.
- Housing: All projects related to affordable housing, or other initiatives related to housing
- Food: All projects related to food pantries or access to food

#### Non-Essentials
While the overarching category is non-essentials, they are more broader categories that should not necessarily have direct impacts from the pandemic; mostly these are initiatives for public services unrelated to the above
- Arts: Mostly theater and stage productions, or other cultural initiatives. Includes Museums, Theaters, etc.
- Construction: Any project related to construction
- Economic: Business, commerce, or other economic incentives
- Education: Anything related to Primary/Secondary education, or libraries
- Environment: Many projects in these earmarks correlate to rehabilitation of beaches, or improvement of water or sewage networks.
- Essential Services: Ironically, these are closer to improvements towards fire/police stations, not necessarily funding for them
- Equality: Most projects that are geared towards minority groups or societies. Also includes advocacy groups and initiatives designed to bring up veterans.
- Events: Things related to one-time events like parades, new years parties
- Historical: You'd be surprised how many there are with just one keyword
- Human Services: Any project relating to family, youth, teen, or public safety development.
- Mental Health: Anything related to behavioral health
- Occupational: Projects relating to job training or initiatives to help people find work
- Recreation: Projects relating to recreational facilities such as parks, sports centers, and even sidewalks
- Rehabilitation: Anything related to drug rehabilitation
- Religious: Anything related to church or religious
- Transport: All projects relating to transport. Keywords omit "rail" as many could be projects for rail trails.
- Technology: Many projects in earmarks are related to upgrading facilities with new technology

#### Accessibility, Maintenance and Research
I've specifically separated these three as they're more broad reaching categories that could fit in any of the above. But these are really just improvements or studies.
- Accessibility: Any project relating to improving the quality of life for handicap people
- Maintenance: Any project relating to literally repairing, replacing, upgrading, restoring, improving, rebuilding, etc.
- Research: Any project relating to a feasibility study or a normal study

These categories are generally in flux as I add keywords to better represent all projects, and I will sometimes update manually to fix exceptions.

In [32]:
# Keyword Matching

# NOTE: the current, most updated earmarks_categories have also been manually scanned, and there are roughly 100 items not categorized through the systematic process or mistagged. While I intend to add more categories sometimes, bugs of old categorizing may still remain.

keywords_to_categories = {

    # essentials

    "Health": ["covid", "testing", "primary care", "HVAC", "health", "emergency", "paramedic"],
    "Housing": ["housing", "home", "homeless", "relocation"],
    "Food": ["food", "pantry", "servings"],

    # non-essentials

    "Arts": ["arts", "theater", "stage", "cultur", "museum", "exhibit", "media"],
    "Construction": ["building", "construction"],
    "Economic": ["commerce", "economic", "business"],
    "Education": ["school", "library", "books", "tutoring", "education", "academic"],
    "Environment": ["water", "beach", "sewer", "environment", "flood", "storm", "invasive species", "conservation", "wildlife", "climate", "solar"],
    "Essential Services": ["police department", "fire department"],
    "Equality": ["black", "asian", "women", "haiti", "hispanic", "latin", "LGBT", "seniors", "elders", "diversity", "veterans", "equity", "equality", "naacp", "caribbean", "legion", "disproportionate", "vfw"],
    "Events": ["first night", "celebration", "parade"],
    "Historical": ["historic", "history", " war "],
    "Human Services": ["family", "elder", "senior", "aging", "public safety", "immigrant", "community center", "age", "veterinar", "translation", "domestic violence"],
    "Mental Health": ["mental health" "behavioral health"],
    "Occupational": ["training", "job", "work"],
    "Recreation": ["park", "trail", "sidewalk", "garden", "tennis", "basketball", "soccer", "sports", "pond", "tree", "lighting", "walk", "arboretum"],
    "Rehabilitation": ["addiction", "substance"],
    "Religious": ["temple", "church", "ministry", "jewish",],
    "Transport": ["transport", "vehicle", "road", "train", "bus", "route"],
    "Technology": ["tech", " it ", "tech", "paperless", "fiber optic", "cable", "broadband", "digital", "website", "wireless", "electric", "software"],
    "Youth Programs": ["youth", "ymca", "boys and girls", "teen", "children"],

    # upkeep, research, acessibility
    "Accessibility": ["blind", "handicap", "ADA", "accessible"],
    "Maintenance": ["maintenance", "repair", "replacement", "upgrade", "upkeep", "restoration", "improvement", "rebuild", "renovation", "preservation", "clean"],
    "Research": ["research", "survey", "study", "assessment", "project", "design", "plan"]
}

keywords = categories = keywords_to_categories.keys()


In [22]:
# Matches keywords to earmarks function

def match_keywords(row):
    def in_category(keyword_list, text):
        for k in keyword_list:
            if k in text.lower():
                return True
        return False

    desc = row["Earmark Description"]
    lang = row["Earmark Language"]
    desc_matches = set([k for k in keywords_to_categories if in_category(keywords_to_categories[k], desc)])
    lang_matches = set([k for k in keywords_to_categories if in_category(keywords_to_categories[k], lang)])

    return desc_matches.union(lang_matches)

In [23]:
# Matching keywords to categories, new column for categories

f = lambda x: ', '.join(str(k) for k in match_keywords(x))
cats = earmarks.apply(f, axis=1)
earmarks["Categories"] = cats
earmarks

Unnamed: 0,Earmark Description,Location,Amount,Administering State Agency,State Agency Contact,Account Number,Earmark Language,Agency,Categories
0,54th Massachusetts Reenactors and Historical S...,Boston,25000,MMP - Massachusetts Marketing Partnership,Phyllis.cahaly@mass.gov,70021530,"provided further that not less than $25,000 sh...",MMP,Historical
1,A Street Pier Boat Ramp Rebuilding,Hull,150000,DCR - Department Of Conservation And Recreation,max.j.tassinari@mass.gov,15992031,"provided further, that not less than $150,000 ...",DCR,"Recreation, Construction, Maintenance"
2,Abigail Adams Historical Society,Weymouth,25000,ANF - Executive Office For Administration And ...,daniel.shark@mass.gov,15992058,"provided further, that not less than $25,000 s...",ANF,Historical
3,Abington Housing Authority Upgrades,Abington,85000,DHCD - Dept Of Housing And Community Development,alain.fabo@mass.gov,70049318,"provided further, that not less than $85,000 s...",DHC,"Housing, Maintenance"
4,Abington Utility Task Vehicle,Abington,28000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,15992043,"provided further, that not less than $28,000 s...",HED,Transport
...,...,...,...,...,...,...,...,...,...
858,Wrentham Public Water Supply,Wrentham,150000,DEP - Department Of Environmental Protection,steven.mccurdy@mass.gov,20000015,"provided further, that not less than $150,000 ...",DEP,"Environment, Research"
859,Youth Community Center in Georgetown,Georgetown,25000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,70100013,"provided further, that not less than $25,000 s...",HED,"Youth Programs, Human Services"
860,Youth Development in Lawrence,Lawrence,200000,LWD - Executive Office Of Labor And Workforce ...,sheila.l.tunney2@mass.gov,70100013,"provided further, that not less than $200,000 ...",LWD,Youth Programs
861,YWCA Funding,Statewide,4500000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,15992047,"and provided further, that not less than $4,50...",HED,"Economic, Maintenance, Human Services, Equalit..."


In [29]:
# Quick outputs and fix locations

# for cat in categories:
#     earmarks[cat] = earmarks["Categories"].str.contains(cat)

# Fix towns
town_fixes = {"Greater Boston": "Boston",
         "Padanram": "Dartmouth",
         "MetroWest": "Framingham",
         "Natick & Framingham": "Framingham",
         "Western Mass": "Hatfield",
         "Hilltown": "Huntington",
         "Malden & Revere": "Malden",
         "Burnham": "Methuen",
         "Amesbury, Lawrence": "Newburyport",
         "Nashoba Valley": "Westford"}

town_new_names = {new_names[k]: new_names[k] for k in new_names}

town_fixes.update(town_new_names) # adds all old names from before

print(town_fixes)

# Rematch towns
earmarks["Location"] = earmarks["Location"].map(town_fixes, na_action="ignore")

# DO NOT UNCOMMENT THE FOLLOWING LINES UNLESS YOU INTEND TO REFRESH ALL ITEMS' CATEGORIES!

earmarks.to_csv(path_or_buf="out/earmarks_categories_temp.csv")
earmarks.loc[pd.isna(earmarks["Location"])].to_csv(path_or_buf="out/earmarks_without_towns.csv")

{'Greater Boston': 'Boston', 'Padanram': 'Dartmouth', 'MetroWest': 'Framingham', 'Natick & Framingham': 'Framingham', 'Western Mass': 'Hatfield', 'Hilltown': 'Huntington', 'Malden & Revere': 'Malden', 'Burnham': 'Methuen', 'Amesbury, Lawrence': 'Newburyport', 'Nashoba Valley': 'Westford', 'County subdivisions not defined, Barnstable County, Massachusetts': 'County subdivisions not defined, Barnstable County, Massachusetts', 'Barnstable': 'Barnstable', 'Bourne': 'Bourne', 'Brewster': 'Brewster', 'Chatham': 'Chatham', 'Dennis': 'Dennis', 'Eastham': 'Eastham', 'Falmouth': 'Falmouth', 'Harwich': 'Harwich', 'Mashpee': 'Mashpee', 'Orleans': 'Orleans', 'Provincetown': 'Provincetown', 'Sandwich': 'Sandwich', 'Truro': 'Truro', 'Wellfleet': 'Wellfleet', 'Yarmouth': 'Yarmouth', 'Adams': 'Adams', 'Alford': 'Alford', 'Becket': 'Becket', 'Cheshire': 'Cheshire', 'Clarksburg': 'Clarksburg', 'Dalton': 'Dalton', 'Egremont': 'Egremont', 'Florida': 'Florida', 'Great Barrington': 'Great Barrington', 'Hanco

In [30]:
# Town cleaning if not already done
# This refreshes the earmarks & populations dataframes!?

earmarks = pd.read_csv("out/earmarks_categories.csv")
population = pd.read_csv("data/cities.csv")

# Category Statistics

Below, individual category statistics are calculated, like the percentage of money contributed to a certain category, the amount, etc.
Note that these altogether will not add up to 100%, since many categories overlap between projects.

In [12]:
# Categories

# Slow matching process, but it's necessary
def sum_category(category, df):
    total = 0
    for i, row in df.iterrows():
        if category in row["Categories"]:
            total += row["Amount"]
    return total

def count_category(category, df):
    count = 0
    for i, row in df.iterrows():
        if category in row["Categories"]:
            count += 1
    return count

def max_category(category, df, index_filter=None):
    if index_filter is None:
        index_filter = []
    total = 0
    earmark = ""
    location = ""
    index = -1
    for i, row in df.iterrows():
        if i in index_filter:
            continue
        if category in row["Categories"] and row["Amount"] > total:
            total = row["Amount"]
            earmark = row["Earmark Description"]
            location = row["Location"]
            index = i
    return total, earmark, index

total_dist = earmarks["Amount"].sum()
category_stats = pd.DataFrame({"keyword":[], "count":[], "pct_of_earmarks":[], "amount":[], "pct_of_total":[], "average":[], "max":[], "max_project": [], "max_project_index":[]})

for c in categories:
    sum_cat = sum_category(c, earmarks)
    count = count_category(c, earmarks)
    average = sum_cat / count
    # You can remove the index filter, but this just specifically removes projects that clog the "Max Project" stuff like the MBTA and other things in this example, you can totally add more indices if they give you headaches in the data
    max_num, max_earmark, max_index = max_category(c, earmarks, index_filter=[244, 314])
    category_stats.loc[len(category_stats)] = [c, count, count/len(earmarks), sum_cat, sum_cat/total_dist, average, max_num, max_earmark, max_index]

category_stats.sort_values(by=['amount'], ascending=False)

Unnamed: 0,keyword,count,pct_of_earmarks,amount,pct_of_total,average,max,max_project,max_project_index
22,Maintenance,236,0.27346,115804500,0.33373,490697.0339,12000000,Regional Environmental Remediation,634
18,Transport,120,0.13905,73938000,0.21308,616150.0,2000000,Substance Abuse Services in Suffolk County,724
5,Economic,79,0.09154,68845000,0.1984,871455.6962,4500000,YWCA Funding,861
0,Health,98,0.11356,46264000,0.13332,472081.63265,7000000,East Boston Neighborhood Health Center Behavio...,232
7,Environment,125,0.14484,44170000,0.12729,353360.0,12000000,Regional Environmental Remediation,634
20,Youth Programs,63,0.073,42405000,0.1222,673095.2381,10000000,"Alliance of Massachusetts YMCAs, Inc.",20
1,Housing,105,0.12167,42139000,0.12144,401323.80952,15000000,Massachusetts Alliance for Supportive Housing,459
23,Research,137,0.15875,39808000,0.11472,290569.34307,5000000,MassHealth Accountable Care Organization,469
3,Arts,67,0.07764,36781000,0.106,548970.14925,12000000,Regional Environmental Remediation,634
6,Education,107,0.12399,33443000,0.09638,312551.40187,5000000,Edward Kennedy Institute,245


In [36]:
# Split categories into individual fields

for cat in categories:
    earmarks[cat] = earmarks["Categories"].str.contains(cat)

earmarks.to_csv(path_or_buf="out/earmarks_categories.csv")
earmarks.to_excel("out/earmarks_categories.xlsx")
earmarks

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Earmark Description,Location,Amount,Administering State Agency,State Agency Contact,Account Number,Earmark Language,Agency,...,Occupational,Recreation,Rehabilitation,Religious,Transport,Technology,Youth Programs,Accessibility,Maintenance,Research
0,0,0,54th Massachusetts Reenactors and Historical S...,Boston,25000,MMP - Massachusetts Marketing Partnership,Phyllis.cahaly@mass.gov,70021530,"provided further that not less than $25,000 sh...",MMP,...,False,False,False,False,False,False,False,False,False,False
1,1,1,A Street Pier Boat Ramp Rebuilding,Hull,150000,DCR - Department Of Conservation And Recreation,max.j.tassinari@mass.gov,15992031,"provided further, that not less than $150,000 ...",DCR,...,False,True,False,False,False,False,False,False,True,False
2,2,2,Abigail Adams Historical Society,Weymouth,25000,ANF - Executive Office For Administration And ...,daniel.shark@mass.gov,15992058,"provided further, that not less than $25,000 s...",ANF,...,False,False,False,False,False,False,False,False,False,False
3,3,3,Abington Housing Authority Upgrades,Abington,85000,DHCD - Dept Of Housing And Community Development,alain.fabo@mass.gov,70049318,"provided further, that not less than $85,000 s...",DHC,...,False,False,False,False,False,False,False,False,True,False
4,4,4,Abington Utility Task Vehicle,Abington,28000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,15992043,"provided further, that not less than $28,000 s...",HED,...,False,False,False,False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
858,858,858,Wrentham Public Water Supply,Wrentham,150000,DEP - Department Of Environmental Protection,steven.mccurdy@mass.gov,20000015,"provided further, that not less than $150,000 ...",DEP,...,False,False,False,False,False,False,False,False,False,True
859,859,859,Youth Community Center in Georgetown,Georgetown,25000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,70100013,"provided further, that not less than $25,000 s...",HED,...,False,False,False,False,False,False,True,False,False,False
860,860,860,Youth Development in Lawrence,Lawrence,200000,LWD - Executive Office Of Labor And Workforce ...,sheila.l.tunney2@mass.gov,70100013,"provided further, that not less than $200,000 ...",LWD,...,False,False,False,False,False,False,True,False,False,False
861,861,861,YWCA Funding,Cambridge,4500000,HED - Executive Office Of Economic Development,rory.c.ohanlon@mass.gov,15992047,"and provided further, that not less than $4,50...",HED,...,False,False,False,False,False,False,False,False,True,False


In [None]:
# Plotting & visualization for later
# town_shapefiles.plot()

In [20]:
# This is just something that bugged me while I was looking at something in Lawrence -- ironically the project says "Rail Construction"

provided further, that not less than $500,000 shall be expended for the city of Lawrence for the construction and expansion of a rail trail


In [36]:
print('{:20,.8f}'.format(12333344445676.0123456789))

12,333,344,445,676.01171875


In [18]:
# Housing - Gross Rent according to the American Community Survey

rent_by_income = pd.read_csv("data/ma_census/acs_rent/rent.csv")
rent_by_income.replace(new_names)
rent_by_income.to_csv(path_or_buf="out/rent_fixed_towns.csv")