In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
aicum = pd.read_excel("data/AICUM Vaccination survey 1.19.2021.xlsx")
columns = {
    "Institution":"institution",
    "Survey completed by:":"point_of_contact",
    "Enrolled in MCVP":"MCVP",
    "Submitted Application for MCVP":"submitted_mcvp_app",
    "Date of submission":"date_of_submission",
    "Partnering with hospital/healthcare provider":"partnering",
    "Named local hospital or healthcare provider":"partner_name",
    "Interested in serving as vaccination site/providing staffing and operational support":"vaccination_site_support",
    "Interested in serving as a vaccination site without providing staffing and operational support":"vaccination_site_no_support",
    "All eligible employees vaccinated":"employees_vaccinated",
    "on campus or external entity":"on_campus_or_external"
}
aicum = aicum.rename(columns=columns)
misc_remaps = {
  'Anne Maria College': 'Anna Maria College',  # typo in AICUM survey
  'Holy Cross College': 'College of the Holy Cross',
  
}

def add_college(x):
    exceptions = ["College", "University", "Institute", "Conservatory"]
    universities = ["Bay Path", "Bentley", "Brandeis", "Northeastern", "Suffolk", "Tufts"]
    if x == "MGH Institute":
        return x + " of Health Professions"
    if x == "NE Conservatory":
        return "New England Conservatory of Music"
    elif x == "Montserrat":
        return x + " College of Art"
    elif x == "Olin":
        return "Franklin W. Olin College of Engineering"
    elif x == "Urban":
        return x + " College of Boston"
    elif x == "WPI":
        return "Worcester Polytechnic Institute"
    elif x == "Wentworth":
        return x + " Institute of Technology"
    elif any(university in x for university in universities):
        return x + " University"
    elif any(exception in x for exception in exceptions):
        return x
    else:
        return x + " College"
aicum['institution'] = aicum['institution'].apply(add_college)
aicum_insts = set(aicum['institution'])
print(len(aicum_insts))

38


In [32]:
orig_colleges = gpd.read_file("data/MA_Universities/SHP_dormcap/ma_universities.shp")
#colleges = gpd.read_file("data/MA_Universities/Original/COLLEGES_PT.shp")
#colleges = gpd.read_file("data/MA_Universities/ma_4_year_ipeds.geojson")
#len(colleges)
# print(list(colleges['COLLEGE']))

In [41]:
# Remove some gratuitous listings, as in the spring 2020 analysis.
orig_colleges = orig_colleges[orig_colleges["COLLEGE"] != "University of Massachusetts Dartmouth Center for Innovation and Entrepreneurship"]
orig_colleges = orig_colleges[~((orig_colleges["COLLEGE"] == "Northeastern University") & (orig_colleges["CAMPUS"] != 'Main Campus'))]
orig_colleges = orig_colleges[~((orig_colleges["COLLEGE"] == "Boston College") & (orig_colleges["CAMPUS"] != 'Main Campus'))]

In [42]:
colleges = orig_colleges.merge(aicum, left_on='COLLEGE', right_on='institution')
matched_insts = set(colleges['COLLEGE'])
print(len(matched_insts))

29


In [44]:
aicum_insts.difference(matched_insts)

{'Anne Maria College',
 'Cambridge College',
 'Fisher College',
 'Holy Cross College',
 'Laboure College',
 'MGH Institute of Health Professions',
 'New England Conservatory of Music',
 'Urban College of Boston',
 'William James College'}

In [45]:
filtered_colleges = colleges[
  ['COLLEGE', 'CAMPUS', 'ADDRESS', 'CITY', 'ZIPCODE', 'ENROLL',
   'URL', 'MCVP', 'submitted_mcvp_app', 'partner_name',
   'vaccination_site_support', 'vaccination_site_no_support',
   'employees_vaccinated', 'on_campus_or_external', 'geometry']
]

In [49]:
filtered_colleges = filtered_colleges.to_crs('epsg:4326')

In [55]:
filtered_colleges.set_index('COLLEGE').loc['Boston College']

CAMPUS                                                          Main Campus
ADDRESS                                             140 Commonwealth Avenue
CITY                                                          Chestnut Hill
ZIPCODE                                                               02467
ENROLL                                                                14720
URL                                                       http://www.bc.edu
MCVP                                                                    Yes
submitted_mcvp_app                                                       No
partner_name                                                            NaN
vaccination_site_support                                                 No
vaccination_site_no_support                                              No
employees_vaccinated                                                     No
on_campus_or_external                                                   NaN
geometry    

In [50]:
filtered_colleges.to_file('../covid-vaccines/geojson/ma_universities_with_aicum.geojson', driver='GeoJSON')

# unsure: 
 * Bay Path — University, or Adult Evening? (Parker: University, apparently?)
 * Anne Maria — doesn't show up in dataset
 * Holy Cross — Hellenic College/Holy Cross, or College of the Holy Cross?  (Parker: almost certainly the latter)
 
 
# Missing from SHP_dormcap (which has enrollment...)
* Cambridge College
* Fisher College
* Laboure College (nursing?)
* MGH Institute of Health Professions
* New England Conservatory of Music
* Urban College of Boston
* William James College

These mainly appear to be specialty schools (in particular, most have graduate-level programs in psychology and nursing). We really only need locations (lat, long) and total enrollment—address information, etc. is just gravy—so we could try to pull these from the other shapefiles/GeoJSON files in `data` or—as a last resort—pull some information from Google Maps and school websites.