# Clinical sites, categorized
## Sandra Tilmon
## 12/14/2023


#### Change log:
Date        Change



# Setup

In [None]:
import pandas as pd
import numpy as np

import math
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats

import seaborn as sns

import os
import re
from functools import reduce
import requests

import geopandas as gpd
from shapely.geometry import shape, Point
import geopy
from geopy.extra.rate_limiter import RateLimiter
from geopy import geocoders
from geopy.geocoders import GoogleV3

from timeit import default_timer as timer

# Remove the display limits on rows, columns and output width
for option_name in ('display.max_rows', 'display.max_columns', 'display.width'):
  pd.set_option(option_name, None)

# No scientific notation: show floats with two decimal places
pd.options.display.float_format = '{:.2f}'.format

%matplotlib inline

In [None]:
# Mount Google Drive at /content/gdrive so the data folders below can be read and written
# NOTE(review): google.colab is presumably only importable inside a Colab runtime

from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Raw data sources: root folder that the read_csv calls in later cells prefix to their file names
# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because later cells build their input paths from it.
dir = '/content/gdrive/My Drive/Sociome_Folder/Data/'
print(dir)

# Output curated datasets: folder the merged site file is written to at the end of the notebook
curated = '/content/gdrive/My Drive/Sociome_Folder/Data Commons/Curated datasets/'
curated

/content/gdrive/My Drive/Sociome_Folder/Data/


'/content/gdrive/My Drive/Sociome_Folder/Data Commons/Curated datasets/'

In [None]:
# Empty list to collect dataframe names
# (each section below appends the name of the dataframe it builds, as a string)

frames = []

# Functions

In [None]:
# Lat/long coordinates to census tract

def LatLongTract(infile, lat, long):
  """Attach census-tract attributes to rows that already carry coordinates.

  Parameters
  ----------
  infile : pandas.DataFrame
      One row per site.
  lat, long : str
      Names of the columns in `infile` that hold latitude and longitude.

  Returns
  -------
  geopandas.GeoDataFrame
      The rows of `infile` with a point `geometry` column plus the columns
      of `tracts_shp` for the tract each point falls within. The join is a
      left join, so points outside every tract are kept with missing tract
      columns. A `geoid10` column, if present, is renamed to `GEOID10`.

  Notes
  -----
  Reads the notebook-level globals `crs` and `tracts_shp`; both must be
  defined before this function is called.
  """
  # establish lat/long point geometry; Point takes (x, y) = (longitude, latitude)
  geometry = [Point(xy) for xy in zip(infile[long], infile[lat])]

  # Make a geo dataframe, set CRS
  gdf = gpd.GeoDataFrame(infile, geometry=geometry)
  gdf = gdf.set_crs(crs)

  # Intersect lat/long points and census tract.
  # `predicate=` replaces the `op=` keyword, which geopandas deprecated and later removed.
  result = gpd.sjoin(gdf, tracts_shp, how='left', predicate='within')

  # index_right is the row label of the matched tract; not needed downstream
  result = result.drop(columns=['index_right'])

  # Standardize census tract GEOID to uppercase throughout notebooks
  result = result.rename(columns={"geoid10": "GEOID10"})
  return result



# One line street address to lat/long coordinates and census tract

def AddLatLongTract(infile, api_key=None):
  """Geocode one-line street addresses, then attach census-tract attributes.

  Parameters
  ----------
  infile : pandas.DataFrame
      Must contain a `oneline` column with a full address such as
      "123 N Main Street Chicago, IL 60000". Field names vary between
      sources, so building `oneline` is left to the caller. The columns
      `lat`, `long` and `Location` are added to `infile` in place.
  api_key : str, optional
      Google Geocoding API key. When omitted, the key is read from the
      GOOGLE_MAPS_API_KEY environment variable.

  Returns
  -------
  geopandas.GeoDataFrame
      The rows of `infile` with a point `geometry` column plus the columns
      of `tracts_shp` for the tract each point falls within (left join).
      Addresses the geocoder cannot resolve keep missing coordinates,
      geometry and tract columns instead of raising.

  Raises
  ------
  ValueError
      If no API key is passed and GOOGLE_MAPS_API_KEY is not set.

  Notes
  -----
  Reads the notebook-level globals `crs` and `tracts_shp`.
  """
  # Credentials must not live in the notebook source. The key that used to be
  # hard-coded here has been exposed and should be revoked.
  if api_key is None:
    api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
  if not api_key:
    raise ValueError(
        "No Google geocoding API key: pass api_key=... or set the "
        "GOOGLE_MAPS_API_KEY environment variable."
    )

  g = geocoders.GoogleV3(api_key=api_key)

  def Gv3_latlong(address):
    location = g.geocode(address)
    # geocode() returns None when the address cannot be matched
    if location is None:
      return float('nan'), float('nan'), None
    return location.latitude, location.longitude, location

  infile[['lat', 'long', 'Location']] = infile['oneline'].apply(Gv3_latlong).apply(pd.Series)

  # establish lat/long point geometry; Point takes (x, y) = (longitude, latitude).
  # Rows without coordinates get a missing geometry.
  geometry = [
      Point(x, y) if pd.notna(x) and pd.notna(y) else None
      for x, y in zip(infile['long'], infile['lat'])
  ]

  # Make a geo dataframe, set CRS
  gdf = gpd.GeoDataFrame(infile, geometry=geometry)
  gdf = gdf.set_crs(crs)

  # Intersect lat/long points and census tract.
  # `predicate=` replaces the `op=` keyword, which geopandas deprecated and later removed.
  result = gpd.sjoin(gdf, tracts_shp, how='left', predicate='within')

  # index_right is the row label of the matched tract; not needed downstream
  result = result.drop(columns=['index_right'])

  # Standardize census tract GEOID10 to uppercase throughout notebooks
  result = result.rename(columns={"geoid10": "GEOID10"})
  return result

# Chicago Data Portal

## Primary care -- reloaded from 66 Health

In [None]:
# Reload the primary care line listing (one row per facility); every column is read as text
primary_csv = dir + 'Chicago data portal/PrimaryCare_Line.csv'
primary = pd.read_csv(primary_csv, dtype='string').drop(columns=['Unnamed: 0'])

# Keep only the fields used for categorizing and locating each site
primary_fields = [
    'facility',
    'fqhc_look_alike_or_neither_special_notes',
    'latitude',
    'longitude',
    'oneline',
    'GEOID10',
    'commarea',
]
primary = primary[primary_fields]
primary.head()

Unnamed: 0,facility,fqhc_look_alike_or_neither_special_notes,latitude,longitude,oneline,GEOID10,commarea
0,Mercy Family Health Center @ Oakwood Shores,Look-alike,41.826722480000456,-87.60828727799964,"3753 S. Cottage Grove Chicago, IL 60653",17031836500,36
1,ACCESS Southwest Family Health Center,FQHC,41.80757461200045,-87.74460191199967,"4839 W. 47th Street Chicago, IL 60638",17031560300,56
2,Heartland Health Outreach- Refugee Health,FQHC; specialize in refugee health,41.96843068300046,-87.65485692899966,"4750 N Sheridan Rd Chicago, IL 60640",17031830700,3
3,Heartland Health Center- Hibbard Elementary Sc...,FQHC; School-based health center (open to comm...,41.97084703000047,-87.70977434799966,"4930 North Sawyer Avenue Chicago, IL 60625",17031140200,14
4,Near North - Winfield Moody Health Center,FQHC,41.90535451700049,-87.64178597999967,"1276 N. Clybourn Chicago, IL 60610",17031080400,8


In [None]:
# Tabulate the free-text FQHC / look-alike notes, including missing values.
# To see the notes untruncated, first run: pd.set_option('display.max_colwidth', None)
primary['fqhc_look_alike_or_neither_special_notes'].value_counts(dropna=False)

FQHC                                                 78
FQHC; School-based health center (open to comm...    13
neither; county government clinic                     8
Look-alike                                            6
neither                                               2
FQHC; pediatric care  only                            1
FQHC; specialize in refugee health                    1
neither; pediatric and adolescents only               1
FQHC; adolescent health only                          1
neither; pediatric, adolescent, and women's he...     1
neither; School-based health center (open to a...     1
neither; free clinic; uninsured patients only;...     1
FQHC; specialize in healthcare for homeless           1
FQHC, urgent care center                              1
neither; volunteer-based free clinic  Days/ Ho...     1
neither; free clinic; Uninsured patients only;...     1
neither; county government clinic; pediatric c...     1
neither; volunteer-based free clinic, Days/Hou..

In [None]:
# Derive a site category from the free-text FQHC / look-alike notes.
# Rules are applied in order and a later match overrides an earlier one,
# so "free clinic" takes precedence over "government clinic", and so on.
category_rules = [
    (r"FQHC", "Primary care: FQHC"),
    # The data spells this "Look-alike"; accept a hyphen, a space, or neither
    (r"look[- ]?alike", "Primary care: FQHC"),
    (r"government clinic", "Primary care: Government clinic"),
    (r"free clinic", "Primary care: Free clinic"),
]

notes = primary['fqhc_look_alike_or_neither_special_notes']

# Anything that matches no rule stays "Other"
primary['Category'] = "Other"
for pattern, label in category_rules:
  # na=False: a missing note matches nothing instead of yielding an NA mask
  matched = notes.str.contains(pattern, case=False, regex=True, na=False).to_numpy(dtype=bool)
  primary['Category'] = np.where(matched, label, primary['Category'])

# Check every distinct note against the category it was assigned
pd.crosstab(primary['fqhc_look_alike_or_neither_special_notes'], primary['Category'])

Category,Other,Primary care: FQHC,Primary care: Free clinic,Primary care: Government clinic
fqhc_look_alike_or_neither_special_notes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FQHC,0,78,0,0
"FQHC, urgent care center",0,1,0,0
FQHC; School-based health center (open to comm...,0,13,0,0
FQHC; adolescent health only,0,1,0,0
FQHC; pediatric care only,0,1,0,0
FQHC; specialize in healthcare for homeless,0,1,0,0
FQHC; specialize in refugee health,0,1,0,0
Look-alike,6,0,0,0
neither,2,0,0,0
neither; School-based health center (open to a...,1,0,0,0


lic = lic[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea']]

In [None]:
# Rename to the column names shared by all site tables in this notebook
primary_renames = {
    'facility': 'Name',
    'oneline': 'Address',
    'latitude': 'LATITUDE',
    'longitude': 'LONGITUDE',
}
primary = primary.rename(columns=primary_renames)

# Reorder
primary = primary.loc[:, ['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea']]
primary.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea
0,Other,Mercy Family Health Center @ Oakwood Shores,"3753 S. Cottage Grove Chicago, IL 60653",41.826722480000456,-87.60828727799964,17031836500,36
1,Primary care: FQHC,ACCESS Southwest Family Health Center,"4839 W. 47th Street Chicago, IL 60638",41.80757461200045,-87.74460191199967,17031560300,56
2,Primary care: FQHC,Heartland Health Outreach- Refugee Health,"4750 N Sheridan Rd Chicago, IL 60640",41.96843068300046,-87.65485692899966,17031830700,3
3,Primary care: FQHC,Heartland Health Center- Hibbard Elementary Sc...,"4930 North Sawyer Avenue Chicago, IL 60625",41.97084703000047,-87.70977434799966,17031140200,14
4,Primary care: FQHC,Near North - Winfield Moody Health Center,"1276 N. Clybourn Chicago, IL 60610",41.90535451700049,-87.64178597999967,17031080400,8


In [None]:
# Record this dataframe's name; guarded so re-running the cell does not add a duplicate
if 'primary' not in frames:
  frames.append('primary')
frames

['tractsdf', 'primary']

# HIFLD

## Hospitals - reloaded from 66 Health

In [None]:
# Reload the hospital line listing; every column is read as text
hosp_csv = dir + 'Homeland infrastructure/HIFLD Hospitals/HospCookLine.csv'
hosp = pd.read_csv(hosp_csv, dtype='string').drop(columns=['Unnamed: 0'])
hosp.head()

Unnamed: 0,LATITUDE,LONGITUDE,NAME,ADDRESS,CITY,STATE,ZIP,TYPE,POPULATION,OWNER,BEDS,TRAUMA,HELIPAD,geometry,GEOID10,commarea
0,41.79706055,-87.88639493,ADVENTIST MIDWEST HEALTH,5101 SOUTH WILLOW SPRINGS ROAD,LA GRANGE,IL,60525,GENERAL ACUTE CARE,186.0,NON-PROFIT,186.0,LEVEL II,Y,POINT (-87.88639493 41.79706055),,
1,41.72169452,-87.73244225,ADVOCATE CHRIST HOSPITAL AND MEDICAL CENTER,4440 W 95TH STREET,OAK LAWN,IL,60453,GENERAL ACUTE CARE,802.0,NON-PROFIT,802.0,LEVEL I,Y,POINT (-87.73244225 41.72169452),,
2,41.93674085,-87.65137166,ADVOCATE NORTHSIDE HEALTH NETWORK,836 W WELLINGTON AVENUE,CHICAGO,IL,60657,GENERAL ACUTE CARE,397.0,NON-PROFIT,397.0,LEVEL I,N,POINT (-87.65137166 41.93674085),17031063000.0,6.0
3,41.56630404,-87.69756973,ADVOCATE SOUTHLAND HEALTH NETWORK,17800 S KEDZIE AVENUE,HAZEL CREST,IL,60429,GENERAL ACUTE CARE,233.0,NON-PROFIT,233.0,NOT AVAILABLE,Y,POINT (-87.69756973 41.56630404),,
4,41.72650439,-87.56722618,ADVOCATE SOUTHLAND HEALTH NETWORK,2320 E 93RD ST,CHICAGO,IL,60617,GENERAL ACUTE CARE,205.0,NON-PROFIT,205.0,NOT AVAILABLE,N,POINT (-87.56722618 41.72650439),17031480400.0,48.0


In [None]:
# Build the category label from the hospital TYPE column
hosp['Category'] = 'Hospital: ' + hosp['TYPE']

# Tabulate hospital types, including missing values. Kept as the cell's last
# expression so the table is displayed rather than silently discarded.
hosp['TYPE'].value_counts(dropna=False)

In [None]:
# Rename to the shared column names, then reorder
hosp_renames = {'NAME': 'Name', 'ADDRESS': 'Address'}
hosp = hosp.rename(columns=hosp_renames)
hosp = hosp.loc[:, ['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea']]

In [None]:
# Record this dataframe's name; guarded so re-running the cell does not add a duplicate
if 'hosp' not in frames:
  frames.append('hosp')
frames

['tractsdf', 'primary', 'hosp']

## Rx -- reloaded from 66 Health

In [None]:
# Reload the pharmacy line listing; every column is read as text
rx_csv = dir + 'Homeland infrastructure/RX open facilities/ChicagoRx_line.csv'
rx = pd.read_csv(rx_csv, dtype='string')

# Keep only the first five characters of the ZIP code
rx['Zip'] = rx['Zip'].str[:5]

# One-line address: street, city, state and ZIP separated by single spaces
rx_oneline = rx['Address'] + ' ' + rx['City'] + ' ' + rx['State'] + ' ' + rx['Zip']

# Swap the street-only Address for the one-line version, standardize the
# coordinate column names and label every row as a pharmacy
rx = (
    rx.drop(columns=['Address'])
    .rename(columns={'latitude': 'LATITUDE', 'longitude': 'LONGITUDE'})
    .assign(Address=rx_oneline, Category='Pharmacy')
)

# Reorder to the shared site layout
rx = rx.loc[:, ['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea']]

rx.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea
0,Pharmacy,WALGREENS #03074,4700 S Halsted St CHICAGO IL 60609,41.808732,-87.645911,17031611000,61
1,Pharmacy,WALGREENS #00118,5650 W BELMONT AVE CHICAGO IL 60634,41.938879,-87.76867,17031151200,15
2,Pharmacy,WALGREENS #05192,2924 E 92ND ST CHICAGO IL 60617,41.728376,-87.552996,17031461000,46
3,Pharmacy,WALGREENS #05124,8628 S COTTAGE GROVE AVE CHICAGO IL 60619,41.73795,-87.605564,17031440202,44
4,Pharmacy,WALGREENS #00162,1554 E 55TH STREET CHICAGO IL 60615,41.795697,-87.588049,17031410800,41


In [None]:
# Record this dataframe's name; guarded so re-running the cell does not add a duplicate
if 'rx' not in frames:
  frames.append('rx')
frames

['tractsdf', 'primary', 'hosp', 'rx']

# HRSA

## FQHCs -- reloaded from 66 Health

In [None]:
# Reload the FQHC line listing; every column is read as text
fqhc_csv = dir + 'HRSA/FQHC_ChicagoLine.csv'
fqhc = pd.read_csv(fqhc_csv, dtype='string')

# Prefix the site category from the file with "FQHC: "
fqhc['Category'] = 'FQHC: ' + fqhc['Category']

# Rename coordinates and address to the shared column names
fqhc = fqhc.rename(columns={'lat': 'LATITUDE', 'long': 'LONGITUDE', 'oneline': 'Address'})

# Reorder
fqhc = fqhc.loc[:, ['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea']]

fqhc.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea
0,FQHC: Administrative/Service Delivery Site,Infant Welfare Society of Chicago dba Angel Ha...,"3600 W Fullerton Ave Chicago, IL 606...",41.9248971,-87.71730010000002,17031220701,22
1,FQHC: Service Delivery Site,Haymarket Center,"108 N Sangamon St Chicago, IL 60607-...",41.8833902,-87.6512022,17031833000,28
2,FQHC: Administrative/Service Delivery Site,Haymarket Center,"124 N Sangamon St Chicago, IL 60607-...",41.8838078,-87.6510481,17031833000,28
3,FQHC: Service Delivery Site,Haymarket Center,"120 N Sangamon St Chicago, IL 60607-...",41.8836033,-87.6512099,17031833000,28
4,FQHC: Administrative/Service Delivery Site,Beloved Community Family Wellness Center,"6821 S Halsted St Chicago, IL 60621-...",41.7699662,-87.6441968,17031681200,68


In [None]:
# Record this dataframe's name; guarded so re-running the cell does not add a duplicate
if 'fqhc' not in frames:
  frames.append('fqhc')
frames

['tractsdf', 'primary', 'hosp', 'rx', 'fqhc']

# Merge

In [None]:
# The dataframes to merge, in section order.
# (`frames` holds only their names as strings, so the objects are listed explicitly here.)
frames2 = [primary, hosp, rx, fqhc]

In [None]:
# Stack the per-source site tables into one long table.
# ignore_index=True gives the merged rows a fresh 0..n-1 index; without it each
# source keeps its own row numbers, so the same index label repeats across
# sources and is carried into the exported file.
Health_sites66 = pd.concat(frames2, axis=0, ignore_index=True)
Health_sites66.tail()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea
185,FQHC: Service Delivery Site,Esperanza Academy for Global Citizenship Annex,"4941 W 46th St RM 100 Chicago, IL 60...",41.8091394,-87.74684549999999,17031560200,56
186,FQHC: Service Delivery Site,Esperanza at Cultivate Collective,"4350 S Laporte Ave Chicago, IL 60638",41.813403,-87.74739,17031560200,56
187,FQHC: Service Delivery Site,IMAN - Youth and Family Health Center,"2749 W 63rd St Chicago, IL 60629-2342",41.7788903,-87.6929145,17031660600,66
188,FQHC: Service Delivery Site,IMAN-Main Site,"2744 W 63rd St Chicago, IL 60629-2343",41.779335,-87.69281819999999,17031835000,66
189,FQHC: Service Delivery Site,IMAN Mobile Health Unit,"2744 W 63rd St Chicago, IL 60629-2343",41.779335,-87.69281819999999,17031835000,66


# Export and reload

In [None]:
# Write the merged site table to the curated-datasets folder.
# The path is handed to pandas so it opens the file itself with the requested
# encoding and its own newline handling, rather than receiving a text handle
# opened with platform defaults.
Health_sites66.to_csv(curated + 'colab66_health_sites_latlong.csv', encoding='utf-8')