## Assessing Service Accessibility for Asylum Seekers in Chicago

In [32]:
import pandas as pd
import geopandas as gpd
import folium

from shapely.geometry import Point, MultiPolygon

## I. Loading and Cleaning Data

In [156]:
# Shared locations, defined once so that moving the data folder or the portal
# is a one-line change.
DATA_DIR = "../data"
# Socrata (SODA) endpoints return only the first 1,000 rows unless `$limit` is
# supplied, so request an explicit, generous page size to avoid silently
# truncated datasets.
SODA_URL = "https://data.cityofchicago.org/resource/{dataset_id}.json?$limit=50000"

# The shelter workbook is used twice (sheet 0 and sheet 1): parse the file once
# and pick both sheets out of the returned {sheet_index: DataFrame} mapping.
shelter_sheets = pd.read_excel(f"{DATA_DIR}/shelter_data.xlsx",
                               sheet_name=[0, 1],
                               engine="openpyxl")

# Location Data
shelters = shelter_sheets[0]
public_libraries = pd.read_json(SODA_URL.format(dataset_id="x8fc-8rcq"))
grocery_shops = pd.read_json(SODA_URL.format(dataset_id="ce29-twzt"))
wifi_points = pd.read_json(SODA_URL.format(dataset_id="4jzv-pgsc"))
public_clinics = pd.read_json(SODA_URL.format(dataset_id="kcki-hnch"))
public_schools = pd.read_json(SODA_URL.format(dataset_id="tz49-n8ze"))
bus_stations = gpd.read_file(f"{DATA_DIR}/cta_bus_stops/bus_stops_location.shp")
rail_stations = gpd.read_file(f"{DATA_DIR}/cta_rail_stations/CTA_RailStations.shp")

# Boundary Data
neigh_bound = gpd.read_file(f"{DATA_DIR}/neighborhood_boundaries/neigh_bound.shp")
parks_bound = gpd.read_file(f"{DATA_DIR}/parks_boundaries/parks_bound.shp")


# Other Data
arrivals = shelter_sheets[1]

### I.a Geocode Datasets

In [217]:
def geocode_df(df: pd.DataFrame, idx=None) -> "gpd.GeoDataFrame":
    """Build an EPSG:4326 point GeoDataFrame from a table with coordinates.

    Coordinates are looked up in this order:

    1. existing ``latitude`` AND ``longitude`` columns (used as they are);
    2. ``y`` AND ``x`` columns, renamed to ``latitude`` / ``longitude``;
    3. a ``location`` column whose entries can be indexed with
       ``"latitude"`` and ``"longitude"``.

    Note that ``df`` itself is modified: it gains (or has columns renamed to)
    ``latitude`` / ``longitude`` and receives a ``geometry`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding one point per row.
    idx : optional
        Not used by the function; accepted so existing calls that pass a
        label as second argument keep working.

    Returns
    -------
    geopandas.GeoDataFrame
        ``df`` wrapped as a GeoDataFrame with point geometries and
        CRS ``EPSG:4326``.

    Raises
    ------
    KeyError
        If none of the three coordinate layouts is present in ``df``.
    """
    columns = set(df.columns)
    # `("a" and "b") in cols` only tests "b"; check both names explicitly.
    has_lat_lon = {"latitude", "longitude"} <= columns
    has_y_x = {"y", "x"} <= columns

    if has_lat_lon:
        # Already in the expected layout. Checked first so that a table that
        # also carries x/y is not renamed into duplicate column names.
        pass
    elif has_y_x:
        df.rename(columns={"y": "latitude", "x": "longitude"}, inplace=True)
    elif "location" in columns:
        df["latitude"] = df["location"].map(lambda loc: loc["latitude"])
        df["longitude"] = df["location"].map(lambda loc: loc["longitude"])
    else:
        raise KeyError(
            "geocode_df needs 'latitude'/'longitude', 'y'/'x' or 'location' "
            f"columns; found {sorted(map(str, columns))}"
        )

    # Point takes (x, y), i.e. (longitude, latitude).
    df["geometry"] = [Point(lon, lat) for lon, lat in zip(df["longitude"], df["latitude"])]
    gdf = gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=df["geometry"])

    return gdf

In [226]:
# Names of the raw tables (module-level variables) to geocode.
dfs_geocode = ["shelters", "public_libraries", "grocery_shops", "wifi_points",
               "public_clinics", "public_schools"]

# Each raw table is looked up by name and its geocoded version is published as
# a module-level variable called "geo_<name>" (e.g. geo_shelters).
for df_name in dfs_geocode:
    input_df = globals()[df_name]  # Get the DataFrame by name
    geo_df = geocode_df(input_df, df_name)  # Call the geocode_df function
    globals()["geo_" + df_name] = geo_df  # Assign the result to a new global variable
    print(f"Created geo_{df_name} using {df_name}")


Created geo_shelters using shelters
Created geo_public_libraries using public_libraries
Created geo_grocery_shops using grocery_shops
Created geo_wifi_points using wifi_points
Created geo_public_clinics using public_clinics
Created geo_public_schools using public_schools


### I.b Subsetting the Data

### I.c Arrival Data

In [3]:
# Reshape the arrivals sheet to long format: the two identifier columns are
# kept, every other column becomes a ("date", "value") pair of rows.
arrivals_melt = pd.melt(
    arrivals,
    id_vars=["landing_zone", "shelter_dummy"],
    var_name="date",
)

In [4]:
# Parse the melted column labels into timestamps, then derive a "YYYY-MM"
# key from them (keyword arguments of assign are evaluated in order, so
# year_month sees the already-parsed date column).
arrivals_melt = arrivals_melt.assign(
    date=lambda frame: pd.to_datetime(frame["date"]),
    year_month=lambda frame: frame["date"].dt.strftime("%Y-%m"),
)

In [5]:
# Preview the first rows of the long-format arrivals table.
arrivals_melt.head()

Unnamed: 0,landing_zone,shelter_dummy,date,value,year_month
0,O'Hare Airport,0,2023-06-09,11.0,2023-06
1,Midway Airport,0,2023-06-09,,2023-06
2,Police Districts (All),0,2023-06-09,459.0,2023-06
3,American Islamic College,1,2023-06-09,,2023-06
4,Brands Park,1,2023-06-09,169.0,2023-06


In [6]:
# Total "value" per landing zone, shelter flag and month, returned as a flat
# table (group keys as ordinary columns).
(
    arrivals_melt
    .groupby(["landing_zone", "shelter_dummy", "year_month"])["value"]
    .sum()
    .reset_index()
)

Unnamed: 0,landing_zone,shelter_dummy,year_month,value
0,American Islamic College,1,2023-06,0.0
1,American Islamic College,1,2023-07,0.0
2,American Islamic College,1,2023-08,2747.0
3,American Islamic College,1,2023-09,4630.0
4,American Islamic College,1,2023-10,4934.0
...,...,...,...,...
259,YWLA,1,2023-09,1238.0
260,YWLA,1,2023-10,989.0
261,YWLA,1,2023-11,984.0
262,YWLA,1,2023-12,1238.0


## III. Plots

#### III.a Shelter Locations and Migrant Arrivals

In [8]:
# List the columns of the shelter locations table.
# NOTE(review): `locations` is not created in any cell visible above this one —
# confirm it is defined earlier in the notebook, otherwise Restart & Run All fails here.
locations.columns

Index(['loc_name', 'lat_lon'], dtype='object')

In [9]:
# Split the "lat, lon" text into two columns and store them as floats.
# A plain split leaves strings (the longitude part keeps the space that follows
# the comma); float conversion tolerates that whitespace and yields numeric
# columns usable for geometry and arithmetic.
locations[["latitude", "longitude"]] = (
    locations["lat_lon"].str.split(",", n=1, expand=True).astype(float)
)

In [10]:
# Preview the locations table with the new latitude / longitude columns.
locations.head()

Unnamed: 0,loc_name,lat_lon,latitude,longitude
0,Americen Islamic College,"41.955547392826425, -87.64640609999998",41.955547392826425,-87.64640609999998
1,Brands Park,"41.94137572059273, -87.69921562883565",41.94137572059273,-87.69921562883565
2,Broadway Armory,"41.989611556345416, -87.66009473438407",41.989611556345416,-87.66009473438407
3,Casa Esperanza,"41.73548116816464, -87.55940857505853",41.73548116816464,-87.55940857505853
4,Daley College,"41.757198163616906, -87.72284761349309",41.757198163616906,-87.72284761349309


In [11]:
# Build the point geometries in one vectorized call (x = longitude, y = latitude);
# points_from_xy converts its inputs to float64, so text coordinates are accepted too.
geometry = gpd.points_from_xy(locations["longitude"], locations["latitude"])

# Create the GeoDataFrame and declare its coordinate reference system
# (EPSG:4326, plain longitude/latitude) at construction time.
locations_gdf = gpd.GeoDataFrame(locations, geometry=geometry, crs="EPSG:4326")