# Pollution and industrial sites, categorized
## Sandra Tilmon
## 12/13/2023


#### Change log:
Date        Change



# Setup

In [None]:
import pandas as pd
import numpy as np

import math
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats

import seaborn as sns

import os
import re
from functools import reduce
import requests

import geopandas as gpd
from shapely.geometry import shape, Point
import geopy
from geopy.extra.rate_limiter import RateLimiter
from geopy import geocoders
from geopy.geocoders import GoogleV3

from timeit import default_timer as timer

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# No scientific notation
pd.options.display.float_format = '{:.2f}'.format

%matplotlib inline

In [None]:
# Mount drive

from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Raw data sources
dir = '/content/gdrive/My Drive/Sociome_Folder/Data/'
print(dir)

# Output curated datasets
curated = '/content/gdrive/My Drive/Sociome_Folder/Data Commons/Curated datasets/'
curated

/content/gdrive/My Drive/Sociome_Folder/Data/


'/content/gdrive/My Drive/Sociome_Folder/Data Commons/Curated datasets/'

In [None]:
# Empty list to collect dataframe names

frames = []

# GIS

## Set CRS

In [None]:
crs = {'init' :'epsg:3435'}

In [None]:
# Census tracts

r = requests.get("https://data.cityofchicago.org/resource/74p9-q2aq.json")
r.raise_for_status()

data = r.json()
for d in data:
    d['the_geom'] = shape(d['the_geom'])

tracts_shp = gpd.GeoDataFrame(data).set_geometry('the_geom', crs=crs)

tracts_shp = tracts_shp.drop(columns=['statefp10', 'countyfp10', 'tractce10',	'name10', 'namelsad10',
                                      'notes'])

print(tracts_shp.crs)
print(tracts_shp.info())
tracts_shp.head()

+init=epsg:3435 +type=crs
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 801 entries, 0 to 800
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   the_geom    801 non-null    geometry
 1   geoid10     801 non-null    object  
 2   commarea    801 non-null    object  
 3   commarea_n  801 non-null    object  
dtypes: geometry(1), object(3)
memory usage: 25.2+ KB
None


  in_crs_string = _prepare_from_proj_string(in_crs_string)


Unnamed: 0,the_geom,geoid10,commarea,commarea_n
0,"MULTIPOLYGON (((-87.624 41.730, -87.624 41.730...",17031842400,44,44
1,"MULTIPOLYGON (((-87.686 41.823, -87.686 41.823...",17031840300,59,59
2,"MULTIPOLYGON (((-87.629 41.853, -87.629 41.853...",17031841100,34,34
3,"MULTIPOLYGON (((-87.688 41.856, -87.688 41.856...",17031841200,31,31
4,"MULTIPOLYGON (((-87.633 41.874, -87.633 41.874...",17031839000,32,32


In [None]:
tractsdf = pd.DataFrame(tracts_shp)
tractsdf.drop(columns=['the_geom'], inplace=True)
tractsdf = tractsdf.rename(columns={'geoid10':'GEOID10'})
# tractsdf.info()

# Do NOT stack tractsdf here
# frames.append('tractsdf')
frames

[]

## Functions

In [None]:
# Lat/long coordinates to census tract

def LatLongTract(infile, lat, long):

  # establish lat/long point geometry
  geometry = [Point(xy) for xy in zip(infile[long], infile[lat])]

  # Make a geo dataframe, set CRS
  gdf = gpd.GeoDataFrame(infile, geometry=geometry)
  gdf = gdf.set_crs(crs)

  # Intersect lat/long points and census tract
  result=gpd.sjoin(gdf, tracts_shp, how='left', op='within')

  result.drop(columns=['index_right'], inplace=True)

  # Standardize census tract GEOID to uppercase throughout notebooks
  result.rename(columns={"geoid10" : "GEOID10"}, inplace=True)
  return(result)



# One line street address to lat/long coordinates and census tract

def AddLatLongTract(infile):

  # Create "oneline" address in cleaning "123 N Main Street Chicago, IL 60000"
  # Field names vary so not bothering to include here

  g = geocoders.GoogleV3(api_key='AIzaSyCXvuRDWUvclTJ-DrpEEjL-810JnSNT5JQ')

  def Gv3_latlong (address):
      location = g.geocode(address)
      lat = location.latitude
      long = location.longitude
      # Perform additional steps to obtain census tract from coordinates
      # census_tract = get_census_tract_from_coordinates(latitude, longitude)
      return lat, long, location

  infile[['lat', 'long', 'Location']] = infile['oneline'].apply(Gv3_latlong).apply(pd.Series)

  # establish lat/long point geometry
  geometry = [Point(xy) for xy in zip(infile['long'], infile['lat'])]

  # Make a geo dataframe, set CRS
  gdf = gpd.GeoDataFrame(infile, geometry=geometry)
  gdf = gdf.set_crs(crs)

  # Intersect lat/long points and census tract
  result=gpd.sjoin(gdf, tracts_shp, how='left', op='within')

  result.drop(columns=['index_right'], inplace=True)

  # Standardize census tract GEOID10 to uppercase throughout notebooks
  result.rename(columns={"geoid10" : "GEOID10"}, inplace=True)
  return(result)

# Sites: Reloaded from 62 Environment and pollution and 64 Business, economy, transportation

# *Chicago Data Portal*

## Asbestos

In [None]:
# Reload line
asb = pd.read_csv(dir + 'Chicago data portal/asbestos_line.csv', dtype='string')
asb.drop(columns=['Unnamed: 0'], inplace=True)

asb['Category'] = 'Asbestos'

asb['ADDRESS'] = asb['ADDRESS'] + ', Chicago, IL'

asb = asb.rename(columns={'OWNER/APPLICANT': 'Name', 'ADDRESS': 'Address'})

asb = asb[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea',
           'date', 'year', 'month']]

asb.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,Asbestos,PUBLIC BLDG. COMM. OF CHICAGO,"1 W IRVING PARK RD, Chicago, IL",41.96099580000001,-87.9400153,,,2000-06-09,2000,6
1,Asbestos,WIRTZ REALTY,"115 S WOLCOTT AVE, Chicago, IL",41.8745299,-87.67404409999999,17031838100.0,28.0,1993-02-19,1993,2
2,Asbestos,WIRTZ REALTY,"117 S WOLCOTT AVE, Chicago, IL",41.8745299,-87.67404409999999,17031838100.0,28.0,1993-02-19,1993,2
3,Asbestos,LAKE SHORE RECYCLING SYSTEM,"1240 W EXCHANGE AVE, Chicago, IL",41.8186341,-87.6479286,17031842600.0,61.0,2014-02-27,2014,2
4,Asbestos,CITY OF CHICAGO,"1245 S CALIFORNIA AVE, Chicago, IL",41.8650698,-87.69549260000001,17031843300.0,29.0,2004-10-07,2004,10


In [None]:
frames.append('asb')
print(frames)

## Environmental complaints

In [None]:
# Reload line listing
env = pd.read_csv(dir + 'Chicago data portal/env_complaints_line.csv', dtype='string')

columns_to_convert = ['ADDRESS', 'year', 'month']
env[columns_to_convert] = env[columns_to_convert].astype(str)

env['year'] = env['year'].str[:-2]
env['month'] = env['month'].str[:-2]

env['Category'] = 'Enviro comp ' + env['COMPLAINT TYPE']

env['Name'] = 'Environmental complaint'

env['ADDRESS'] = env['ADDRESS'] + ', Chicago, IL'

env = env.drop(columns=['LATITUDE', 'LONGITUDE'])

env = env.rename(columns={'ADDRESS': 'Address', 'lat': 'LATITUDE', 'long':'LONGITUDE'})

env = env[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

env.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,Enviro comp Noise Complaint,Environmental complaint,"1 N STATE ST, Chicago, IL",41.882436002,-87.62682903,17031320100,32,1994-11-25,1994,11
1,Enviro comp Air Pollution Work Order,Environmental complaint,"100 N LOOMIS ST, Chicago, IL",41.882840594,-87.662404034,17031833000,28,2008-08-19,2008,8
2,Enviro comp Noise Complaint,Environmental complaint,"100 N STATE ST, Chicago, IL",41.883306002,-87.627968681,17031839100,32,1996-05-03,1996,5
3,Enviro comp Air Pollution Work Order,Environmental complaint,"1000 W WASHINGTON ST, Chicago, IL",41.883340996,-87.652662977,17031833000,28,1996-05-08,1996,5
4,Enviro comp Air Pollution Work Order,Environmental complaint,"1001 W WEED ST, Chicago, IL",41.909700862,-87.653183459,17031842200,8,1995-08-30,1995,8


In [None]:
frames.append('env')
print(frames)

['tractsdf', 'asb', 'env']


## Industrial sites from business licenses

In [None]:
# Reload line
lic = pd.read_csv(dir + 'Chicago data portal/Business licenses/Chicago_business_lic_tract.csv')

columns_to_convert = ['ADDRESS', 'CITY', 'STATE', 'ZIP CODE', 'LICENSE DESCRIPTION', 'GEOID10', 'commarea']
lic[columns_to_convert] = lic[columns_to_convert].astype(str)

lic['ZIP CODE'] = lic['ZIP CODE'].str[:5]
lic['GEOID10'] = lic['GEOID10'].str[:-3]
lic['commarea'] = lic['commarea'].str[:-3]

lic['date'] = pd.to_datetime(lic['DATE ISSUED'])
lic['year'] = lic['date'].dt.year
lic['month'] = lic['date'].dt.month

lic['Address1'] = lic['ADDRESS'] + ' ' + lic['CITY'] + ', ' + lic['STATE'] + ' ' + lic['ZIP CODE']

# Define conditions and corresponding categories
conditions = [
    lic['LICENSE DESCRIPTION'].str.contains('admin', case=False),
    lic['LICENSE DESCRIPTION'].str.contains('hazardous', case=False),
    lic['LICENSE DESCRIPTION'].str.contains('manufacturing', case=False),
    lic['LICENSE DESCRIPTION'].str.contains('chem', case=False),
    lic['LICENSE DESCRIPTION'].str.contains('factory', case=False),
    lic['LICENSE DESCRIPTION'].str.contains('metal', case=False),
    lic['LICENSE DESCRIPTION'].str.contains('plastic', case=False),
]

categories = ['Administrative', 'Hazardous', 'Manufacturing', 'Chemical', 'Factory', 'Metal', 'Plastic']

# Create the 'Category' column based on conditions
lic['type'] = np.select(conditions, categories, default='Other')
lic['Category'] =  'Chicago business license: ' + lic['type']

lic = lic.rename(columns={'LEGAL NAME': 'Name', 'Address1': 'Address'})

lic = lic[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

lic.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,Chicago business license: Other,TTX COMPANY,"4016 S ASHLAND AVE CHICAGO, IL 60609",41.82,-87.67,1703161030,6.0,2022-08-08,2022,8
1,Chicago business license: Other,JYM INVESTMENTS LLC,"2321 W HOWARD ST 1ST CHICAGO, IL 60645",42.02,-87.69,1703102010,,2020-12-17,2020,12
2,Chicago business license: Other,LEOPARDO COMPANIES INC.,"210 N CARPENTER ST 3RD FLOOR CHICAGO, IL 60607",41.89,-87.65,1703183300,2.0,2020-08-21,2020,8
3,Chicago business license: Other,"1021 MONTROSE, L.L.C.","1021 W MONTROSE AVE CHICAGO, IL 60613",41.96,-87.66,1703103210,,2022-05-11,2022,5
4,Chicago business license: Other,GRAHAM ENTERPRISE INC,"631-647 N LA SALLE DR CHICAGO, IL 60654",41.89,-87.63,1703108170,,2022-05-11,2022,5


In [None]:
frames.append('lic')
print(frames)

['tractsdf', 'asb', 'env', 'lic']


# *EPA*

## Hazardous waste

In [None]:
# Reload line listing

haz = pd.read_csv(dir + 'EPA/Hazardous waste/Haz_line.csv')

columns_to_convert = ['GEOID10', 'commarea']
haz[columns_to_convert] = haz[columns_to_convert].astype(str)

haz['GEOID10'] = haz['GEOID10'].str[:-3]
haz['commarea'] = haz['commarea'].str[:-3]

haz = haz.rename(columns={'oneline': 'Address', 'RCRAName': 'Name', 'lat': 'LATITUDE', 'long': 'LONGITUDE'})
haz['Category'] = 'EPA hazardous waste'

haz['date'] = np.nan
haz['year'] = np.nan
haz['month'] = np.nan

haz = haz[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

haz.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,EPA hazardous waste,#36284- CORPORATE CAMPUS,3333 BEVERLY RD HOFFMAN ESTATES IL,42.07,-88.22,,,,,
1,EPA hazardous waste,1 HOUR CLEANERS,4274 W LAKE ST MELROSE PARK IL,41.9,-87.89,,,,,
2,EPA hazardous waste,1 HOUR CLEANERS,1110 W GRANVILLE CHICAGO IL,41.99,-87.66,1703103010.0,7.0,,,
3,EPA hazardous waste,1 HOUR CLEANERS,1054 SCHAUMBURG RD STREAMWOOD IL,42.03,-88.15,,,,,
4,EPA hazardous waste,1 HR CLEANERS,5319 W LINCOLN AVE SKOKIE IL,42.03,-87.76,,,,,


In [None]:
frames.append('haz')
print(frames)

['tractsdf', 'asb', 'env', 'lic', 'haz']


## Landfills

In [None]:
# Reload line listing

land = pd.read_csv(dir + 'EPA/Landfill/Chicago_tract.csv', dtype='string')
land.drop(columns=['Unnamed: 0'], inplace=True)

land = land.rename(columns={'oneline': 'Address', 'Landfill Name': 'Name', 'lat': 'LATITUDE', 'long': 'LONGITUDE'})
land['Category'] = 'EPA landfills'

land['date'] = np.nan
land['year'] = np.nan
land['month'] = np.nan

land = land[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

land.head()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,EPA landfills,Alburn Incinerator,2200 E 119th Street Chicago IL 60617,41.6781323,-87.6154477,17031530600,53,,,
1,EPA landfills,AlliedSignal,12260 South Carondolet Avenue Chicago IL 60633,41.6720368,-87.5499766,17031838800,51,,,
2,EPA landfills,Chicago Streets and Sanitation,103rd and Doty Ave Chicago IL 60628,41.7078013,-87.5803744,17031838800,51,,,
3,EPA landfills,CID Recycling and Disposal Facility,13707 S. Jeffery Avenue Chicago IL 60633,41.6458654,-87.57696709999999,17031550200,55,,,
4,EPA landfills,Harbor View,2000 E 122nd Street Chicago IL 60633,41.6739736,-87.57386919999999,17031838800,51,,,


In [None]:
frames.append('land')
print(frames)

['tractsdf', 'asb', 'env', 'lic', 'haz', 'land']


## RSEI

In [None]:
# Reload line listing
rsei = pd.read_csv(dir + 'EPA/RSEI/Cook_Co_RSEI_2012_2021.csv')

columns_to_convert = ['STREET_ADDRESS', 'CITY_NAME', 'STATE_ABBR', 'ZIP_CODE', 'RSEI Media', 'Chemical', 'GEOID10',
                      'commarea']
rsei[columns_to_convert] = rsei[columns_to_convert].astype(str)

rsei['GEOID10'] = rsei['GEOID10'].str[:-3]
rsei['commarea'] = rsei['commarea'].str[:-3]

rsei['date'] = np.nan
rsei['year'] = np.nan
rsei['month'] = np.nan

rsei['Category'] = rsei['RSEI Media'] + ': ' + rsei['Chemical']

rsei = rsei.rename(columns={'PARENT_CO_NAME': 'Name', 'STREET_ADDRESS': 'Address', 'lat': 'LATITUDE', 'long':'LONGITUDE'})

rsei = rsei[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

rsei.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,Stack Air Releases: Cobalt and cobalt compounds,W R GRACE & CO,4099 W 71ST ST,41.76,-87.72,1703165050.0,6.0,,,
1,Stack Air Releases: Cobalt and cobalt compounds,W R GRACE & CO,4099 W 71ST ST,41.76,-87.72,1703165050.0,6.0,,,
2,Fugitive Air Releases: Chromium and chromium c...,No US Parent,2755 W LAKE ST,41.9,-87.87,,,,,
3,Stack Air Releases: Cobalt and cobalt compounds,W R GRACE & CO,4099 W 71ST ST,41.76,-87.72,1703165050.0,6.0,,,
4,Stack Air Releases: Nickel and nickel compounds,W R GRACE & CO,4099 W 71ST ST,41.76,-87.72,1703165050.0,6.0,,,


In [None]:
frames.append('rsei')
frames

['tractsdf', 'asb', 'env', 'lic', 'haz', 'land', 'rsei']

## Superfund and brown

In [None]:
superNot = pd.read_csv(dir + 'EPA/Superfund/Cook_Co_all_sites_line.csv')

columns_to_convert = ['GEOID10', 'commarea']
superNot[columns_to_convert] = superNot[columns_to_convert].astype(str)

superNot['GEOID10'] = superNot['GEOID10'].str[:-3]
superNot['commarea'] = superNot['commarea'].str[:-3]

# Define conditions and corresponding categories
conditions = [
    (superNot['NPL Status'] == 'Final NPL') | (superNot['NPL Status'] == 'Part of NPL Site'),
    (superNot['NPL Status'] == 'Not NPL') | (superNot['NPL Status'] == 'Removed from NPL')
]
categories = ['Superfund', 'Brown site']

# Create the 'Category' column based on conditions
superNot['temp'] = np.select(conditions, categories, default=None)
superNot['Category'] = "EPA NPL list: " + superNot['temp']


superNot['date'] = np.nan
superNot['year'] = np.nan
superNot['month'] = np.nan

superNot = superNot.rename(columns={'Site Name': 'Name', 'oneline': 'Address', 'lat': 'LATITUDE', 'long':'LONGITUDE'})

superNot = superNot[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

superNot.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,EPA NPL list: Brown site,103RD STREET TRANSFER STATION,"103rd Street Chicago, IL 60617",41.71,-87.56,1703151020,5,,,
1,EPA NPL list: Brown site,109TH PLACE TEAR GAS,WEST 109TH PLACE CHICAGO IL Â 60628Â,41.7,-87.63,1703149100,4,,,
2,EPA NPL list: Brown site,1150 LAKE SHORE DRIVE SITE,1150 LAKE SHORE DRIVE CHICAGO IL Â 60611Â,41.87,-87.62,1703132060,3,,,
3,EPA NPL list: Brown site,136TH STREET DRUM,340 EAST 136TH STREET CHICAGO IL Â 60827-1868Â,41.65,-87.61,1703154010,5,,,
4,EPA NPL list: Brown site,34TH & KEDZIE,34TH & KEDZIE CHICAGO IL Â 60623Â,41.83,-87.72,1703183050,3,,,


In [None]:
frames.append('superNot')
frames

['tractsdf', 'asb', 'env', 'lic', 'haz', 'land', 'rsei', 'superNot']

## Wastewater

In [None]:
# Reload line listing
wwater = pd.read_csv(dir + 'EPA/Wastewater/CookCoWWStormBiosolids_line.csv')

columns_to_convert = ['GEOID10', 'commarea']
wwater[columns_to_convert] = wwater[columns_to_convert].astype(str)

wwater['GEOID10'] = wwater['GEOID10'].str[:-3]
wwater['commarea'] = wwater['commarea'].str[:-3]

wwater['date'] = np.nan
wwater['year'] = np.nan
wwater['month'] = np.nan

wwater['Category'] = 'EPA wastewater biosolids'

wwater = wwater.rename(columns={'CWPName': 'Name', 'oneline': 'Address', 'lat': 'LATITUDE', 'long':'LONGITUDE'})

wwater = wwater[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

wwater.head()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,EPA wastewater biosolids,10 SOUTH RIVERSIDE PLAZA,10 SOUTH RIVERSIDE PLAZA CHICAGO IL,41.88,-87.64,1703128190.0,2.0,,,
1,EPA wastewater biosolids,119TH ST #521,1819 WEST 119TH CHICAGO IL,41.68,-87.67,1703175050.0,7.0,,,
2,EPA wastewater biosolids,120 SOUTH RIVERSIDE PLAZA,120 SOUTH RIVERSIDE PLAZA CHICAGO IL,41.88,-87.64,1703128190.0,2.0,,,
3,EPA wastewater biosolids,13 UNIT CONDOS,10335 DEARLOVE RD GLENVIEW IL,42.07,-87.86,,,,,
4,EPA wastewater biosolids,1420/1430 MEACHAM REDEVELOPMENT,1420 MEACHAM ROAD SCHAUMBURG IL,42.06,-88.05,,,,,


In [None]:
frames.append('wwater')
print(frames)

['tractsdf', 'asb', 'env', 'lic', 'haz', 'land', 'rsei', 'superNot', 'wwater']


# *HIFLD*

## Manufacturing

In [None]:
mfg = pd.read_csv(dir + 'Homeland infrastructure/HIFLD mfg facilities/Chicago_facil_line.csv')

columns_to_convert = ['ADDRESS', 'CITY', 'STATE', 'ZIP']
mfg[columns_to_convert] = mfg[columns_to_convert].astype(str)

mfg['Address1'] = mfg['ADDRESS'] + ' ' + mfg['CITY'] + ', ' + mfg['STATE'] + ' ' + mfg['ZIP']

mfg['GEOID10'] = wwater['GEOID10'].str[:-3]
mfg['commarea'] = wwater['commarea'].str[:-3]

mfg['date'] = np.nan
mfg['year'] = np.nan
mfg['month'] = np.nan

mfg['Category'] = 'HIFLD mfg: ' + mfg['PRODUCT']

mfg = mfg.rename(columns={'NAME': 'Name', 'Address1': 'Address'})

mfg = mfg[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

mfg.head()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,HIFLD mfg: MACHINE ALUMINUM CASTINGS,AMCO MACHINE,"900 PRECISION STREET SOMONAUK, IL 60552",41.64,-88.67,1703128.0,,,,
1,"HIFLD mfg: STEEL, ALUMINUM & COPPER RIVETS","AMERICAN COLD-HEADED PRODUCTS, INC.","253 WEST STEPHENIE DRIVE CORTLAND, IL 60112",41.92,-88.7,1703175.0,,,,
2,HIFLD mfg: COMPUTER & INTERFACE TESTING EQUIPMENT,B&B ELECTRONICS MFG. CO.,"707 DAYTON ROAD OTTAWA, IL 61350",41.38,-88.82,1703128.0,,,,
3,HIFLD mfg: MACHINING JOB SHOP,"AMERICAN MACHINING, INC.","405 EAST LAFAYETTE STREET SOMONAUK, IL 60552",41.64,-88.68,,,,,
4,HIFLD mfg: BAKING MACHINERY,"BAKERY MACHINERY & FABRICATION, INC.","307 BAKERY AVENUE PERU, IL 61354",41.33,-89.11,,,,,


In [None]:
mfg['Category'].value_counts().head(40)

HIFLD mfg: NEWSPAPER PUBLISHING                             111
HIFLD mfg: COMMERCIAL PRINTING                               86
HIFLD mfg: PLASTIC INJECTION MOLDING                         65
HIFLD mfg: READY-MIXED CONCRETE                              60
HIFLD mfg: MACHINING JOB SHOP                                47
HIFLD mfg: SHEET METAL FABRICATION                           34
HIFLD mfg: PRECISION MACHINING JOB SHOP                      31
HIFLD mfg: PRINTED CIRCUIT BOARDS                            29
HIFLD mfg: MAGAZINE PUBLISHING                               27
HIFLD mfg: STEEL FABRICATION                                 26
HIFLD mfg: SCREW MACHINE PRODUCTS                            24
HIFLD mfg: METAL FABRICATION                                 22
HIFLD mfg: CORRUGATED BOXES                                  22
HIFLD mfg: STRUCTURAL STEEL FABRICATION                      20
HIFLD mfg: OFFSET PRINTING                                   17
HIFLD mfg: WOODEN PALLETS               

In [None]:
mfg['Category'].value_counts()[41:80]

HIFLD mfg: CRUSHED STONE                         8
HIFLD mfg: INDUSTRIAL CHEMICALS                  8
HIFLD mfg: SCREEN PRINTING                       7
HIFLD mfg: SCRAP METAL PROCESSING                7
HIFLD mfg: PRECIOUS METAL JEWELRY                7
HIFLD mfg: LIMESTONE PROCESSING                  6
HIFLD mfg: COMMERCIAL PRINTING & TYPESETTING     6
HIFLD mfg: PLASTIC EXTRUSIONS                    6
HIFLD mfg: CORRUGATED CARTONS                    6
HIFLD mfg: PRECAST CONCRETE PRODUCTS             6
HIFLD mfg: INTERIOR & EXTERIOR SIGNS             6
HIFLD mfg: NEWSPAPER PRINTING                    6
HIFLD mfg: LASER PRINTING                        6
HIFLD mfg: FASTENERS                             6
HIFLD mfg: BUSINESS FORM PRINTING                6
HIFLD mfg: DIGITAL PRINTING                      6
HIFLD mfg: ADHESIVES                             6
HIFLD mfg: ELECTRONIC PREPRESS                   6
HIFLD mfg: PAPER CONVERTING                      5
HIFLD mfg: CONTRACT ASSEMBLY   

In [None]:
mfg['Category'].value_counts()[81:120]

HIFLD mfg: NEWSPAPER PUBLISHING & COMMERCIAL PRINTING            5
HIFLD mfg: POWDER COATING                                        5
HIFLD mfg: PHARMACEUTICALS                                       5
HIFLD mfg: COMMERCIAL LITHOGRAPHIC PRINTING                      5
HIFLD mfg: PRINTING INKS                                         5
HIFLD mfg: PHARMACEUTICAL PACKAGING                              4
HIFLD mfg: AIR FILTERS                                           4
HIFLD mfg: PRECISION GRINDING JOB SHOP                           4
HIFLD mfg: HAIR CARE PRODUCTS                                    4
HIFLD mfg: ICE                                                   4
HIFLD mfg: PLASTIC PRODUCTS                                      4
HIFLD mfg: HARD CHROME PLATING                                   4
HIFLD mfg: COMPANY HEADQUARTERS & NEWSPAPER PUBLISHING           4
HIFLD mfg: INDUSTRIAL FASTENERS                                  4
HIFLD mfg: COMMERCIAL & LITHOGRAPHIC PRINTING                 

In [None]:
mfg['Category'].value_counts().tail(40)

HIFLD mfg: CUSTOM SHEET METAL FABRICATION, INCLUDING COLD ROLLED STEEL, STAINLESS STEEL & ALUMINUM                                     1
HIFLD mfg: CORPORATE HEADQUARTERS & INDUSTRIAL CASTERS & CONVEYORS                                                                     1
HIFLD mfg: CHAINLINK, ALUMINUM & WOODEN FENCING                                                                                        1
HIFLD mfg: STRUCTURAL STEEL & ORNAMENTAL IRON FABRICATION, INCLUDING BAR JOISTS, DECKING, CRANE LIFTS & PRECAST ERECTION               1
HIFLD mfg: NONMETALLIC GASKETS, DIE-CUT PARTS, PRESSURE SENSITIVE & DRY-BACKED ADHESIVE MATERIALS                                      1
HIFLD mfg: WATER PURIFICATION COMPONENTS & SYSTEMS FOR POTABLE WATER, PROCESS WATER, WATER RECYCLING & INDUSTRIAL & MUNICIPAL WASTE    1
HIFLD mfg: HEALTH CARE, AUTOMOTIVE & INDUSTRIAL PACKAGING PLASTIC BAGS & CAN LINERS                                                    1
HIFLD mfg: PRINT INSPECTION, DEFECT DETEC

In [None]:
frames.append('mfg')
frames

['tractsdf',
 'asb',
 'env',
 'lic',
 'haz',
 'land',
 'rsei',
 'superNot',
 'wwater',
 'mfg']

## Power plants

In [None]:
power = pd.read_csv(dir + 'Homeland infrastructure/HIFLD EPA FRS power plants/Chicago_power_line.csv')


columns_to_convert = ['LOCATION_A', 'CITY_NAME', 'STATE_CODE', 'POSTAL_COD']
power[columns_to_convert] = power[columns_to_convert].astype(str)

power['Zip'] = power['POSTAL_COD'].str[:5]

power['Address1'] = power['LOCATION_A'] + ' ' + power['CITY_NAME'] + ', ' + power['STATE_CODE'] + ' ' + power['Zip']

power['date'] = np.nan
power['year'] = np.nan
power['month'] = np.nan

power['Category'] = 'HIFLD power plants: ' +  power['ENERGY_SRC']

# Extracting longitude and latitude
power[['LONGITUDE', 'LATITUDE']] = power['geometry'].str.extract(r'POINT \(([^ ]+) ([^ ]+)\)')

power = power.rename(columns={'PRIMARY_NA': 'Name', 'Address1': 'Address'})

power = power[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

power.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,HIFLD power plants: Disillate Fuel Oil (all Di...,FISK GENERATING STATION,"1111 W. CERMAK RD. CHICAGO, IL 60608",41.8508,-87.653297,17031843200,31,,,
1,HIFLD power plants: Natural Gas,CALUMET ENERGY POWER STATION,"11601 S TORRENCE AVE CHICAGO, IL 60617",41.683701,-87.556602,17031838800,51,,,
2,HIFLD power plants: Natural Gas,M & M/MARS,"2019 N OAK PARK AVE CHICAGO, IL 60707",41.917301,-87.793503,17031250500,25,,,
3,HIFLD power plants: Natural Gas,ST MARY OF NAZARETH HOSPITAL CENTER,"2233 WEST DIVISION ST CHICAGO, IL 60622",41.901699,-87.683899,17031242400,24,,,
4,HIFLD power plants: Natural Gas,UNIVERSITY OF ILLINOIS COGEN FACILITY,"1140 S. MORGAN CHICAGO, IL 60607",41.867802,-87.6511,17031833300,28,,,


In [None]:
frames.append('power')
frames

['tractsdf',
 'asb',
 'env',
 'lic',
 'haz',
 'land',
 'rsei',
 'superNot',
 'wwater',
 'mfg',
 'power']

## Wastewater plants

In [None]:
watertreat = pd.read_csv(dir + 'Homeland infrastructure/HIFLD EPA Wastewater treatment plants/Wastewater_Treatment_Plants_Chicago_line.csv')
watertreat['date'] = pd.to_datetime(watertreat['CWP_EFFECTIVE_DATE'])
watertreat.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 72 columns):
 #   Column                         Non-Null Count  Dtype              
---  ------                         --------------  -----              
 0   Unnamed: 0                     5 non-null      int64              
 1   X                              5 non-null      float64            
 2   Y                              5 non-null      float64            
 3   OBJECTID_1                     5 non-null      int64              
 4   NPDES_ID                       5 non-null      object             
 5   REGISTRY_ID                    5 non-null      int64              
 6   CWP_NAME                       5 non-null      object             
 7   CWP_STREET                     5 non-null      object             
 8   CWP_CITY                       5 non-null      object             
 9   CWP_STATE                      5 non-null      object             
 10  CWP_ZIP                       

In [None]:
watertreat = pd.read_csv(dir + 'Homeland infrastructure/HIFLD EPA Wastewater treatment plants/Wastewater_Treatment_Plants_Chicago_line.csv')

columns_to_convert = ['GEOID10', 'commarea', 'CWP_EFFECTIVE_DATE']
watertreat[columns_to_convert] = watertreat[columns_to_convert].astype(str)

watertreat['GEOID10'] = watertreat['GEOID10'].str[:-3]
watertreat['commarea'] = watertreat['commarea'].str[:-3]

watertreat['date'] = watertreat['CWP_EFFECTIVE_DATE'].str.split(' ').str[0]
watertreat['date'] = pd.to_datetime(watertreat['date'])
watertreat['year'] = watertreat['date'].dt.year
watertreat['month'] = watertreat['date'].dt.month

watertreat['Category'] = 'HIFLD/EPA water treatment: ' +  watertreat['STATE_WATER_BODY_NAME']

# Extracting longitude and latitude
watertreat[['LONGITUDE', 'LATITUDE']] = watertreat['geometry'].str.extract(r'POINT \(([^ ]+) ([^ ]+)\)')

watertreat = watertreat.rename(columns={'CWP_NAME': 'Name', 'oneline': 'Address'})

watertreat = watertreat[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

watertreat.head()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,HIFLD/EPA water treatment: LITTLE CALUMET RIVER,MWRDGC CALUMET WRP,400 EAST 130TH STREET CHICAGO IL 60628,41.6592971,-87.6073334,1703154010.0,5.0,2017-08-01,2017,8
1,HIFLD/EPA water treatment: CHICAGO SANITARY AN...,MWRDGC STICKNEY WRP,6001 WEST PERSHING ROAD CHICAGO IL 60804,41.816225,-87.7738735,,,2014-01-01,2014,1
2,HIFLD/EPA water treatment: CHICAGO SANITARY & ...,MWRDGC LEMONT WRP,13 STEPHEN STREET CHICAGO IL 60439,41.6771925,-87.9999619,,,2008-02-01,2008,2
3,HIFLD/EPA water treatment: CHICAGO RIVER,DUS MANAGEMENT INC,35 E WACKER DRIVE SUITE 600 CHICAGO IL 60601,41.8865744,-87.6267937,1703132010.0,3.0,2017-12-01,2017,12
4,HIFLD/EPA water treatment: SALT CREEK,MWRDGC-JOHN E. EGAN WRP,550 SOUTH MEACHAM ROAD CHICAGO IL 60193,42.0198641,-88.0377008,,,2007-09-01,2007,9


In [None]:
frames.append('watertreat')
print(frames)

['tractsdf', 'asb', 'env', 'lic', 'haz', 'land', 'rsei', 'superNot', 'wwater', 'mfg', 'power', 'watertreat']


# *Manually searched*

## Industrial sites, categorized

In [None]:
# manual = pd.read_csv('/content/gdrive/My Drive/Sociome_Folder/Data Commons/Industrial sites - Data.csv')

# columns_to_convert = ['Address', 'City', 'State', 'Zip']
# manual[columns_to_convert] = manual[columns_to_convert].astype(str)

# manual['oneline'] = manual['Address'] + ' ' + manual['City'] + ', ' + manual['State'] + ' ' + manual['Zip']

# manual.head()

Unnamed: 0,Name,Address,City,State,Zip,Category,Notes,oneline
0,"Ardagh Metal Bevarage USA, Inc",1101 W 43rd St 1,Chicago,IL,60609,Metal Supplier,,"1101 W 43rd St 1 Chicago, IL 60609"
1,Popular Iron Works,7800 S Claremont Ave,Chicago,IL,60620,Metal Supplier,,"7800 S Claremont Ave Chicago, IL 60620"
2,"Chromium Industries, LLC",2625-4653 W Chicago Ave,Chicago,IL,60651,"Drug, Chemiecal, Paint Factory",,"2625-4653 W Chicago Ave Chicago, IL 60651"
3,Siegal Steel,4747 S Kedzie Ave 1st,Chicago,IL,60632,Metal Supplier,,"4747 S Kedzie Ave 1st Chicago, IL 60632"
4,The Azek Group LLC,1330 W Fulton St 3rd Suite 350,Chicago,IL,60607,"Manufacturing of Plastics, Foams, Construction...",,"1330 W Fulton St 3rd Suite 350 Chicago, IL 60607"


### GIS

In [None]:
# # def AddLatLongTract(infile):

# manual2 = AddLatLongTract(manual)

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [None]:
# # manual2.to_csv(curated + 'Industrial sites - Data_geocoded.csv', index = False)

# # Reload
manual2 = pd.read_csv(curated + 'Industrial sites - Data_geocoded.csv')

manual2.head()

Unnamed: 0,Name,Address,City,State,Zip,Category,Notes,oneline,lat,long,Location,geometry,GEOID10,commarea,commarea_n
0,"Ardagh Metal Bevarage USA, Inc",1101 W 43rd St 1,Chicago,IL,60609,Metal Supplier,,"1101 W 43rd St 1 Chicago, IL 60609",41.82,-87.65,"1101 W 43rd St #1, Chicago, IL 60609, USA",POINT (-87.6535784 41.8157338),17031842600.0,61.0,61.0
1,Popular Iron Works,7800 S Claremont Ave,Chicago,IL,60620,Metal Supplier,,"7800 S Claremont Ave Chicago, IL 60620",41.75,-87.68,"7800 S Claremont Ave, Chicago, IL 60620, USA",POINT (-87.6818991 41.7517738),17031700100.0,70.0,70.0
2,"Chromium Industries, LLC",2625-4653 W Chicago Ave,Chicago,IL,60651,"Drug, Chemiecal, Paint Factory",,"2625-4653 W Chicago Ave Chicago, IL 60651",41.9,-87.69,"2625 W Chicago Ave, Chicago, IL 60622, USA",POINT (-87.69286489999999 41.8955613),17031242800.0,24.0,24.0
3,Siegal Steel,4747 S Kedzie Ave 1st,Chicago,IL,60632,Metal Supplier,,"4747 S Kedzie Ave 1st Chicago, IL 60632",41.81,-87.7,"4747 S Kedzie Ave #1st, Chicago, IL 60632, USA",POINT (-87.70332859999999 41.8068439),17031842800.0,58.0,58.0
4,The Azek Group LLC,1330 W Fulton St 3rd Suite 350,Chicago,IL,60607,"Manufacturing of Plastics, Foams, Construction...",,"1330 W Fulton St 3rd Suite 350 Chicago, IL 60607",41.89,-87.66,"1330 W Fulton St 3rd 350, Chicago, IL 60607, USA",POINT (-87.6599246 41.8867989),17031833000.0,28.0,28.0


In [None]:
manual2['date'] = np.nan
manual2['year'] = np.nan
manual2['month'] = np.nan


manual2['Category2'] = 'Manual search: ' +  manual2['Category']

manual2['geometry'] = manual2['geometry'].astype(str)
# Extracting longitude and latitude
manual2[['LONGITUDE', 'LATITUDE']] = manual2['geometry'].str.extract(r'POINT \(([^ ]+) ([^ ]+)\)')

manual2.drop(columns=['Address', 'Category'], inplace=True)

manual2 = manual2.rename(columns={'oneline': 'Address', 'Category2': 'Category'})

manual2 = manual2[['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']]

manual2.head()

Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,Manual search: Metal Supplier,"Ardagh Metal Bevarage USA, Inc","1101 W 43rd St 1 Chicago, IL 60609",41.8157338,-87.6535784,17031842600.0,61.0,,,
1,Manual search: Metal Supplier,Popular Iron Works,"7800 S Claremont Ave Chicago, IL 60620",41.7517738,-87.6818991,17031700100.0,70.0,,,
2,"Manual search: Drug, Chemiecal, Paint Factory","Chromium Industries, LLC","2625-4653 W Chicago Ave Chicago, IL 60651",41.8955613,-87.69286489999999,17031242800.0,24.0,,,
3,Manual search: Metal Supplier,Siegal Steel,"4747 S Kedzie Ave 1st Chicago, IL 60632",41.8068439,-87.70332859999999,17031842800.0,58.0,,,
4,"Manual search: Manufacturing of Plastics, Foam...",The Azek Group LLC,"1330 W Fulton St 3rd Suite 350 Chicago, IL 60607",41.8867989,-87.6599246,17031833000.0,28.0,,,


In [None]:
frames.append('manual2')
print(frames)

['tractsdf', 'asb', 'env', 'lic', 'haz', 'land', 'rsei', 'superNot', 'wwater', 'mfg', 'power', 'watertreat', 'manual2']


# Merge (and dedup)

In [None]:
# frames.remove('tractsdf')
print(frames)

frames = [asb, env, lic, haz, land, rsei, superNot, wwater, mfg, power, watertreat, manual2]

['asb', 'env', 'lic', 'haz', 'land', 'rsei', 'superNot', 'wwater', 'mfg', 'power', 'watertreat', 'manual2']


In [None]:
asb = asb.reset_index(drop=True)
env = env.reset_index(drop=True)
lic = lic.reset_index(drop=True)
haz = haz.reset_index(drop=True)
land = land.reset_index(drop=True)
rsei = rsei.reset_index(drop=True)
superNot = superNot.reset_index(drop=True)
wwater = wwater.reset_index(drop=True)
mfg = mfg.reset_index(drop=True)
power = power.reset_index(drop=True)
watertreat = watertreat.reset_index(drop=True)
manual2 = manual2.reset_index(drop=True)

In [None]:
print("asb")
print(asb.columns.to_list())
print("env")
print(env.columns.to_list())
print("lic")
print(lic.columns.to_list())
print("haz")
print(haz.columns.to_list())
print("land")
print(land.columns.to_list())

asb
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']
env
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']
lic
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']
haz
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']
land
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']


'rsei', 'superNot', 'wwater', 'mfg', 'power', 'watertreat', 'manual2'

In [None]:
print("rsei")
print(rsei.columns.to_list())
print("superNot")
print(superNot.columns.to_list())
print("wwater")
print(wwater.columns.to_list())
print("mfg")
print(mfg.columns.to_list())
print("power")
print(power.columns.to_list())
print("watertreat")
print(watertreat.columns.to_list())
print("manual2")
print(manual2.columns.to_list())

rsei
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']
superNot
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']
wwater
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']
mfg
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']
power
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']
watertreat
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']
manual2
['Category', 'Name', 'Address', 'LATITUDE', 'LONGITUDE', 'GEOID10', 'commarea', 'date', 'year', 'month']


In [None]:
# ['asb', 'env', 'lic', 'haz', 'land', 'rsei', 'superNot', 'wwater', 'mfg', 'power', 'watertreat', 'manual2']

# list1 = [asb, env]
# stacked1 = pd.concat(list1, ignore_index=True)

# list2 = [stacked1, lic, haz, land]
# stacked2 = pd.concat(list2, ignore_index=True)

# list3 = [stacked2, rsei, superNot, wwater, mfg]
# stacked3 = pd.concat(list3, ignore_index=True)

# list4 = [stacked3, power, watertreat, manual2]
# stacked4 = pd.concat(list4, ignore_index=True)

print(stacked4.shape)

stacked5 = stacked4.drop_duplicates()
print(stacked5.shape)

stacked6 = stacked5.drop_duplicates(subset=['Name', 'Address'])
print(stacked6.shape)

stacked7 = stacked6.dropna(subset=['LATITUDE', 'LONGITUDE'])
print(stacked7.shape)

stacked8 = stacked7.dropna(subset=['Address'])
print(stacked8.shape)

stacked9 = stacked8.dropna(subset=['GEOID10'])
print(stacked9.shape)

(1228755, 10)
(1057269, 10)
(375450, 10)
(375450, 10)
(375450, 10)
(354901, 10)


In [None]:
stacked9['Category'].value_counts().head(30)

Chicago business license: Other                               218273
Asbestos                                                       89255
Enviro comp Air Pollution Work Order                            8289
EPA hazardous waste                                             7665
Enviro comp Illegal Dumping Work Order                          5000
Enviro comp Toxics Hazardous Materials Work Order               4103
Enviro comp Noise Complaint                                     3833
Enviro comp AIR POLLUTION WORK ORDER                            3661
Chicago business license: Manufacturing                         1927
Enviro comp ILLEGAL DUMPING WORK ORDER                          1655
Enviro comp Other                                               1643
Enviro comp NOISE COMPLAINT                                     1620
Enviro comp Asbestos Work Order                                 1582
Enviro comp CONSTRUCTION AND DEMOLITION                         1549
Chicago business license: Hazardou

# Export and reload

In [None]:
# with open(curated +  'colab62_pollution_sites_latlong.csv', 'w') as f:
#   stacked9.to_csv(f)

In [None]:
# Reload
c62_Poll_sites = pd.read_csv(curated + 'colab62_pollution_sites_latlong.csv')

  c62_Poll_sites = pd.read_csv(curated + 'colab62_pollution_sites_latlong.csv')


In [None]:
print(c62_Poll_sites.info())
c62_Poll_sites.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 354901 entries, 0 to 354900
Data columns (total 11 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   Unnamed: 0  354901 non-null  int64  
 1   Category    354901 non-null  object 
 2   Name        354316 non-null  object 
 3   Address     354901 non-null  object 
 4   LATITUDE    354901 non-null  float64
 5   LONGITUDE   354901 non-null  float64
 6   GEOID10     354901 non-null  float64
 7   commarea    301750 non-null  float64
 8   date        346200 non-null  object 
 9   year        346201 non-null  object 
 10  month       346201 non-null  object 
dtypes: float64(4), int64(1), object(6)
memory usage: 29.8+ MB
None


Unnamed: 0.1,Unnamed: 0,Category,Name,Address,LATITUDE,LONGITUDE,GEOID10,commarea,date,year,month
0,1,Asbestos,WIRTZ REALTY,"115 S WOLCOTT AVE, Chicago, IL",41.87,-87.67,17031838100.0,28.0,1993-02-19,1993,2
1,2,Asbestos,WIRTZ REALTY,"117 S WOLCOTT AVE, Chicago, IL",41.87,-87.67,17031838100.0,28.0,1993-02-19,1993,2
2,3,Asbestos,LAKE SHORE RECYCLING SYSTEM,"1240 W EXCHANGE AVE, Chicago, IL",41.82,-87.65,17031842600.0,61.0,2014-02-27,2014,2
3,4,Asbestos,CITY OF CHICAGO,"1245 S CALIFORNIA AVE, Chicago, IL",41.87,-87.7,17031843300.0,29.0,2004-10-07,2004,10
4,5,Asbestos,CITY OF CHICAGO,"1256 W 47TH PL, Chicago, IL",41.81,-87.66,17031611100.0,61.0,1999-07-08,1999,7
