In [1]:
#Libraries
import pandas as pd
import numpy as np
import geopandas as gdp
import requests
from bs4 import BeautifulSoup
import geocoder
from datetime import date
import json
from pandas.io.json import json_normalize
import folium
import geojson
from geopy.geocoders import Nominatim 
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import re
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [2]:
def getsouptable(url, cls):
    response = requests.get(url=url)
    soup = BeautifulSoup(response.content, 'html.parser')
    ctab = soup.find('table', class_=cls)
    cbod = ctab.find('tbody')
    chead = ctab.find('thead')

    rows = chead.find_all('tr')
    heads = []
    for r in rows:
        cols = r.find_all('th')
        cols = [elem.text.strip() for elem in cols]
        heads.append([elem for elem in cols if elem])

    rows = cbod.find_all('tr')
    stuff = []
    for r in rows:
        cols = r.find_all('td')
        cols = [elem.text.strip() for elem in cols]
        stuff.append([elem for elem in cols if elem])
        
    df = pd.DataFrame(stuff)
    df.columns = heads
    return df

In [3]:
def rawnum(numstr):

    for x in range(0, len(numstr)-1):
        if numstr[x]=="[":
            numstr = numstr[0:x]
            break
            
    for x in range(len(numstr)-1, -1, -1):
        if not numstr[x].isnumeric():
            if numstr[x]==".":
                continue
            else:
                numstr = numstr.replace(numstr[x],"")
    
    return numstr

In [4]:
url = 'https://www.housingstudies.org/data-portal/browse/?indicator=total-foreclosure-activity&area=chicago-community-areas&property_type=0&view_as=view-table'
cls = 'table table-striped portal-table'
df_forc = getsouptable(url, cls)

In [5]:
df_forc

Unnamed: 0,Geography,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Albany Park,32,57,149,252,342,375,257,222,123,61,61,44,47,31,36
1,Archer Heights,19,34,43,97,112,128,97,81,45,34,30,20,22,22,14
2,Armour Square,6,5,6,7,8,21,13,25,14,6,2,5,5,3,--
3,Ashburn,246,284,358,527,529,629,514,610,384,263,211,205,235,201,182
4,Auburn Gresham,371,390,520,602,474,515,431,493,336,300,285,255,282,248,234
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,West Lawn,75,116,204,377,427,479,349,336,265,136,111,97,105,83,65
73,West Pullman,342,489,556,586,432,388,330,426,260,192,190,199,163,163,139
74,West Ridge,70,138,314,611,823,711,488,455,232,105,79,80,85,68,55
75,West Town,130,146,263,441,584,479,412,319,182,96,75,55,66,59,60


In [6]:
keeps = [0,13,14,15]
df_forc = df_forc[df_forc.columns[keeps]]

In [7]:
df_forc

Unnamed: 0,Geography,2017,2018,2019
0,Albany Park,47,31,36
1,Archer Heights,22,22,14
2,Armour Square,5,3,--
3,Ashburn,235,201,182
4,Auburn Gresham,282,248,234
...,...,...,...,...
72,West Lawn,105,83,65
73,West Pullman,163,163,139
74,West Ridge,85,68,55
75,West Town,66,59,60


In [8]:
df_forc = df_forc.replace('--',0)
df_forc

Unnamed: 0,Geography,2017,2018,2019
0,Albany Park,47,31,36
1,Archer Heights,22,22,14
2,Armour Square,5,3,0
3,Ashburn,235,201,182
4,Auburn Gresham,282,248,234
...,...,...,...,...
72,West Lawn,105,83,65
73,West Pullman,163,163,139
74,West Ridge,85,68,55
75,West Town,66,59,60


In [9]:
df_forc['2017'] = df_forc['2017'].astype('int64')
df_forc['2018'] = df_forc['2018'].astype('int64')
df_forc['2019'] = df_forc['2019'].astype('int64')
df_forc

Unnamed: 0,Geography,2017,2018,2019
0,Albany Park,47,31,36
1,Archer Heights,22,22,14
2,Armour Square,5,3,0
3,Ashburn,235,201,182
4,Auburn Gresham,282,248,234
...,...,...,...,...
72,West Lawn,105,83,65
73,West Pullman,163,163,139
74,West Ridge,85,68,55
75,West Town,66,59,60


In [10]:
sums = [1,2,3]
df_forc['Forclosures'] = df_forc[df_forc.columns[sums]].sum(axis=1)
df_forc

Unnamed: 0,Geography,2017,2018,2019,Forclosures
0,Albany Park,47,31,36,114
1,Archer Heights,22,22,14,58
2,Armour Square,5,3,0,8
3,Ashburn,235,201,182,618
4,Auburn Gresham,282,248,234,764
...,...,...,...,...,...
72,West Lawn,105,83,65,253
73,West Pullman,163,163,139,465
74,West Ridge,85,68,55,208
75,West Town,66,59,60,185


In [11]:
url = 'https://www.housingstudies.org/data-portal/browse/?indicator=total-mortgage-activity&area=chicago-community-areas&property_type=0&view_as=view-table'
cls = 'table table-striped portal-table'
df_morg = getsouptable(url, cls)
df_morg

Unnamed: 0,Geography,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Albany Park,2782,2618,1983,1205,985,903,769,1025,936,716,834,1023,827,697,809
1,Archer Heights,802,638,475,255,190,167,155,219,203,176,185,187,174,151,170
2,Armour Square,397,341,361,306,242,217,166,198,187,127,142,155,165,167,141
3,Ashburn,4474,3798,3054,1443,1166,874,782,1069,1179,868,879,1053,999,876,910
4,Auburn Gresham,3202,2847,2503,1261,911,576,501,575,632,564,583,708,735,759,821
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,West Lawn,2914,2269,1608,823,601,467,454,605,680,476,533,585,562,457,487
73,West Pullman,2762,2342,1891,950,559,381,282,286,395,300,292,282,336,381,432
74,West Ridge,4377,4407,3624,1914,1657,1415,1243,1573,1481,1020,1243,1438,1280,1098,1176
75,West Town,7263,7108,6698,4851,4185,3643,3062,4180,4057,3284,4025,4372,3517,2743,3742


In [12]:
keeps = [0,13,14,15]
df_morg = df_morg[df_morg.columns[keeps]]
df_morg

Unnamed: 0,Geography,2017,2018,2019
0,Albany Park,827,697,809
1,Archer Heights,174,151,170
2,Armour Square,165,167,141
3,Ashburn,999,876,910
4,Auburn Gresham,735,759,821
...,...,...,...,...
72,West Lawn,562,457,487
73,West Pullman,336,381,432
74,West Ridge,1280,1098,1176
75,West Town,3517,2743,3742


In [13]:
for x in range(0,df_morg.shape[0]):
    for y in range(1, 4):
        df_morg.iat[x,y] = rawnum(df_morg.iat[x,y])
df_morg

Unnamed: 0,Geography,2017,2018,2019
0,Albany Park,827,697,809
1,Archer Heights,174,151,170
2,Armour Square,165,167,141
3,Ashburn,999,876,910
4,Auburn Gresham,735,759,821
...,...,...,...,...
72,West Lawn,562,457,487
73,West Pullman,336,381,432
74,West Ridge,1280,1098,1176
75,West Town,3517,2743,3742


In [14]:
df_morg['2017'] = df_morg['2017'].astype('int64')
df_morg['2018'] = df_morg['2018'].astype('int64')
df_morg['2019'] = df_morg['2019'].astype('int64')
df_morg['Mortgages'] = df_morg[df_morg.columns[sums]].sum(axis=1)
df_morg

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_morg['2017'] = df_morg['2017'].astype('int64')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_morg['2018'] = df_morg['2018'].astype('int64')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_morg['2019'] = df_morg['2019'].astype('int64')
A value is trying to be set on a copy of a slice from a

Unnamed: 0,Geography,2017,2018,2019,Mortgages
0,Albany Park,827,697,809,2333
1,Archer Heights,174,151,170,495
2,Armour Square,165,167,141,473
3,Ashburn,999,876,910,2785
4,Auburn Gresham,735,759,821,2315
...,...,...,...,...,...
72,West Lawn,562,457,487,1506
73,West Pullman,336,381,432,1149
74,West Ridge,1280,1098,1176,3554
75,West Town,3517,2743,3742,10002


In [15]:
url = 'https://www.housingstudies.org/data-portal/browse/?indicator=poverty-rate&area=chicago-community-areas&view_as=view-table'
cls = 'table table-striped portal-table'
df_pov = getsouptable(url, cls)

In [16]:
df_pov

Unnamed: 0,Geography,Share of Population that is under the Poverty Level
0,Albany Park,16.2%
1,Archer Heights,15.0%
2,Armour Square,32.8%
3,Ashburn,12.8%
4,Auburn Gresham,25.4%
...,...,...
72,West Lawn,13.6%
73,West Pullman,22.9%
74,West Ridge,21.6%
75,West Town,10.7%


In [17]:
for x in range(0,df_pov.shape[0]):
    df_pov.iat[x,1] = float(rawnum(df_pov.iat[x,1]))
    df_pov.iat[x,1] = df_pov.iat[x,1]/100
df_pov

Unnamed: 0,Geography,Share of Population that is under the Poverty Level
0,Albany Park,0.162
1,Archer Heights,0.15
2,Armour Square,0.328
3,Ashburn,0.128
4,Auburn Gresham,0.254
...,...,...
72,West Lawn,0.136
73,West Pullman,0.229
74,West Ridge,0.216
75,West Town,0.107


In [18]:
url = 'https://www.housingstudies.org/data-portal/browse/?indicator=population-and-age&area=chicago-community-areas&view_as=view-table'
cls = 'table table-striped portal-table'
df_pop = getsouptable(url, cls)
df_pop

Unnamed: 0,Geography,Share of Population Aged under 18,Share of Population Aged 18 to 44,Share of Population Aged 45 to 64,Share of Population Aged over 65,Total Population
0,Albany Park,22.4%,45.3%,22.1%,10.1%,49806
1,Archer Heights,30.0%,38.4%,18.9%,12.7%,13726
2,Armour Square,16.7%,32.2%,26.1%,24.9%,13538
3,Ashburn,26.2%,35.5%,26.6%,11.7%,43356
4,Auburn Gresham,22.8%,32.4%,26.8%,17.9%,45909
...,...,...,...,...,...,...
72,West Lawn,28.5%,40.0%,21.0%,10.4%,31886
73,West Pullman,24.7%,34.8%,24.6%,15.9%,27028
74,West Ridge,26.3%,36.5%,23.9%,13.3%,78466
75,West Town,13.5%,64.8%,15.7%,5.9%,83757


In [19]:
df2 = df_pov.set_index('Geography').join(df_pop.set_index('Geography'))
df2

Unnamed: 0_level_0,Share of Population that is under the Poverty Level,Share of Population Aged under 18,Share of Population Aged 18 to 44,Share of Population Aged 45 to 64,Share of Population Aged over 65,Total Population
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"(Albany Park,)",0.162,22.4%,45.3%,22.1%,10.1%,49806
"(Archer Heights,)",0.15,30.0%,38.4%,18.9%,12.7%,13726
"(Armour Square,)",0.328,16.7%,32.2%,26.1%,24.9%,13538
"(Ashburn,)",0.128,26.2%,35.5%,26.6%,11.7%,43356
"(Auburn Gresham,)",0.254,22.8%,32.4%,26.8%,17.9%,45909
...,...,...,...,...,...,...
"(West Lawn,)",0.136,28.5%,40.0%,21.0%,10.4%,31886
"(West Pullman,)",0.229,24.7%,34.8%,24.6%,15.9%,27028
"(West Ridge,)",0.216,26.3%,36.5%,23.9%,13.3%,78466
"(West Town,)",0.107,13.5%,64.8%,15.7%,5.9%,83757


In [20]:
keeps=[0,5]
df2 = df2[df2.columns[keeps]]
df2

Unnamed: 0_level_0,Share of Population that is under the Poverty Level,Total Population
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1
"(Albany Park,)",0.162,49806
"(Archer Heights,)",0.15,13726
"(Armour Square,)",0.328,13538
"(Ashburn,)",0.128,43356
"(Auburn Gresham,)",0.254,45909
...,...,...
"(West Lawn,)",0.136,31886
"(West Pullman,)",0.229,27028
"(West Ridge,)",0.216,78466
"(West Town,)",0.107,83757


In [21]:
for x in range(0,df2.shape[0]):
    df2.iat[x,1] = int(rawnum(df2.iat[x,1]))

In [22]:
df2['Poverty'] = df2[df2.columns[0]] * df2[df2.columns[1]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Poverty'] = df2[df2.columns[0]] * df2[df2.columns[1]]


In [23]:
df2 = df2[df2.columns[list([0,1,2])]]
df2

Unnamed: 0_level_0,Share of Population that is under the Poverty Level,Total Population,Poverty
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(Albany Park,)",0.162,49806,8068.572
"(Archer Heights,)",0.15,13726,2058.9
"(Armour Square,)",0.328,13538,4440.464
"(Ashburn,)",0.128,43356,5549.568
"(Auburn Gresham,)",0.254,45909,11660.886
...,...,...,...
"(West Lawn,)",0.136,31886,4336.496
"(West Pullman,)",0.229,27028,6189.412
"(West Ridge,)",0.216,78466,16948.656
"(West Town,)",0.107,83757,8961.999


In [24]:
df2 = df2.join(df_forc.set_index('Geography'))

In [25]:
df2 = df2[df2.columns[list([1,2,6])]]
df2

Unnamed: 0_level_0,Total Population,Poverty,Forclosures
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(Albany Park,)",49806,8068.572,114
"(Archer Heights,)",13726,2058.9,58
"(Armour Square,)",13538,4440.464,8
"(Ashburn,)",43356,5549.568,618
"(Auburn Gresham,)",45909,11660.886,764
...,...,...,...
"(West Lawn,)",31886,4336.496,253
"(West Pullman,)",27028,6189.412,465
"(West Ridge,)",78466,16948.656,208
"(West Town,)",83757,8961.999,185


In [26]:
df2 = df2.join(df_morg.set_index('Geography'))

In [27]:
df2 = df2[df2.columns[list([0,1,2,6])]]
df2

Unnamed: 0_level_0,Total Population,Poverty,Forclosures,Mortgages
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"(Albany Park,)",49806,8068.572,114,2333
"(Archer Heights,)",13726,2058.9,58,495
"(Armour Square,)",13538,4440.464,8,473
"(Ashburn,)",43356,5549.568,618,2785
"(Auburn Gresham,)",45909,11660.886,764,2315
...,...,...,...,...
"(West Lawn,)",31886,4336.496,253,1506
"(West Pullman,)",27028,6189.412,465,1149
"(West Ridge,)",78466,16948.656,208,3554
"(West Town,)",83757,8961.999,185,10002


In [28]:
df2 = df2.reset_index()
df2.columns.values[0]='Geography'
df2

Unnamed: 0,Geography,Total Population,Poverty,Forclosures,Mortgages
0,"(Albany Park,)",49806,8068.572,114,2333
1,"(Archer Heights,)",13726,2058.9,58,495
2,"(Armour Square,)",13538,4440.464,8,473
3,"(Ashburn,)",43356,5549.568,618,2785
4,"(Auburn Gresham,)",45909,11660.886,764,2315
...,...,...,...,...,...
72,"(West Lawn,)",31886,4336.496,253,1506
73,"(West Pullman,)",27028,6189.412,465,1149
74,"(West Ridge,)",78466,16948.656,208,3554
75,"(West Town,)",83757,8961.999,185,10002


In [29]:
df2 = df2[df2.columns[list([0,1,2,3,4])]]
df2

Unnamed: 0,Geography,Total Population,Poverty,Forclosures,Mortgages
0,"(Albany Park,)",49806,8068.572,114,2333
1,"(Archer Heights,)",13726,2058.9,58,495
2,"(Armour Square,)",13538,4440.464,8,473
3,"(Ashburn,)",43356,5549.568,618,2785
4,"(Auburn Gresham,)",45909,11660.886,764,2315
...,...,...,...,...,...
72,"(West Lawn,)",31886,4336.496,253,1506
73,"(West Pullman,)",27028,6189.412,465,1149
74,"(West Ridge,)",78466,16948.656,208,3554
75,"(West Town,)",83757,8961.999,185,10002


In [30]:
for x in range(0,df2.shape[0]):
    df2.iat[x,0]=df2.iat[x,0][0]


In [31]:
df2

Unnamed: 0,Geography,Total Population,Poverty,Forclosures,Mortgages
0,Albany Park,49806,8068.572,114,2333
1,Archer Heights,13726,2058.9,58,495
2,Armour Square,13538,4440.464,8,473
3,Ashburn,43356,5549.568,618,2785
4,Auburn Gresham,45909,11660.886,764,2315
...,...,...,...,...,...
72,West Lawn,31886,4336.496,253,1506
73,West Pullman,27028,6189.412,465,1149
74,West Ridge,78466,16948.656,208,3554
75,West Town,83757,8961.999,185,10002


In [32]:
#df2.to_csv('Chi_Comm_Character.csv', index=False)