## Imports

In [1]:
import googlemaps
import pandas as pd
import numpy as np
import re
import os
import geopandas as gpd
import folium
import requests
from bs4 import BeautifulSoup
from shapely.geometry import Point

## Read-in

In [2]:
df = pd.read_csv('LISF_March_April.csv')

In [3]:
df.columns

Index(['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', 'View Doc', 'Doc Number',
       'Doc Recorded', 'Doc Executed', 'Doc Type', 'Consi. Amt.',
       '1st Grantor', '1st Grantee', 'Assoc. Doc#', '1st PIN', 'deed_urls'],
      dtype='object')

In [4]:
# Define the regex pattern to split the text
pattern = r'(\d+-\d+-\d+-\d+-\d+)\s(.*)'

# Apply regex and split the text into two columns
df[['PIN', 'Address']] = df['1st PIN'].str.extract(pattern)

# Remove leading/trailing whitespace from the address column
df['Address'] = df['Address'].str.strip()

In [5]:
df = df.drop(columns=['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0','1st PIN'])

## Clean, drop, and convert

In [6]:
df['Doc Recorded'] = pd.to_datetime(df['Doc Recorded'])

## Data Stuff

In [7]:
old_month = 3
new_month = 4

In [8]:
# Count number of new month dates
mar_dates_count = len(df.loc[df['Doc Recorded'].dt.month == old_month])

print(f'Number of March dates: {mar_dates_count}')

Number of March dates: 1210


In [9]:
# Count number of old month dates
apr_dates_count = len(df.loc[df['Doc Recorded'].dt.month == new_month])

print(f'Number of April dates: {apr_dates_count}')

Number of April dates: 567


In [10]:
df['1st Grantor'] = df['1st Grantor'].fillna('NA')
df['1st Grantee'] = df['1st Grantee'].fillna('NA')

In [11]:
municipal_authority = ['city', 'town', 'municipality', 'village','transit auth','department of transp']  # list of municipal authority keywords
df = df[~df['1st Grantor'].str.contains('|'.join(municipal_authority), case=False) & 
        ~df['1st Grantee'].str.contains('|'.join(municipal_authority), case=False)]

In [12]:
# create new column and assign colors based on month
df['COLOR'] = df['Doc Recorded'].apply(lambda x: 'orange' if x.month == old_month else 'red' if x.month == new_month else '')

## Get mortgage amounts

In [13]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'TE': 'Trailers'
}

In [14]:
def mortgage_url_snagger(URL, headers):
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    for link in soup.find_all('a', href=True):
        if link['href'].startswith('/Document/Detail'):
            mortgage_url = 'https://crs.cookcountyclerkil.gov' + link['href']
            return mortgage_url

In [15]:
def mortgage_consi_snagger(URL, headers):
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    # Find all tr tags
    trs = soup.find_all('tr')

    # Loop through each tr tag and look for the td tag containing 'Consideration Amount' label
    for tr in trs:
        td = tr.find('th', text='Consideration Amount:')
        if td:
            # If the td tag is found, get the next td tag containing the amount
            amount_td = td.find_next_sibling('td')
            if amount_td:
                # Print the amount
                amount = amount_td.text.strip()
                return amount
            else:
                return 'not found'

In [16]:
df['mortgage_urls'] = df['deed_urls'].apply(lambda x: mortgage_url_snagger(x, headers))

In [17]:
df['mortgage_amount'] = df['mortgage_urls'].apply(lambda x: mortgage_consi_snagger(x, headers) if x is not None else None)

  td = tr.find('th', text='Consideration Amount:')


In [18]:
df[['mortgage_urls','mortgage_amount']] = df[['mortgage_urls','mortgage_amount']].fillna('NA')

## Geocode

In [19]:
df['geo_address'] = df['Address'] + ' Cook County, IL'

In [20]:
len(df)

1394

In [21]:
%store -r google_maps_API_Key
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

In [22]:
# Define the geocode function
def geocode(add):
    g = gmaps_key.geocode(add)
    if g:
        lat = g[0]["geometry"]["location"]["lat"]
        lng = g[0]["geometry"]["location"]["lng"]
        return (lat, lng)
    else:
        return None

# Apply geocoding to the 'geo_address' column and store the results in 'geocoded' column
df['geocoded'] = df['geo_address'].apply(geocode)

In [23]:
df['geocoded'] = df['geocoded'].astype(str)
df[['lat', 'lon']] = df['geocoded'].apply(lambda x: (None, None) if x == 'None' else x.strip('()').split(', ', 1)).apply(pd.Series)
df['lat'] = df['lat'].astype(float)
df['lon'] = df['lon'].astype(float)

## HTML Popup Formatter

In [24]:
df.columns

Index(['View Doc', 'Doc Number', 'Doc Recorded', 'Doc Executed', 'Doc Type',
       'Consi. Amt.', '1st Grantor', '1st Grantee', 'Assoc. Doc#', 'deed_urls',
       'PIN', 'Address', 'COLOR', 'mortgage_urls', 'mortgage_amount',
       'geo_address', 'geocoded', 'lat', 'lon'],
      dtype='object')

In [42]:
def popup_html(row):
    grantor = row['1st Grantor']
    grantee = row['1st Grantee']
    PIN = row['PIN']
    Address = row['Address']
    mortgage_amount = row['mortgage_amount']
    
    html = '''<!DOCTYPE html>
    <html>
    <strong>Lender: </strong>{}'''.format(grantor) + '''<br>
    <strong>Borrower: </strong>{}'''.format(grantee) + '''<br>
    <strong>PIN: </strong>{}'''.format(PIN) + '''<br>
    <strong>Address: </strong>{}'''.format(Address) + '''<br>
    <strong>Mortgage Amount: </strong>{}'''.format(mortgage_amount) + '''<br>
    </html>
    '''
    return html

In [43]:
df

Unnamed: 0,View Doc,Doc Number,Doc Recorded,Doc Executed,Doc Type,Consi. Amt.,1st Grantor,1st Grantee,Assoc. Doc#,deed_urls,PIN,Address,COLOR,mortgage_urls,mortgage_amount,geo_address,geocoded,lat,lon
0,View,2307445306,2023-03-15,,LIS PENDENS FORECLOSURE,,FEDERAL HOME LOAN MTG CORP TR,WETER DALE R,1.315808e+09,https://crs.cookcountyclerkil.gov/Document/Det...,12-26-308-027-0000,"8635 W CAREY AVE, RIVER GROVE",orange,https://crs.cookcountyclerkil.gov/Document/Det...,"$143,400.00","8635 W CAREY AVE, RIVER GROVE Cook County, IL","(41.9284093, -87.84204489999999)",41.928409,-87.842045
1,View,2307445303,2023-03-15,,LIS PENDENS FORECLOSURE,,SPECIALIZED LOAN SERVICING LLC,DALY JOHN,1.935149e+09,https://crs.cookcountyclerkil.gov/Document/Det...,25-09-109-101-0000,"9652 S UNION AVE, CHICAGO",orange,https://crs.cookcountyclerkil.gov/Document/Det...,"$141,000.00","9652 S UNION AVE, CHICAGO Cook County, IL","(41.7181068, -87.6410203)",41.718107,-87.641020
2,View,2307445176,2023-03-15,,LIS PENDENS FORECLOSURE,,US BK TRUST NATL ASSN TR,THOMAS-WILEY JANICE A,7.323330e+08,https://crs.cookcountyclerkil.gov/Document/Det...,21-31-109-005-0000,"8001 S PHILLIPS AVE, CHICAGO",orange,https://crs.cookcountyclerkil.gov/Document/Det...,"$369,000.00","8001 S PHILLIPS AVE, CHICAGO Cook County, IL","(41.7497311, -87.56471479999999)",41.749731,-87.564715
3,View,2307445023,2023-03-15,3/10/2023,LIS PENDENS FORECLOSURE,,JPMORGAN CHASE BK NATL ASSN,BHAGWANDAS B EZRA TRUST,1.408310e+09,https://crs.cookcountyclerkil.gov/Document/Det...,16-07-121-045-1010,"151 N KENILWORTH AVE, OAK PARK",orange,https://crs.cookcountyclerkil.gov/Document/Det...,"$86,000.00","151 N KENILWORTH AVE, OAK PARK Cook County, IL","(41.8890712, -87.79792750000001)",41.889071,-87.797928
4,View,2307445012,2023-03-15,3/9/2023,LIS PENDENS FORECLOSURE,,US BK TRUST NATL ASSN TR,MARQUEZ ALFONSO,5.194352e+08,https://crs.cookcountyclerkil.gov/Document/Det...,19-26-113-005-0000,"7215 S LAWNDALE AVE, CHICAGO",orange,https://crs.cookcountyclerkil.gov/Document/Det...,"$220,400.00","7215 S LAWNDALE AVE, CHICAGO Cook County, IL","(41.7619202, -87.7147024)",41.761920,-87.714702
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1772,View,2306715004,2023-03-08,,LIS PENDENS FORECLOSURE,,CHICAGO 601 RANDOLPH LLC,MAGIC PS LLC,,https://crs.cookcountyclerkil.gov/Document/Det...,17-09-331-015-0000,"611 W RANDOLPH ST, CHICAGO",orange,,,"611 W RANDOLPH ST, CHICAGO Cook County, IL","(41.8842265, -87.6433624)",41.884226,-87.643362
1773,View,2306710209,2023-03-08,3/2/2023,LIS PENDENS FORECLOSURE,,CITIZENS BK NA,DANIEL KENNETH M,6.179433e+08,https://crs.cookcountyclerkil.gov/Document/Det...,30-06-200-073-0000,"3045 E 138TH PL, BURNHAM",orange,https://crs.cookcountyclerkil.gov/Document/Det...,"$115,500.00","3045 E 138TH PL, BURNHAM Cook County, IL","(41.6442757, -87.5432361)",41.644276,-87.543236
1774,View,2306710207,2023-03-08,3/2/2023,LIS PENDENS FORECLOSURE,,NATIONSTAR MTG LLC,MOORE-MCCLINE YOLANDA,1.634706e+09,https://crs.cookcountyclerkil.gov/Document/Det...,30-20-104-011-0000,"1285 BURNHAM AVE, CALUMET CITY",orange,https://crs.cookcountyclerkil.gov/Document/Det...,"$95,144.00","1285 BURNHAM AVE, CALUMET CITY Cook County, IL","(41.5978364, -87.5391798)",41.597836,-87.539180
1775,View,2306710206,2023-03-08,3/2/2023,LIS PENDENS FORECLOSURE,,US BK NATL ASSN,WHITE ALEXANDER GRAHAM,1.401029e+09,https://crs.cookcountyclerkil.gov/Document/Det...,28-16-303-055-1002,"15804 LARAMIE AVE, OAK FOREST",orange,https://crs.cookcountyclerkil.gov/Document/Det...,"$68,000.00","15804 LARAMIE AVE, OAK FOREST Cook County, IL","(41.6045738, -87.7468263)",41.604574,-87.746826


In [44]:
import folium
from folium.plugins import MarkerCluster
import numpy as np

m = folium.Map(location=df[["lat", "lon"]].mean().to_list(), zoom_start=10)

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(f'Cook County Pending Foreclosures')

caption_html = '''
                <p align="center" style="vertical-align: bottom; font-size:13px"><i>{}</i></p>
                '''.format('March & April')

### Create map container ###
m = folium.Map(location=df[["lat", "lon"]].mean().to_list(), zoom_start=9.5, tiles=None)

# Create two FeatureGroups for different color pins
fg_orange = folium.FeatureGroup(name='April') ## new month
fg_red = folium.FeatureGroup(name='March') ## old month

for index, row in df.iterrows():
    lat = row['lat']
    lon = row['lon']
    color = row['COLOR']
    if pd.notnull(lat) and pd.notnull(lon) and color == 'orange':
        marker = folium.CircleMarker(
            location=[lat, lon],
            radius=10,
            fill=True,
            color=color,
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_orange)
    elif pd.notnull(lat) and pd.notnull(lon):
        marker = folium.CircleMarker(
            location=[lat,lon],
            raidus=5,
            fill=True,
            color=color,
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_red)
    else:
        continue

# Add the FeatureGroups to the map
fg_orange.add_to(m)
fg_red.add_to(m)

folium.TileLayer('OpenStreetMap', control=False).add_to(m)

# Add LayerControl to the map
folium.map.LayerControl(collapsed=False).add_to(m)
m.get_root().html.add_child(folium.Element(title_html))
m.get_root().html.add_child(folium.Element(caption_html))
folium.TileLayer('CartoDBpositron', control=False).add_to(m) 
            
# Display map
m

In [45]:
m.save('index.html')

## Map URL Snagger

In [29]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

cwd = os.getcwd()

cwd = cwd.split('/')

final_name = base_name + cwd[-1]
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/lis_pendens_scraper_mar_apr


## Stats for story

In [30]:
# Count number of old month dates
mar_dates_count = len(df.loc[df['Doc Recorded'].dt.month == old_month])

print(f'Number of March dates: {mar_dates_count}')

# Count number of new month dates
apr_dates_count = len(df.loc[df['Doc Recorded'].dt.month == new_month])

print(f'Number of April dates: {apr_dates_count}')

Number of March dates: 912
Number of April dates: 482


In [31]:
mar = df.loc[df['Doc Recorded'].dt.month == old_month]
apr = df.loc[df['Doc Recorded'].dt.month == new_month]

In [35]:
print(f'March: ${mar.mortgage_amount_int.sum():,}')
print('-------')
print(f'April: ${apr.mortgage_amount_int.sum():,}')
print('-------')
print(f'Total: ${mar.mortgage_amount_int.sum() + apr.mortgage_amount_int.sum():,}')

March: $182,659,953
-------
April: $103,713,597
-------
Total: $286,373,550


In [36]:
70 + 49 + 46

165

In [37]:
mar['1st Grantor'].value_counts().head(60)

1st Grantor
US BK NATL ASSN                           69
LAKEVIEW LOAN SERVICING LLC               50
US BK NATL ASSN TR                        49
US BK TRUST NATL ASSN TR                  45
NATIONSTAR MTG LLC                        44
NEWREZ LLC                                38
FREEDOM MTG CORP                          33
WILMINGTON SAV FUND SOC FSB TR            32
WELLS FARGO BK NA                         25
FIFTH THIRD BK NATL ASSN                  22
PENNYMAC LOAN SERVICES LLC                21
CARRINGTON MTG SERVICES LLC               19
LOANDEPOTCOM LLC                          18
FEDERAL HOME LOAN MTG CORP TR             16
JPMORGAN CHASE BK NATL ASSN               14
DEUTSCHE BK NATL TRUST CO TR              14
BANK OF AMER NA                           12
THE BK OF NEW YORK MELLON TR              12
ROCKET MTG LLC                            10
CITIZENS BK NA                             8
METROPOLITAN LIFE INST CO                  8
PNC BK NATL ASSN                           

In [38]:
32 + 32 + 15

79

In [39]:
apr['1st Grantor'].value_counts().head(60)

1st Grantor
US BK NATL ASSN                   32
US BK TRUST NATL ASSN TR          32
LAKEVIEW LOAN SERVICING LLC       23
NATIONSTAR MTG LLC                22
NEWREZ LLC                        18
US BK NATL ASSN TR                15
JPMORGAN CHASE BK NATL ASSN       14
FEDERAL HOME LOAN MTG CORP TR     13
FREEDOM MTG CORP                  12
WELLS FARGO BK NA                 11
CITIMORTGAGE INC                  10
FIFTH THIRD BK NATL ASSN          10
PENNYMAC LOAN SERVICES LLC         9
MORTGAGE ASSETS MGMT LLC           9
BMO HARRIS BK NA                   9
DEUTSCHE BK NATL TRUST CO TR       8
CARRINGTON MTG SERVICES LLC        7
LIMA ONE CAP LLC                   7
AS 312 LLC                         6
THE HUNTINGTON NATL BK             6
PLANET HOME LENDG LLC              6
WILMINGTON SAV FUND SOC FSB TR     6
LOANDEPOTCOM LLC                   5
LOANCARE LLC                       4
BANKUNITED NA                      4
BANK OF AMER NA                    4
SERVIS ONE INC            

In [40]:
apr['1st Grantee'].value_counts()

1st Grantee
JEBB PROP 1 LLC                   7
DGG INV CO                        6
CHICAGO TITLE LAND TRUST CO TR    4
CIBROOK LTD                       3
HERNANDEZ MICHAEL                 2
                                 ..
ZAMBRANO ANTONIO                  1
OSZAKEWSKI CHRISTOPHER            1
MERINO PATRICIA                   1
REED DAMIEN J                     1
DAVIS RICHARD A                   1
Name: count, Length: 351, dtype: int64

In [41]:
df.to_csv('mar_apr_foreclosures.csv')

In [None]:
apr.sort_values(by='mortgage_amount_int',ascending=False)

In [33]:
apr['mortgage_amount_int'] = apr['mortgage_amount'].str.replace('$','')
apr['mortgage_amount_int'] = apr['mortgage_amount_int'].str.replace(',','')
apr = apr.loc[apr['mortgage_amount_int'] != 'NA']
apr['mortgage_amount_int'] = apr['mortgage_amount_int'].astype(float).fillna(0).astype(int)
apr['mortgage_amount_int'].median()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apr['mortgage_amount_int'] = apr['mortgage_amount'].str.replace('$','')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apr['mortgage_amount_int'] = apr['mortgage_amount_int'].str.replace(',','')


174510.0

In [34]:
mar['mortgage_amount_int'] = mar['mortgage_amount'].str.replace('$','')
mar['mortgage_amount_int'] = mar['mortgage_amount_int'].str.replace(',','')
mar = mar.loc[mar['mortgage_amount_int'] != 'NA']
mar['mortgage_amount_int'] = mar['mortgage_amount_int'].astype(float).fillna(0).astype(int)
mar['mortgage_amount_int'].median()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mar['mortgage_amount_int'] = mar['mortgage_amount'].str.replace('$','')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mar['mortgage_amount_int'] = mar['mortgage_amount_int'].str.replace(',','')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mar['mortgage_amount_int'] = mar['mortgage_amount_int'].a

166920.0

## Boundary Analysis

In [None]:
boundaries = gpd.read_file('Boundaries - Neighborhoods.geojson')

In [None]:
# create Point objects from lat/lon columns in df
geometry = gpd.points_from_xy(apr['lon'], apr['lat'])

# create geodataframe from df with Point objects as geometry
gdf_filings_apr = gpd.GeoDataFrame(apr, geometry=geometry)

gdf_filings_apr.crs = 'EPSG:4326'
gdf_filings_apr = gdf_filings_apr.to_crs(boundaries.crs)

# use contains method to get count of points within each neighborhood
counts_apr = gpd.sjoin(gdf_filings_apr, boundaries, predicate='within').groupby('pri_neigh').size().reset_index(name='count')

# create Point objects from lat/lon columns in df
geometry = gpd.points_from_xy(mar['lon'], mar['lat'])

# create geodataframe from df with Point objects as geometry
gdf_filings_mar = gpd.GeoDataFrame(mar, geometry=geometry)

gdf_filings_mar.crs = 'EPSG:4326'
gdf_filings_mar = gdf_filings_mar.to_crs(boundaries.crs)

# use contains method to get count of points within each neighborhood
counts_mar = gpd.sjoin(gdf_filings_feb, boundaries, predicate='within').groupby('pri_neigh').size().reset_index(name='count')

In [None]:
counts_mar

In [None]:
counts_apr