## Imports

In [3]:
import googlemaps
import pandas as pd
import numpy as np
import re
import os
import geopandas as gpd
import folium
import requests
from bs4 import BeautifulSoup
from shapely.geometry import Point

## Read-in

In [44]:
# Load the lis pendens foreclosure export. dtype='str' reads every column as
# text, so identifiers such as document numbers are not coerced to numbers.
df = pd.read_csv('LISF_Feb_2024.csv',dtype='str')

In [45]:
# Make sure to update this
# The report month is entered twice: as a number (used to filter on
# 'Doc Recorded') and as a name (used in map labels, printed statistics and
# the output CSV file name).
print('Enter report month as numeric. Example: January is 1, February is 2, etc...')
first_month = int(input().strip())
# A month outside 1-12 would silently match no rows further down.
if not 1 <= first_month <= 12:
    raise ValueError(f'Report month must be between 1 and 12, got {first_month}')
print('Enter report month as named string. Example: January...February...')
# Strip stray whitespace because the name ends up in a file name.
current_month = input().strip()
if not current_month:
    raise ValueError('Report month name must not be empty')

Enter report month as numeric. Example: January is 1, February is 2, etc...
2
Enter report month as named string. Example: January...February...
February


In [46]:
# Echo both report-month values so a typo is spotted before the analysis runs.
print(first_month, '---------', current_month, sep='\n')

2
---------
February


In [47]:
# Row count as loaded, before de-duplication and filtering.
len(df)

656

In [48]:
# '1st PIN' holds a five-part dash-separated PIN, one whitespace character,
# then the address. Capture the two parts as separate groups.
pattern = r'(\d+-\d+-\d+-\d+-\d+)\s(.*)'

# One output column per capture group; rows that do not match yield NaN in both.
pin_and_address = df['1st PIN'].str.extract(pattern)

df['PIN'] = pin_and_address[0]
# Trim surrounding whitespace from the address while storing it.
df['Address'] = pin_and_address[1].str.strip()

In [49]:
# Drop the 'Unnamed: ...' columns (presumably index columns left behind by
# earlier CSV round-trips) and the raw '1st PIN' column, which has been split
# into 'PIN' and 'Address'.
# errors='ignore' keeps the cell working when an export carries a different
# number of 'Unnamed' columns, and makes the cell safe to re-run.
df = df.drop(
    columns=['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', '1st PIN'],
    errors='ignore',
)

## Clean, drop, and convert

In [50]:
# Parse the recording date; format='mixed' lets pandas infer the format of
# each value individually.
df['Doc Recorded'] = pd.to_datetime(df['Doc Recorded'], format='mixed')

In [51]:
# Remove rows that are identical across every remaining column.
df = df.drop_duplicates()

In [52]:
# Row count after removing duplicate rows.
len(df)

556

## Filter out municipal filings and flag the report month

In [10]:
# # Count number of earliest month dates
# sep_dates_count = len(df.loc[df['Doc Recorded'].dt.month == first_month])

# print(f'Number of September dates: {sep_dates_count}')

In [53]:
# Replace missing party names with the literal 'NA' so the string matching
# that follows never sees NaN.
party_columns = ['1st Grantor', '1st Grantee']
df[party_columns] = df[party_columns].fillna('NA')

In [54]:
# Keywords that mark a filing party as a municipal or other public body.
municipal_authority = ['city', 'town', 'municipality', 'village','transit auth','department of transp']  # list of municipal authority keywords

# Build the alternation once, escaping each keyword so it is always matched
# literally even if a keyword with regex metacharacters is added later.
municipal_pattern = '|'.join(re.escape(keyword) for keyword in municipal_authority)

grantor_is_municipal = df['1st Grantor'].str.contains(municipal_pattern, case=False)
grantee_is_municipal = df['1st Grantee'].str.contains(municipal_pattern, case=False)

# NOTE(review): these are case-insensitive substring matches, so a private
# party whose name merely contains a keyword (e.g. "... VILLAGE ...") is
# dropped as well — confirm this is intended.
# .copy() gives later column assignments an independent frame to write to.
df = df[~grantor_is_municipal & ~grantee_is_municipal].copy()

In [55]:
# Flag filings recorded in the report month with 'red'; every other row gets
# an empty string.
recorded_in_report_month = df['Doc Recorded'].dt.month == first_month
df['COLOR'] = recorded_in_report_month.map({True: 'red', False: ''})

## Get mortgage amounts

In [57]:
# Browser-like request headers passed to both scraping helpers.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5',
    # NOTE(review): advertising 'br' assumes a Brotli decoder is available to
    # requests in this environment — confirm.
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'TE': 'Trailers'
}

In [58]:
def mortgage_url_snagger(URL, headers, timeout=30):
    """Return the first document-detail link found on the page at ``URL``.

    Parameters
    ----------
    URL : str
        Page to fetch. A missing or blank value (None, NaN, '') is skipped
        without making a request.
    headers : dict
        HTTP headers sent with the request.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises, so one
        stalled request cannot hang the whole column apply.

    Returns
    -------
    str or None
        Absolute URL built from the first ``<a>`` whose ``href`` starts with
        '/Document/Detail', or None when there is no such link.
    """
    if not isinstance(URL, str) or not URL.strip():
        return None

    response = requests.get(URL, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    for link in soup.find_all('a', href=True):
        if link['href'].startswith('/Document/Detail'):
            return 'https://crs.cookcountyclerkil.gov' + link['href']
    return None

In [59]:
def mortgage_consi_snagger(URL, headers, timeout=30):
    """Return the 'Consideration Amount:' value shown on the page at ``URL``.

    Parameters
    ----------
    URL : str
        Page to fetch. A missing or blank value (None, NaN, '') is skipped
        without making a request.
    headers : dict
        HTTP headers sent with the request.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises.

    Returns
    -------
    str or None
        The stripped text of the ``<td>`` following the
        'Consideration Amount:' header cell; the string 'not found' when that
        header cell has no ``<td>`` sibling; None when the page has no such
        header cell at all.
    """
    if not isinstance(URL, str) or not URL.strip():
        return None

    response = requests.get(URL, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Walk the table rows looking for the header cell that labels the amount.
    for tr in soup.find_all('tr'):
        # `string=` is the current name of the deprecated `text=` argument.
        label_cell = tr.find('th', string='Consideration Amount:')
        if label_cell is None:
            continue
        # The amount sits in the data cell right after the label.
        amount_td = label_cell.find_next_sibling('td')
        if amount_td is None:
            return 'not found'
        return amount_td.text.strip()
    return None

In [60]:
# Fetch each deed page and keep the first document-detail link found on it
# (one HTTP request per row).
df['mortgage_urls'] = df['deed_urls'].apply(mortgage_url_snagger, args=(headers,))

In [61]:
# Look up the consideration amount for every row that has a mortgage URL;
# rows without one stay None.
df['mortgage_amount'] = df['mortgage_urls'].map(
    lambda mortgage_url: None
    if mortgage_url is None
    else mortgage_consi_snagger(mortgage_url, headers)
)

  td = tr.find('th', text='Consideration Amount:')


In [62]:
# Mark rows where scraping produced nothing with the literal 'NA'.
for scraped_column in ('mortgage_urls', 'mortgage_amount'):
    df[scraped_column] = df[scraped_column].fillna('NA')

## Geocode

In [63]:
# Append the county and state to each address to form the geocoding query.
df['geo_address'] = df['Address'] + ' Cook County, IL'

In [64]:
# Row count going into geocoding (the geocode step is applied once per row).
len(df)

372

In [65]:
# Restore the API key previously saved with IPython's %store, so the key is
# not hard-coded in the notebook.
%store -r google_maps_API_Key
# Despite its name, gmaps_key is the googlemaps client object, not the key.
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

In [66]:
# Define the geocode function
def geocode(add):
    """Geocode one address with the module-level ``gmaps_key`` client.

    Parameters
    ----------
    add : str
        Address text to look up.

    Returns
    -------
    tuple of (float, float) or None
        ``(lat, lng)`` of the first result, or None when the address is
        missing/blank or the service returns no results.
    """
    # Rows whose address could not be extracted arrive here as NaN; skip them
    # instead of sending a meaningless query to the API.
    if not isinstance(add, str) or not add.strip():
        return None

    g = gmaps_key.geocode(add)
    if not g:
        return None

    location = g[0]["geometry"]["location"]
    return (location["lat"], location["lng"])

# Geocode every query string; 'geocoded' receives a (lat, lng) tuple or None.
df['geocoded'] = df['geo_address'].map(geocode)

In [67]:
# Store the geocoding result as text: '(lat, lng)' or 'None'.
df['geocoded'] = df['geocoded'].astype(str)

# Split each textual pair back into its two halves; failed lookups become a
# pair of Nones.
coordinate_pairs = df['geocoded'].apply(
    lambda text: (None, None) if text == 'None' else text.strip('()').split(', ', 1)
)

# Expand the pairs into two columns (0 = latitude, 1 = longitude) and convert
# them to floats; None becomes NaN.
coordinates = coordinate_pairs.apply(pd.Series)
df['lat'] = coordinates[0].astype(float)
df['lon'] = coordinates[1].astype(float)

## HTML Popup Formatter

In [68]:
# List the columns present after cleaning, scraping and geocoding.
df.columns

Index(['View Doc', 'Doc Number', 'Doc Recorded', 'Doc Executed', 'Doc Type',
       'Consi. Amt.', '1st Grantor', '1st Grantee', 'Assoc. Doc#', 'deed_urls',
       'PIN', 'Address', 'COLOR', 'mortgage_urls', 'mortgage_amount',
       'geo_address', 'geocoded', 'lat', 'lon'],
      dtype='object')

In [69]:
def popup_html(row):
    """Build the HTML shown in a map marker's popup for one filing.

    Parameters
    ----------
    row : mapping
        Anything indexable by the keys '1st Grantor', '1st Grantee', 'PIN',
        'Address' and 'mortgage_amount' (e.g. a DataFrame row or a dict).

    Returns
    -------
    str
        HTML snippet with one labelled line per field. Field values are
        HTML-escaped so characters such as '&' or '<' in party names cannot
        break or inject markup.
    """
    # Imported locally under a distinct name to keep the function self-contained.
    from html import escape

    labelled_values = (
        ('Lender', row['1st Grantor']),
        ('Borrower', row['1st Grantee']),
        ('PIN', row['PIN']),
        ('Address', row['Address']),
        ('Mortgage Amount', row['mortgage_amount']),
    )

    lines = ['<!DOCTYPE html>', '<html>']
    lines.extend(
        '<strong>{}: </strong>{}<br>'.format(label, escape(str(value)))
        for label, value in labelled_values
    )
    # The trailing empty item reproduces the original snippet's final newline
    # and indentation.
    lines.extend(['</html>', ''])
    return '\n    '.join(lines)

In [70]:
import folium
from folium.plugins import MarkerCluster
import numpy as np

# Centre the map on the mean of the geocoded coordinates.
map_center = df[["lat", "lon"]].mean().to_list()

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format('Cook County Pending Foreclosures')

caption_html = '''
                <p align="center" style="vertical-align: bottom; font-size:13px"><i>{}</i></p>
                '''.format(current_month)

### Create map container ###
# tiles=None: the base tile layers are added explicitly further down.
m = folium.Map(location=map_center, zoom_start=9.5, tiles=None)

# One FeatureGroup holding the report-month markers; its name is the label
# shown in the layer control.
fg_red = folium.FeatureGroup(name=current_month)

for index, row in df.iterrows():
    lat = row['lat']
    lon = row['lon']
    color = row['COLOR']
    # Only report-month rows ('red') with usable coordinates get a marker.
    if pd.isnull(lat) or pd.isnull(lon) or color != 'red':
        continue
    marker = folium.CircleMarker(
        location=[lat, lon],
        radius=5,
        fill=True,
        color=color,
        popup=folium.Popup(popup_html(row), max_width=400))
    marker.add_to(fg_red)

# Add the FeatureGroup to the map
fg_red.add_to(m)

# NOTE(review): two base tile layers are added (OpenStreetMap here and
# CartoDBpositron below), both hidden from the layer control — confirm that
# both are wanted.
folium.TileLayer('OpenStreetMap', control=False).add_to(m)

# Add LayerControl to the map, then the title and caption elements
folium.map.LayerControl(collapsed=False).add_to(m)
m.get_root().html.add_child(folium.Element(title_html))
m.get_root().html.add_child(folium.Element(caption_html))
folium.TileLayer('CartoDBpositron', control=False).add_to(m)

# Display map
m

In [71]:
# Write the map to a standalone HTML file in the working directory.
m.save('index.html')

## Map URL Snagger

In [72]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

# The map URL is the base plus the name of the current working directory.
cwd = os.getcwd()

# os.path.basename honours the platform's path separator, unlike splitting
# the path on '/' by hand (which returns the whole path on Windows).
final_name = base_name + os.path.basename(cwd)
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/lis_pendens_scraper_february_2024


## Stats for story

In [73]:
# Turn amounts such as '$26,375,000.00' into whole dollars for the statistics.
# regex=False makes '$' a literal character on every pandas version (as a
# regex it would be an end-of-string anchor and remove nothing).
amount_text = (
    df['mortgage_amount']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
)

# errors='coerce' turns the 'NA' placeholder, and any other non-numeric marker
# such as 'not found', into NaN instead of raising.
df['mortgage_amount_int'] = pd.to_numeric(amount_text, errors='coerce')

# Keep only rows with a usable amount. .copy() gives the assignment below an
# independent frame to write to.
df = df.loc[df['mortgage_amount_int'].notna()].copy()
df['mortgage_amount_int'] = df['mortgage_amount_int'].astype(int)

In [74]:
# Count number of first month dates
dates_count = len(df.loc[df['Doc Recorded'].dt.month == first_month])

print(f'Number of {current_month} dates: {dates_count}')

Number of February dates: 317


In [75]:
# Sanity check: which recording months are present among the remaining rows.
df['Doc Recorded'].dt.month.value_counts()

Doc Recorded
2    317
Name: count, dtype: int64

In [126]:
# df.loc[df['Doc Recorded'].dt.month == first_month]

In [76]:
# x: the filings recorded in the report month; the statistics cells that
# follow all read from this frame.
x = df.loc[df['Doc Recorded'].dt.month == first_month]

In [77]:
# Renumber the rows 0..n-1, discarding the old index labels.
x = x.reset_index(drop=True)

In [36]:
# x.at[123,'mortgage_amount_int']=33130000
# x.at[123,'mortgage_amount']='$33,130,000'

# x.at[16,'mortgage_amount_int']=173600
# x.at[16,'mortgage_amount']='$173,600'

In [80]:
# Show the single filing with the largest mortgage amount.
x.sort_values(by='mortgage_amount_int', ascending=False).head(1)

Unnamed: 0,View Doc,Doc Number,Doc Recorded,Doc Executed,Doc Type,Consi. Amt.,1st Grantor,1st Grantee,Assoc. Doc#,deed_urls,PIN,Address,COLOR,mortgage_urls,mortgage_amount,geo_address,geocoded,lat,lon,mortgage_amount_int
98,View,2405857010,2024-02-27,2/26/2024,LIS PENDENS FORECLOSURE,,CAF BRIDGE BORROWER WF CRE LLC,TRITON REALTY GRP LLC,2231225006.0,https://crs.cookcountyclerkil.gov/Document/Det...,17-04-222-063-1001,"1211 N LA SALLE ST, CHICAGO",red,https://crs.cookcountyclerkil.gov/Document/Det...,"$26,375,000.00","1211 N LA SALLE ST, CHICAGO Cook County, IL","(41.9042995, -87.6326984)",41.9043,-87.632698,26375000


In [81]:
# Print the mortgage URL of the largest filing. The row is looked up by value
# rather than by a hard-coded position copied from one month's output, so the
# cell keeps pointing at the right record when the data changes.
largest_filing_label = x['mortgage_amount_int'].idxmax()
print(x.loc[largest_filing_label, 'mortgage_urls'])

https://crs.cookcountyclerkil.gov/Document/Detail?dId=MzI3MTcwMTc1&hId=NTU0YWQxYTQ5ZTM5YWMzZWI0Yzg1NjJlNTcyNGY5NTk5MmYxNjM2MGI5YmUwOThlNGI1YmJmMzE2Y2M5NzlmMQ2


In [82]:
# Total mortgage amount across the report-month filings.
total_mortgage_amount = x.mortgage_amount_int.sum()
print(f'{current_month}: ${total_mortgage_amount:,}')

February: $119,490,126


In [87]:
# February (US Bank)
# Hand-added filing counts. 19 and 12 match the two 'US BANK ...' rows in the
# '1st Grantor' value counts; the remaining 2 + 2 presumably belong to other
# US Bank name variants — TODO confirm, and redo the sum when the data changes.
19 + 12 + 2 + 2

35

In [86]:
# print(x[x['1st Grantor'] == 'NEWREZ LLC']['deed_urls'].iloc[1])

In [85]:
# Report-month filings per '1st Grantor' (labelled 'Lender' in the map
# popups), limited to the 60 most frequent.
x['1st Grantor'].value_counts().head(60)

1st Grantor
WILMINGTON SAV FUND SOCIETY FSB TR        28
NEWREZ LLC                                26
US BANK TRUST NATIONAL ASSN TR            19
CYMBIDIUM RESTORATION TRUST               19
WELLS FARGO BK NA                         13
US BANK NATIONAL ASSN TR                  12
DEUTSCHE BK NATIONAL TRUST COMPANY TR      9
NATIONSTAR MTG LLC                         8
LAKEVIEW LOAN SERVICING LLC                8
FREEDOM MTG CORPORATION                    8
HILLSIDE CONDO ASSOCIATION                 5
ROCKET MTG LLC                             5
MORTGAGE ASSETS MGMT LLC                   5
CARRINGTON MTG SERVICES LLC                4
HSBC BANK USA NATIONAL ASSN TR             4
PENNYMAC LOAN SERVICES LLC                 4
OLD NATL BANK                              4
BMO BK NA                                  3
CITIBANK NA TR                             3
WELLS FARGO BANK NATIONAL ASSN TR          3
PHH MTG CORPORATION                        3
METROPOLITAN LIFE INST COMPANY             

In [99]:
# Report-month filings per '1st Grantee' (labelled 'Borrower' in the map
# popups).
x['1st Grantee'].value_counts()

1st Grantee
CHICAGO TITLE LAND TRUST CO TR           4
MBBI CLASSIC BLDR LLC                    2
DORSETT RESTORATION AND RECOVERY CORP    2
MILOVAC WENDY M                          2
KLUJIAN PETER                            2
                                        ..
213 48 LLC                               1
CARTER ADAM E                            1
BRASILE JOAN MARIE                       1
GURTZ ADAM                               1
TNT PROP USA LLC                         1
Name: count, Length: 310, dtype: int64

In [100]:
# Save the working frame, named after the report month.
# NOTE(review): the year in the file name is hard-coded, and the index is
# written as an unnamed first column (it reloads as 'Unnamed: 0') — confirm
# both are intended before reusing this for another period.
df.to_csv(f'{current_month}_2024_foreclosures.csv')

In [44]:
# apr.sort_values(by='mortgage_amount_int',ascending=False)

In [79]:
# Median mortgage amount across the report-month filings.
median_mortgage_amount = x['mortgage_amount_int'].median()
print(f"{current_month} median mortgage foreclosure amount: ${median_mortgage_amount:,}")

February median mortgage foreclosure amount: $166,822.0


## Boundary Analysis

In [None]:
# Neighborhood polygons, read from a GeoJSON file in the working directory.
# The spatial join that follows groups on this layer's 'pri_neigh' column.
boundaries = gpd.read_file('Boundaries - Neighborhoods.geojson')

In [None]:
# create Point objects from lat/lon columns in df
geometry = gpd.points_from_xy(x['lon'], x['lat'])

# create geodataframe from df with Point objects as geometry
gdf_filings_x = gpd.GeoDataFrame(x, geometry=geometry)

gdf_filings_x.crs = 'EPSG:4326'
gdf_filings_x = gdf_filings_x.to_crs(boundaries.crs)

# use contains method to get count of points within each neighborhood
counts_x = gpd.sjoin(gdf_filings_x, boundaries, predicate='within').groupby('pri_neigh').size().reset_index(name='count')

In [None]:
# Total number of filings matched to a boundary polygon. This can be lower
# than len(x) when a point falls within no polygon.
counts_x['count'].sum()