## Imports

In [1]:
import googlemaps
import pandas as pd
import numpy as np
import re
import os
import geopandas as gpd
import folium
import requests
from bs4 import BeautifulSoup
from shapely.geometry import Point

## Read-in

In [2]:
df = pd.read_csv('LISF_May_June_2024.csv',dtype='str')

In [3]:
df['Doc Recorded'].isna().value_counts()

Doc Recorded
False    2476
Name: count, dtype: int64

In [6]:
len(df[df['Doc Recorded'].str.startswith('6')])

1002

In [7]:
# Make sure to update this
print('Enter report month as numeric. Example: January is 1, February is 2, etc...')
first_month = int(input())
print('Enter report month as named string. Example: January...February...')
current_month = input()

Enter report month as numeric. Example: January is 1, February is 2, etc...
5
Enter report month as named string. Example: January...February...
May


In [10]:
print(first_month)
print('---------')
print(current_month)

5
---------
May


In [11]:
len(df)

2476

In [12]:
# Define the regex pattern to split the text
pattern = r'(\d+-\d+-\d+-\d+-\d+)\s(.*)'

# Apply regex and split the text into two columns
df[['PIN', 'Address']] = df['1st PIN'].str.extract(pattern)

# Remove leading/trailing whitespace from the address column
df['Address'] = df['Address'].str.strip()

In [13]:
df = df.drop(columns=['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0','1st PIN'])

## Clean, drop, and convert

In [14]:
df['Doc Recorded'] = pd.to_datetime(df['Doc Recorded'], format='mixed')

In [15]:
df = df.drop_duplicates()

In [16]:
len(df)

1876

## Data Stuff

In [10]:
# # Count number of earliest month dates
# sep_dates_count = len(df.loc[df['Doc Recorded'].dt.month == first_month])

# print(f'Number of September dates: {sep_dates_count}')

In [17]:
df['1st Grantor'] = df['1st Grantor'].fillna('NA')
df['1st Grantee'] = df['1st Grantee'].fillna('NA')

In [18]:
municipal_authority = ['city', 'town', 'municipality', 'village','transit auth','department of transp']  # list of municipal authority keywords
df = df[~df['1st Grantor'].str.contains('|'.join(municipal_authority), case=False) & 
        ~df['1st Grantee'].str.contains('|'.join(municipal_authority), case=False)]

In [19]:
# create new column and assign colors based on month
df['COLOR'] = df['Doc Recorded'].apply(lambda x: 'red' if x.month == first_month else '')

## Get mortgage amounts

In [20]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'TE': 'Trailers'
}

In [21]:
def mortgage_url_snagger(URL, headers):
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    for link in soup.find_all('a', href=True):
        if link['href'].startswith('/Document/Detail'):
            mortgage_url = 'https://crs.cookcountyclerkil.gov' + link['href']
            return mortgage_url

In [22]:
def mortgage_consi_snagger(URL, headers):
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    # Find all tr tags
    trs = soup.find_all('tr')

    # Loop through each tr tag and look for the td tag containing 'Consideration Amount' label
    for tr in trs:
        td = tr.find('th', text='Consideration Amount:')
        if td:
            # If the td tag is found, get the next td tag containing the amount
            amount_td = td.find_next_sibling('td')
            if amount_td:
                # Print the amount
                amount = amount_td.text.strip()
                return amount
            else:
                return 'not found'

In [23]:
df['mortgage_urls'] = df['deed_urls'].apply(lambda x: mortgage_url_snagger(x, headers))

In [24]:
df['mortgage_amount'] = df['mortgage_urls'].apply(lambda x: mortgage_consi_snagger(x, headers) if x is not None else None)

  td = tr.find('th', text='Consideration Amount:')


In [25]:
df[['mortgage_urls','mortgage_amount']] = df[['mortgage_urls','mortgage_amount']].fillna('NA')

## Geocode

In [26]:
len(df)

1611

In [27]:
df['geo_address'] = df['Address'] + ' Cook County, IL'

In [28]:
len(df)

1611

In [29]:
%store -r google_maps_API_Key
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

In [30]:
# Define the geocode function
def geocode(add):
    g = gmaps_key.geocode(add)
    if g:
        lat = g[0]["geometry"]["location"]["lat"]
        lng = g[0]["geometry"]["location"]["lng"]
        return (lat, lng)
    else:
        return None

# Apply geocoding to the 'geo_address' column and store the results in 'geocoded' column
df['geocoded'] = df['geo_address'].apply(geocode)

In [31]:
df['geocoded'] = df['geocoded'].astype(str)
df[['lat', 'lon']] = df['geocoded'].apply(lambda x: (None, None) if x == 'None' else x.strip('()').split(', ', 1)).apply(pd.Series)
df['lat'] = df['lat'].astype(float)
df['lon'] = df['lon'].astype(float)

## HTML Popup Formatter

In [32]:
df.columns

Index(['View Doc', 'Doc Number', 'Doc Recorded', 'Doc Executed', 'Doc Type',
       'Consi. Amt.', '1st Grantor', '1st Grantee', 'Assoc. Doc#', 'deed_urls',
       'PIN', 'Address', 'COLOR', 'mortgage_urls', 'mortgage_amount',
       'geo_address', 'geocoded', 'lat', 'lon'],
      dtype='object')

In [33]:
def popup_html(row):
    grantor = row['1st Grantor']
    grantee = row['1st Grantee']
    PIN = row['PIN']
    Address = row['Address']
    mortgage_amount = row['mortgage_amount']
    
    html = '''<!DOCTYPE html>
    <html>
    <strong>Lender: </strong>{}'''.format(grantor) + '''<br>
    <strong>Borrower: </strong>{}'''.format(grantee) + '''<br>
    <strong>PIN: </strong>{}'''.format(PIN) + '''<br>
    <strong>Address: </strong>{}'''.format(Address) + '''<br>
    <strong>Mortgage Amount: </strong>{}'''.format(mortgage_amount) + '''<br>
    </html>
    '''
    return html

In [34]:
df['COLOR'].value_counts()

COLOR
red    913
       698
Name: count, dtype: int64

In [71]:
# df.to_csv("backup_copy.csv")

In [35]:
df['COLOR'] = df['COLOR'].replace('', 'yellow')

In [38]:
import folium
from folium.plugins import MarkerCluster
import numpy as np

m = folium.Map(location=df[["lat", "lon"]].mean().to_list(), zoom_start=10)

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(f'Cook County Pending Foreclosures')

caption_html = '''
                <p align="center" style="vertical-align: bottom; font-size:13px"><i>{}</i></p>
                '''.format(f'May and June')

### Create map container ###
m = folium.Map(location=df[["lat", "lon"]].mean().to_list(), zoom_start=9.5, tiles=None)

# Create two FeatureGroups for different color pins
fg_red = folium.FeatureGroup(name=f'May') ## 
fg_yellow = folium.FeatureGroup(name='June')

for index, row in df.iterrows():
    lat = row['lat']
    lon = row['lon']
    color = row['COLOR']
    if pd.notnull(lat) and pd.notnull(lon) and color == 'red':
        marker = folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            fill=True,
            color=color,
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_red)
    elif pd.notnull(lat) and pd.notnull(lon) and color == 'yellow':
        marker = folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            fill=True,
            color=color,
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_yellow)
    else:
        continue

# Add the FeatureGroups to the map
fg_red.add_to(m)
fg_yellow.add_to(m)

folium.TileLayer('OpenStreetMap', control=False).add_to(m)

# Add LayerControl to the map
folium.map.LayerControl(collapsed=False).add_to(m)
m.get_root().html.add_child(folium.Element(title_html))
m.get_root().html.add_child(folium.Element(caption_html))
folium.TileLayer('CartoDBpositron', control=False).add_to(m) 
            
# Display map
m

In [39]:
m.save('index.html')

## Map URL Snagger

In [40]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

cwd = os.getcwd()

cwd = cwd.split('/')

final_name = base_name + cwd[-1]
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/lis_pendens_scraper_may_june_2024


## Stats for story

In [61]:
df['mortgage_amount_int'] = df['mortgage_amount'].str.replace('$','')
df['mortgage_amount_int'] = df['mortgage_amount_int'].str.replace(',','')
df = df.loc[df['mortgage_amount_int'] != 'NA']
df['mortgage_amount_int'] = df['mortgage_amount_int'].astype(float).fillna(0).astype(int)

In [62]:
# Count number of first month dates
dates_count = len(df.loc[df['Doc Recorded'].dt.month == first_month])

print(f'Number of {current_month} dates: {dates_count}')

Number of May dates: 845


In [60]:
df['Doc Recorded'].dt.month.value_counts()

Doc Recorded
5    845
6    632
Name: count, dtype: int64

In [63]:
# Filter for March
df_may = df[df['Doc Recorded'].dt.month == 5]

# Filter for April
df_june = df[df['Doc Recorded'].dt.month == 6]

In [64]:
df_may.reset_index(inplace=True, drop=True)
df_june.reset_index(inplace=True, drop=True)

In [36]:
# x.at[123,'mortgage_amount_int']=33130000
# x.at[123,'mortgage_amount']='$33,130,000'

# x.at[16,'mortgage_amount_int']=173600
# x.at[16,'mortgage_amount']='$173,600'

In [65]:
print(f'May foreclosures: {len(df_march)}')
print('----------')
print(f'June foreclosures: {len(df_april)}')

May foreclosures: 845
----------
June foreclosures: 632


In [53]:
df_may.sort_values(by='mortgage_amount_int',ascending=False).head(2)

Unnamed: 0,View Doc,Doc Number,Doc Recorded,Doc Executed,Doc Type,Consi. Amt.,1st Grantor,1st Grantee,Assoc. Doc#,deed_urls,PIN,Address,COLOR,mortgage_urls,mortgage_amount,geo_address,geocoded,lat,lon,mortgage_amount_int
8,View,2412308062,2024-05-02,4/30/2024,LIS PENDENS FORECLOSURE,,BANK OF AMER NA,3 FNP OWNER LLC,1823534065.0,https://crs.cookcountyclerkil.gov/Document/Det...,17-09-462-015-0000,"70 W MADISON ST, CHICAGO",red,https://crs.cookcountyclerkil.gov/Document/Det...,"$305,000,000.00","70 W MADISON ST, CHICAGO Cook County, IL","(41.8821962, -87.6300878)",41.882196,-87.630088,305000000
624,View,2415009013,2024-05-29,,LIS PENDENS FORECLOSURE,,CGCMT 2016-P5 SOUTH MICHIGAN AVE LLC,332 PROP LLC,1627255096.0,https://crs.cookcountyclerkil.gov/Document/Det...,17-15-107-018-0000,"330 S MICHIGAN AVE, CHICAGO",red,https://crs.cookcountyclerkil.gov/Document/Det...,"$32,500,000.00","330 S MICHIGAN AVE, CHICAGO Cook County, IL","(41.877479, -87.6245598)",41.877479,-87.62456,32500000


In [54]:
print(df_may.iloc[624]['mortgage_urls'])

https://crs.cookcountyclerkil.gov/Document/Detail?dId=NDIyODY2MzI1&hId=ZGUxNDU1ZjFkYWFkMWNlZTkzYmZmNTRmNzhmMzg2OWMzMWQ0ZTYxMzk5ZmJmNGFiZmE4MjQ2MzE0NTY5NmI2ZA2


In [55]:
df_june.sort_values(by='mortgage_amount_int',ascending=False).iloc[0:1]

Unnamed: 0,View Doc,Doc Number,Doc Recorded,Doc Executed,Doc Type,Consi. Amt.,1st Grantor,1st Grantee,Assoc. Doc#,deed_urls,PIN,Address,COLOR,mortgage_urls,mortgage_amount,geo_address,geocoded,lat,lon,mortgage_amount_int
621,View,2416424550,2024-06-12,,LIS PENDENS FORECLOSURE,,VMC FIN 2021-FL4 LLC,SCRE II SHAFER CRT LIMITED PARTNERSHIP,1934506145.0,https://crs.cookcountyclerkil.gov/Document/Det...,12-03-100-015-0000,"6400 SHAFER CT, ROSEMONT",yellow,https://crs.cookcountyclerkil.gov/Document/Det...,"$17,900,000.00","6400 SHAFER CT, ROSEMONT Cook County, IL","(41.9945895, -87.8642756)",41.994589,-87.864276,17900000


In [56]:
print(df_june['mortgage_urls'].iloc[621])

https://crs.cookcountyclerkil.gov/Document/Detail?dId=MjYwOTk4MjE1&hId=YzIwZjJkYmEyZDNhZWE0YTU4OTc1YjQ5OTQ0YmYwMmJkNDVmNGY3NjM5NjQ1NmYxNTM5MjYwZThiZDBiYWU0MQ2


In [57]:
print(f'{current_month}: ${df_may.mortgage_amount_int.sum():,}')

May: $552,713,746


In [58]:
print(f'June: ${df_june.mortgage_amount_int.sum():,}')

June: $161,296,165


In [59]:
252_713_746 + 161_296_165

414009911

In [71]:
52 + 39 + 24 + 13

128

In [70]:
df_may['1st Grantor'].value_counts().head(60)

1st Grantor
MIDFIRST BANK                             65
US BANK TRUST NATIONAL ASSN TR            52
LAKEVIEW LOAN SERVICING LLC               43
NATIONSTAR MTG LLC                        42
US BK NATIONAL ASSOCIATION                39
JPMORGAN CHASE BK NATIONAL ASSOCIATION    31
WILMINGTON SAV FUND SOCIETY FSB TR        30
DEUTSCHE BK NATIONAL TRUST COMPANY TR     29
NEWREZ LLC                                29
US BANK NATIONAL ASSN TR                  24
FIFTH THIRD BK NATIONAL ASSOCIATION       21
PENNYMAC LOAN SERVICES LLC                20
WELLS FARGO BK NA                         19
FREEDOM MTG CORPORATION                   18
HUNTINGTON NATL BANK                      13
CARRINGTON MTG SERVICES LLC               13
US BANK TRUST COMPANY NATIONAL ASSN TR    13
ROCKET MTG LLC                            13
FEDERAL HOME LOAN MORTGAGE CORP TR        12
GREENSPRING CAP MANAGEMENT LLC            12
PNC BK NATIONAL ASSOCIATION               11
CROSSCOUNTRY MTG LLC                      1

In [73]:
32 + 27 + 20 + 9 

88

In [72]:
df_june['1st Grantor'].value_counts().head(60)

1st Grantor
NEWREZ LLC                                  36
US BK NATIONAL ASSOCIATION                  32
NATIONSTAR MTG LLC                          32
US BANK TRUST NATIONAL ASSN TR              27
LAKEVIEW LOAN SERVICING LLC                 26
FEDERAL HOME LOAN MORTGAGE CORP TR          23
DEUTSCHE BK NATIONAL TRUST COMPANY TR       23
FREEDOM MTG CORPORATION                     20
US BANK NATIONAL ASSN TR                    20
WILMINGTON SAV FUND SOCIETY FSB TR          19
MIDFIRST BANK                               17
WELLS FARGO BK NA                           15
PENNYMAC LOAN SERVICES LLC                  14
PNC BK NATIONAL ASSOCIATION                 13
CARRINGTON MTG SERVICES LLC                 12
JPMORGAN CHASE BK NATIONAL ASSOCIATION      12
PHH MTG CORPORATION                         12
FIFTH THIRD BK NATIONAL ASSOCIATION         11
LOANDEPOTCOM LLC                            10
BANK OF NEW YORK MELLON TR                   9
FIFTH THIRD BK NA                            9
U

In [113]:
df_march['1st Grantee'].value_counts()

1st Grantee
CHICAGO TITLE LAND TRUST CO TR    4
HUSSAIN ALI                       2
PORTER JASON                      2
OLSHANSKY EDWARD                  2
ILLINOIS HOUSING DEV AUTHORITY    2
                                 ..
SLAY JAMAAR                       1
FERNANDEZ IRENE A                 1
PARKER EFEREM                     1
DIXON TAMIRRA                     1
RAZZAQ ALI                        1
Name: count, Length: 1020, dtype: int64

In [114]:
df_april['1st Grantee'].value_counts()

1st Grantee
ALJ INVESTMENTS INC               5
JACKSON ANDRE L                   5
LV HALSTED LLC                    4
JACKSON ANDRE                     4
US BANK TRUST NATIONAL ASSN TR    2
                                 ..
FLETCHER DOUGLAS G JR             1
HAMLET DEQUINCY L                 1
RELIFORD BILLY G JR               1
MEDINA ROCIO                      1
RUPERT CURTIS                     1
Name: count, Length: 886, dtype: int64

In [95]:
df.to_csv(f'{current_month}_June_2024_foreclosures.csv')

In [44]:
# apr.sort_values(by='mortgage_amount_int',ascending=False)

In [66]:
print(f"{current_month} median mortgage foreclosure amount: ${df_may['mortgage_amount_int'].median():,}")

May median mortgage foreclosure amount: $171,400.0


In [67]:
print(f"April median mortgage foreclosure amount: ${df_june['mortgage_amount_int'].median():,}")

April median mortgage foreclosure amount: $161,025.0


## Boundary Analysis

In [117]:
boundaries = gpd.read_file('Boundaries - Neighborhoods.geojson')

In [118]:
# create Point objects from lat/lon columns in df
geometry = gpd.points_from_xy(x['lon'], x['lat'])

# create geodataframe from df with Point objects as geometry
gdf_filings_x = gpd.GeoDataFrame(x, geometry=geometry)

gdf_filings_x.crs = 'EPSG:4326'
gdf_filings_x = gdf_filings_x.to_crs(boundaries.crs)

# use contains method to get count of points within each neighborhood
counts_x = gpd.sjoin(gdf_filings_x, boundaries, predicate='within').groupby('pri_neigh').size().reset_index(name='count')

In [119]:
counts_x['count'].sum()

517