## Imports

In [1]:
import googlemaps
import pandas as pd
import numpy as np
import re
import os
import geopandas as gpd
import folium
import requests
from bs4 import BeautifulSoup
from shapely.geometry import Point

## Read-in

In [2]:
df = pd.read_csv('LISF_Sep_2023.csv')

In [3]:
len(df)

518

In [4]:
# Two capture groups: the five-part dashed PIN, then everything after the separating whitespace
pattern = r'(\d+-\d+-\d+-\d+-\d+)\s(.*)'

# Run the extraction once; the result has one column per capture group (0 and 1)
pin_and_address = df['1st PIN'].str.extract(pattern)

# First group is the PIN; second is the address, trimmed of leading/trailing whitespace
df['PIN'] = pin_and_address[0]
df['Address'] = pin_and_address[1].str.strip()

In [5]:
df = df.drop(columns=['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0','1st PIN'])

## Clean, drop, and convert

In [6]:
df['Doc Recorded'] = pd.to_datetime(df['Doc Recorded'])

In [7]:
df = df.drop_duplicates()

## Filter and color-code filings

In [8]:
first_month = 9

In [36]:
# # Count number of earliest month dates
# sep_dates_count = len(df.loc[df['Doc Recorded'].dt.month == first_month])

# print(f'Number of September dates: {sep_dates_count}')

In [10]:
df['1st Grantor'] = df['1st Grantor'].fillna('NA')
df['1st Grantee'] = df['1st Grantee'].fillna('NA')

In [11]:
# Keywords that mark a party as a municipal authority
municipal_authority = ['city', 'town', 'municipality', 'village','transit auth','department of transp']

# Build the alternation once and reuse it for both party columns (case-insensitive match)
municipal_pattern = '|'.join(municipal_authority)
grantor_is_municipal = df['1st Grantor'].str.contains(municipal_pattern, case=False)
grantee_is_municipal = df['1st Grantee'].str.contains(municipal_pattern, case=False)

# Keep only filings where neither party matched a keyword
df = df[~(grantor_is_municipal | grantee_is_municipal)]

In [12]:
# create new column and assign colors based on month
# Month -> marker color. Only `first_month` is defined in this notebook; the previous
# expression also referenced `second_month` and `third_month`, which are never assigned,
# so any row recorded outside `first_month` raised NameError.
month_colors = {first_month: 'red'}

# Rows from any other month (or with a missing date) get '' and are therefore not drawn
# by the map loop, which only plots rows whose COLOR is 'red'.
df['COLOR'] = df['Doc Recorded'].dt.month.map(month_colors).fillna('')

## Get mortgage amounts

In [13]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'TE': 'Trailers'
}

In [14]:
def mortgage_url_snagger(URL, headers):
    """Return the absolute URL of the first document-detail link on a page.

    Downloads ``URL`` with the given request ``headers``, parses the HTML and
    looks for the first ``<a>`` whose ``href`` begins with ``/Document/Detail``.

    Returns:
        The link prefixed with ``https://crs.cookcountyclerkil.gov``, or
        ``None`` when the page contains no such link.
    """
    page = requests.get(URL, headers=headers)
    parsed = BeautifulSoup(page.content, 'html.parser')

    # Lazily walk the anchors in document order; only the first match is needed
    detail_paths = (
        anchor['href']
        for anchor in parsed.find_all('a', href=True)
        if anchor['href'].startswith('/Document/Detail')
    )
    first_path = next(detail_paths, None)
    if first_path is None:
        return None
    return 'https://crs.cookcountyclerkil.gov' + first_path

In [15]:
def mortgage_consi_snagger(URL, headers):
    """Scrape the 'Consideration Amount' value from a document-detail page.

    Downloads ``URL`` with the given request ``headers`` and scans the table
    rows for a ``<th>`` whose text is exactly ``Consideration Amount:``.

    Returns:
        * the stripped text of the ``<td>`` that follows the label, when present;
        * ``'not found'`` when the label exists but has no sibling ``<td>``;
        * ``None`` when no row on the page carries the label.
    """
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    for tr in soup.find_all('tr'):
        # `string=` is the supported spelling of the deprecated `text=` keyword,
        # which emitted a DeprecationWarning on every call.
        label_th = tr.find('th', string='Consideration Amount:')
        if label_th is None:
            continue

        # The amount sits in the cell immediately after the label header
        amount_td = label_th.find_next_sibling('td')
        if amount_td is not None:
            return amount_td.text.strip()
        return 'not found'

    # No labelled row anywhere on the page
    return None

In [16]:
df['mortgage_urls'] = df['deed_urls'].apply(lambda x: mortgage_url_snagger(x, headers))

In [17]:
df['mortgage_amount'] = df['mortgage_urls'].apply(lambda x: mortgage_consi_snagger(x, headers) if x is not None else None)

  td = tr.find('th', text='Consideration Amount:')


In [18]:
df[['mortgage_urls','mortgage_amount']] = df[['mortgage_urls','mortgage_amount']].fillna('NA')

## Geocode

In [19]:
df['geo_address'] = df['Address'] + ' Cook County, IL'

In [20]:
len(df)

297

In [21]:
%store -r google_maps_API_Key
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

In [22]:
# Define the geocode function
def geocode(add):
    """Geocode an address string with the module-level ``gmaps_key`` client.

    Args:
        add: Address text to look up.

    Returns:
        ``(lat, lng)`` taken from the first geocoding result, or ``None`` when
        the address is missing/blank or the lookup returns no results.
    """
    # A missing address (None, NaN, or only whitespace) cannot be geocoded;
    # return early instead of sending it to the API.
    if not isinstance(add, str) or not add.strip():
        return None

    results = gmaps_key.geocode(add)
    if not results:
        return None

    location = results[0]["geometry"]["location"]
    return (location["lat"], location["lng"])

# Apply geocoding to the 'geo_address' column and store the results in 'geocoded' column
df['geocoded'] = df['geo_address'].apply(geocode)

In [23]:
# Work from the text form of each geocoding result, e.g. "(41.9, -87.6)" or "None"
df['geocoded'] = df['geocoded'].astype(str)

def _lat_lon_strings(geocoded_text):
    """Split "(lat, lng)" text into its two parts; the text "None" yields a pair of Nones."""
    if geocoded_text == 'None':
        return (None, None)
    return geocoded_text.strip('()').split(', ', 1)

# Expand each pair into two columns, then convert both to floats
df[['lat', 'lon']] = df['geocoded'].apply(_lat_lon_strings).apply(pd.Series)
for coord_col in ('lat', 'lon'):
    df[coord_col] = df[coord_col].astype(float)

## HTML Popup Formatter

In [24]:
df.columns

Index(['View Doc', 'Doc Number', 'Doc Recorded', 'Doc Executed', 'Doc Type',
       'Consi. Amt.', '1st Grantor', '1st Grantee', 'Assoc. Doc#', 'deed_urls',
       'PIN', 'Address', 'COLOR', 'mortgage_urls', 'mortgage_amount',
       'geo_address', 'geocoded', 'lat', 'lon'],
      dtype='object')

In [25]:
def popup_html(row):
    """Build the HTML shown in a map marker's popup for one filing.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            '1st Grantor', '1st Grantee', 'PIN', 'Address' and 'mortgage_amount'.

    Returns:
        An HTML string listing the lender (grantor), borrower (grantee), PIN,
        address and mortgage amount, one per line.
    """
    # Local import: the name `html` is commonly used for the markup itself
    from html import escape

    def _safe(value):
        # Values come from scraped records; escape them so characters such as
        # '&' or '<' are displayed literally instead of being parsed as markup.
        return escape(str(value))

    template = (
        '<!DOCTYPE html>\n'
        '    <html>\n'
        '    <strong>Lender: </strong>{grantor}<br>\n'
        '    <strong>Borrower: </strong>{grantee}<br>\n'
        '    <strong>PIN: </strong>{pin}<br>\n'
        '    <strong>Address: </strong>{address}<br>\n'
        '    <strong>Mortgage Amount: </strong>{amount}<br>\n'
        '    </html>\n'
        '    '
    )
    return template.format(
        grantor=_safe(row['1st Grantor']),
        grantee=_safe(row['1st Grantee']),
        pin=_safe(row['PIN']),
        address=_safe(row['Address']),
        amount=_safe(row['mortgage_amount']),
    )

In [26]:
import folium
from folium.plugins import MarkerCluster
import numpy as np

# Month name used in the caption and the layer name. Derived from `first_month` so the
# labels always match the data being mapped (they were previously hard-coded to 'August').
month_label = pd.Timestamp(year=2000, month=first_month, day=1).month_name()

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format('Cook County Pending Foreclosures')

caption_html = '''
                <p align="center" style="vertical-align: bottom; font-size:13px"><i>{}</i></p>
                '''.format(month_label)

### Create map container ###
# Centered on the mean coordinate of all filings; tiles=None because the base
# layers are added explicitly further down.
m = folium.Map(location=df[["lat", "lon"]].mean().to_list(), zoom_start=9.5, tiles=None)

# FeatureGroup holding the red pins for the month
fg_red = folium.FeatureGroup(name=month_label)

for _, row in df.iterrows():
    lat = row['lat']
    lon = row['lon']
    color = row['COLOR']
    # Only rows that were geocoded and colored 'red' get a marker
    if pd.notnull(lat) and pd.notnull(lon) and color == 'red':
        marker = folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            fill=True,
            color=color,
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_red)

# Add the FeatureGroup to the map
fg_red.add_to(m)

folium.TileLayer('OpenStreetMap', control=False).add_to(m)

# Add LayerControl to the map
folium.map.LayerControl(collapsed=False).add_to(m)
m.get_root().html.add_child(folium.Element(title_html))
m.get_root().html.add_child(folium.Element(caption_html))
folium.TileLayer('CartoDBpositron', control=False).add_to(m)

# Display map
m

In [27]:
m.save('index.html')

## Map URL Snagger

In [28]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

# The published URL is the base plus the name of the current working directory.
# os.path.basename honours the platform's path separator, whereas splitting the
# path on '/' only works on POSIX-style paths.
cwd = os.getcwd()

final_name = base_name + os.path.basename(cwd)
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/lis_pendens_scraper_september_2023


## Stats for story

In [29]:
# Remove the currency formatting. regex=False makes '$' a literal character
# regardless of the pandas version's default for `regex`.
amount_text = (
    df['mortgage_amount']
    .astype(str)
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
)

# Placeholders such as 'NA' or 'not found' are not numbers; coerce them to NaN
# instead of letting the float conversion raise.
amount_value = pd.to_numeric(amount_text, errors='coerce')

# Keep only filings with a usable amount. .copy() gives an independent frame so the
# new column is not written onto a slice of the previous `df`.
has_amount = amount_value.notna()
df = df.loc[has_amount].copy()
df['mortgage_amount_int'] = amount_value[has_amount].astype(int)

In [30]:
# Count number of first month dates
sep_dates_count = len(df.loc[df['Doc Recorded'].dt.month == first_month])

print(f'Number of September dates: {sep_dates_count}')

Number of September dates: 251


In [31]:
# Filings recorded in `first_month`. Taken as an explicit copy because later cells
# modify `september` (index reset, manual value overrides); without the copy those
# writes target a slice of `df` and trigger SettingWithCopyWarning.
september = df.loc[df['Doc Recorded'].dt.month == first_month].copy()

In [32]:
# Renumber the rows 0..n-1, discarding the old index labels
september = september.reset_index(drop=True)

In [47]:
# Manual correction: overwrite the amount for the row labelled 205 in `september`,
# setting both the numeric column and the formatted display string.
# NOTE(review): 205 is a row label from the reset index, so it depends on exactly which
# rows survived the earlier filters — confirm it still points at the intended filing
# whenever the input data or the filters change.
september.at[205,'mortgage_amount_int']=96900
september.at[205,'mortgage_amount']='$96,900'

In [48]:
september.sort_values(by='mortgage_amount_int',ascending=False).head(1)

Unnamed: 0,View Doc,Doc Number,Doc Recorded,Doc Executed,Doc Type,Consi. Amt.,1st Grantor,1st Grantee,Assoc. Doc#,deed_urls,...,Address,COLOR,mortgage_urls,mortgage_amount,geo_address,geocoded,lat,lon,mortgage_amount_int,205
16,View,2327215000.0,2023-09-29,9/28/2023,LIS PENDENS FORECLOSURE,,ELIZON DB TRANSFER AGENT LLC,1419 PARTNERS LLC,2202719000.0,https://crs.cookcountyclerkil.gov/Document/Det...,...,"1419 N WELLS ST, CHICAGO",red,https://crs.cookcountyclerkil.gov/Document/Det...,"$10,400,000.00","1419 N WELLS ST, CHICAGO Cook County, IL","(41.9083501, -87.63424549999999)",41.90835,-87.634245,10400000.0,


In [49]:
print(september['mortgage_urls'].iloc[16])

https://crs.cookcountyclerkil.gov/Document/Detail?dId=MzE2MTY1NTI1&hId=ZDRkMDAzODc1NjBkZWE3NTYwNDc4MjU1OGU1ZmFiZGE0MzViOTAyM2NlNWJlYmU5ODQ1MTEyMmRiNzgwZTAzZA2


In [50]:
print(f'September: ${september.mortgage_amount_int.sum():,}')

September: $80,026,582.0


In [56]:
# August
29 + 9

38

In [51]:
september['1st Grantor'].value_counts().head(60)

1st Grantor
US BK TRUST NATL ASSN TR                    29
NEWREZ LLC                                  19
WILMINGTON SAV FUND SOC FSB TR              11
US BK NATL ASSN TR                           9
WELLS FARGO BK NA                            9
LOANDEPOTCOM LLC                             8
GITSIT SOLUTIONS LLC                         8
JPMORGAN CHASE BK NATL ASSN                  7
BMO BK NA                                    7
CARRINGTON MTG SERVICES LLC                  6
MORTGAGE ASSETS MGMT LLC                     5
HUNTINGTON NATL BK                           5
ROCKET MTG LLC                               4
LAKEVIEW LOAN SERVICING LLC                  4
MCLP ASSET CO INC                            4
BMO HARRIS BK NA                             4
US BK NATL ASSN                              4
CITIBANK NA TR                               3
ARVEST BK                                    3
OLD NATL BK                                  3
F ST INVESTMENTS LLC                         2
S

In [52]:
september['1st Grantee'].value_counts()

1st Grantee
BLACK SQ REAL ESTATE INC                          3
DORETHA WARD ENTERPRISES LLC                      2
UNKNOWN                                           2
CHICAGO TITLE LAND TRUST CO TR                    2
DEVOIL SOLUTIONS LLC                              2
                                                 ..
PROFESSIONALLY AFFORDABLE HOMES LLC - SERIES 1    1
ABRON ALONZO                                      1
TEENY TURNER LLC                                  1
MCCUAN MARIBETH                                   1
DEMIRDJIAN ROUPEN STEVEN                          1
Name: count, Length: 244, dtype: int64

In [53]:
df.to_csv('september_2023_foreclosures.csv')

In [None]:
# apr.sort_values(by='mortgage_amount_int',ascending=False)

In [55]:
print(f"September median mortgage foreclosure amount: ${september['mortgage_amount_int'].median():,}")

September median mortgage foreclosure amount: $171,845.0


## Boundary Analysis

In [None]:
boundaries = gpd.read_file('Boundaries - Neighborhoods.geojson')

In [None]:
# Only filings that were geocoded can be placed on the map; drop rows without coordinates.
# (This cell previously read an undefined `august` frame and mixed it with `september`.)
september_located = september.dropna(subset=['lat', 'lon'])

# create Point objects from the lon/lat columns (x = longitude, y = latitude)
geometry = gpd.points_from_xy(september_located['lon'], september_located['lat'])

# create geodataframe with the Point objects as geometry, declared as EPSG:4326,
# then reproject to the CRS of the neighborhood boundaries.
# NOTE: the `_august` suffix is kept only because the following cell reads
# `counts_august`; the data here is the `september` frame.
gdf_filings_august = gpd.GeoDataFrame(september_located, geometry=geometry, crs='EPSG:4326')
gdf_filings_august = gdf_filings_august.to_crs(boundaries.crs)

# spatial join (points within polygons), then count filings per neighborhood
counts_august = (
    gpd.sjoin(gdf_filings_august, boundaries, predicate='within')
    .groupby('pri_neigh')
    .size()
    .reset_index(name='count')
)

In [None]:
counts_august['count'].sum()