## Imports

In [2]:
import googlemaps
import pandas as pd
import numpy as np
import re
import os
import geopandas as gpd
import folium
import requests
from bs4 import BeautifulSoup
from shapely.geometry import Point

## Read-in

In [3]:
df = pd.read_csv('LISF_Aug_2023.csv')

In [4]:
len(df)

897

In [5]:
# Define the regex pattern to split the text
pattern = r'(\d+-\d+-\d+-\d+-\d+)\s(.*)'

# Apply regex and split the text into two columns
df[['PIN', 'Address']] = df['1st PIN'].str.extract(pattern)

# Remove leading/trailing whitespace from the address column
df['Address'] = df['Address'].str.strip()

In [6]:
df = df.drop(columns=['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0','1st PIN'])

## Clean, drop, and convert

In [7]:
df['Doc Recorded'] = pd.to_datetime(df['Doc Recorded'])

In [8]:
df = df.drop_duplicates()

## Data Stuff

In [10]:
first_month = 8

In [11]:
# Count number of earliest month dates
aug_dates_count = len(df.loc[df['Doc Recorded'].dt.month == first_month])

print(f'Number of August dates: {aug_dates_count}')

Number of August dates: 797


In [12]:
df['1st Grantor'] = df['1st Grantor'].fillna('NA')
df['1st Grantee'] = df['1st Grantee'].fillna('NA')

In [13]:
municipal_authority = ['city', 'town', 'municipality', 'village','transit auth','department of transp']  # list of municipal authority keywords
df = df[~df['1st Grantor'].str.contains('|'.join(municipal_authority), case=False) & 
        ~df['1st Grantee'].str.contains('|'.join(municipal_authority), case=False)]

In [31]:
# create new column and assign colors based on month
df['COLOR'] = df['Doc Recorded'].apply(lambda x: 'red' if x.month == first_month else 'red' if x.month == second_month else 'blue' if x.month == third_month else '')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['COLOR'] = df['Doc Recorded'].apply(lambda x: 'red' if x.month == first_month else 'red' if x.month == second_month else 'blue' if x.month == third_month else '')


## Get mortgage amounts

In [15]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'TE': 'Trailers'
}

In [16]:
def mortgage_url_snagger(URL, headers):
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    for link in soup.find_all('a', href=True):
        if link['href'].startswith('/Document/Detail'):
            mortgage_url = 'https://crs.cookcountyclerkil.gov' + link['href']
            return mortgage_url

In [17]:
def mortgage_consi_snagger(URL, headers):
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    # Find all tr tags
    trs = soup.find_all('tr')

    # Loop through each tr tag and look for the td tag containing 'Consideration Amount' label
    for tr in trs:
        td = tr.find('th', text='Consideration Amount:')
        if td:
            # If the td tag is found, get the next td tag containing the amount
            amount_td = td.find_next_sibling('td')
            if amount_td:
                # Print the amount
                amount = amount_td.text.strip()
                return amount
            else:
                return 'not found'

In [18]:
df['mortgage_urls'] = df['deed_urls'].apply(lambda x: mortgage_url_snagger(x, headers))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['mortgage_urls'] = df['deed_urls'].apply(lambda x: mortgage_url_snagger(x, headers))


In [19]:
df['mortgage_amount'] = df['mortgage_urls'].apply(lambda x: mortgage_consi_snagger(x, headers) if x is not None else None)

  td = tr.find('th', text='Consideration Amount:')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['mortgage_amount'] = df['mortgage_urls'].apply(lambda x: mortgage_consi_snagger(x, headers) if x is not None else None)


In [20]:
df[['mortgage_urls','mortgage_amount']] = df[['mortgage_urls','mortgage_amount']].fillna('NA')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['mortgage_urls','mortgage_amount']] = df[['mortgage_urls','mortgage_amount']].fillna('NA')


## Geocode

In [21]:
df['geo_address'] = df['Address'] + ' Cook County, IL'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['geo_address'] = df['Address'] + ' Cook County, IL'


In [22]:
len(df)

557

In [23]:
%store -r google_maps_API_Key
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

In [24]:
# Define the geocode function
def geocode(add):
    g = gmaps_key.geocode(add)
    if g:
        lat = g[0]["geometry"]["location"]["lat"]
        lng = g[0]["geometry"]["location"]["lng"]
        return (lat, lng)
    else:
        return None

# Apply geocoding to the 'geo_address' column and store the results in 'geocoded' column
df['geocoded'] = df['geo_address'].apply(geocode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['geocoded'] = df['geo_address'].apply(geocode)


In [25]:
df['geocoded'] = df['geocoded'].astype(str)
df[['lat', 'lon']] = df['geocoded'].apply(lambda x: (None, None) if x == 'None' else x.strip('()').split(', ', 1)).apply(pd.Series)
df['lat'] = df['lat'].astype(float)
df['lon'] = df['lon'].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['geocoded'] = df['geocoded'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['lat', 'lon']] = df['geocoded'].apply(lambda x: (None, None) if x == 'None' else x.strip('()').split(', ', 1)).apply(pd.Series)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['lat', 'lon']] = df['geoc

## HTML Popup Formatter

In [26]:
df.columns

Index(['View Doc', 'Doc Number', 'Doc Recorded', 'Doc Executed', 'Doc Type',
       'Consi. Amt.', '1st Grantor', '1st Grantee', 'Assoc. Doc#', 'deed_urls',
       'PIN', 'Address', 'COLOR', 'mortgage_urls', 'mortgage_amount',
       'geo_address', 'geocoded', 'lat', 'lon'],
      dtype='object')

In [27]:
def popup_html(row):
    grantor = row['1st Grantor']
    grantee = row['1st Grantee']
    PIN = row['PIN']
    Address = row['Address']
    mortgage_amount = row['mortgage_amount']
    
    html = '''<!DOCTYPE html>
    <html>
    <strong>Lender: </strong>{}'''.format(grantor) + '''<br>
    <strong>Borrower: </strong>{}'''.format(grantee) + '''<br>
    <strong>PIN: </strong>{}'''.format(PIN) + '''<br>
    <strong>Address: </strong>{}'''.format(Address) + '''<br>
    <strong>Mortgage Amount: </strong>{}'''.format(mortgage_amount) + '''<br>
    </html>
    '''
    return html

In [32]:
import folium
from folium.plugins import MarkerCluster
import numpy as np

m = folium.Map(location=df[["lat", "lon"]].mean().to_list(), zoom_start=10)

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(f'Cook County Pending Foreclosures')

caption_html = '''
                <p align="center" style="vertical-align: bottom; font-size:13px"><i>{}</i></p>
                '''.format('August')

### Create map container ###
m = folium.Map(location=df[["lat", "lon"]].mean().to_list(), zoom_start=9.5, tiles=None)

# Create two FeatureGroups for different color pins
fg_red = folium.FeatureGroup(name='August') ## 

for index, row in df.iterrows():
    lat = row['lat']
    lon = row['lon']
    color = row['COLOR']
    if pd.notnull(lat) and pd.notnull(lon) and color == 'red':
        marker = folium.CircleMarker(
            location=[lat, lon],
            radius=10,
            fill=True,
            color=color,
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_red)
    else:
        continue

# Add the FeatureGroups to the map
fg_red.add_to(m)

folium.TileLayer('OpenStreetMap', control=False).add_to(m)

# Add LayerControl to the map
folium.map.LayerControl(collapsed=False).add_to(m)
m.get_root().html.add_child(folium.Element(title_html))
m.get_root().html.add_child(folium.Element(caption_html))
folium.TileLayer('CartoDBpositron', control=False).add_to(m) 
            
# Display map
m

In [33]:
m.save('index.html')

## Map URL Snagger

In [34]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

cwd = os.getcwd()

cwd = cwd.split('/')

final_name = base_name + cwd[-1]
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/lis_pendens_scraper_august_2023


## Stats for story

In [35]:
df['mortgage_amount_int'] = df['mortgage_amount'].str.replace('$','')
df['mortgage_amount_int'] = df['mortgage_amount_int'].str.replace(',','')
df = df.loc[df['mortgage_amount_int'] != 'NA']
df['mortgage_amount_int'] = df['mortgage_amount_int'].astype(float).fillna(0).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['mortgage_amount_int'] = df['mortgage_amount'].str.replace('$','')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['mortgage_amount_int'] = df['mortgage_amount_int'].str.replace(',','')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['mortgage_amount_int'] = df['mortgage_amount_int'].astype(

In [37]:
# Count number of first month dates
aug_dates_count = len(df.loc[df['Doc Recorded'].dt.month == first_month])

print(f'Number of August dates: {aug_dates_count}')

Number of August dates: 491


In [38]:
august = df.loc[df['Doc Recorded'].dt.month == first_month]

In [39]:
august.reset_index(inplace=True, drop=True)

In [40]:
august.sort_values(by='mortgage_amount_int',ascending=False).head(1)

Unnamed: 0,View Doc,Doc Number,Doc Recorded,Doc Executed,Doc Type,Consi. Amt.,1st Grantor,1st Grantee,Assoc. Doc#,deed_urls,PIN,Address,COLOR,mortgage_urls,mortgage_amount,geo_address,geocoded,lat,lon,mortgage_amount_int
267,View,2324129147,2023-08-29,8/28/2023,LIS PENDENS FORECLOSURE,,PFP 5 SCHAUMBURG LLC,LANDMARK SCHAUMBURG TOWERS LP,1815712000.0,https://crs.cookcountyclerkil.gov/Document/Det...,07-13-103-012-0000,"1400 AMERICAN LN, SCHAUMBURG",red,https://crs.cookcountyclerkil.gov/Document/Det...,"$95,500,000.00","1400 AMERICAN LN, SCHAUMBURG Cook County, IL","(42.0454065, -88.043568)",42.045406,-88.043568,95500000


In [41]:
print(august['mortgage_urls'].iloc[267])

https://crs.cookcountyclerkil.gov/Document/Detail?dId=Mjg3ODIzMTU1&hId=YjYwNmE5MGRkMDJiZGY0ZDQ0OWZkMmI0MTA1ZDc4MWFiNTEzNzJjYzE2NjkyZDdjOTExODkwNDkxZmZmZGI5Zg2


In [43]:
print(f'August: ${august.mortgage_amount_int.sum():,}')

August: $363,300,315


In [45]:
august['1st Grantor'].value_counts().head(60)

1st Grantor
US BK TRUST NATL ASSN TR                         30
WELLS FARGO BK NA                                22
US BK NATL ASSN TR                               17
NATIONSTAR MTG LLC                               17
US BK NATL ASSN                                  17
LAKEVIEW LOAN SERVICING LLC                      16
WILMINGTON SAV FUND SOC FSB TR                   14
DEUTSCHE BK NATL TRUST CO TR                     14
NEWREZ LLC                                       13
MIDFIRST BK                                      12
CARRINGTON MTG SERVICES LLC                      12
FREEDOM MTG CORP                                 11
JPMORGAN CHASE BK NATL ASSN                      10
SPECIALIZED LOAN SERVICING LLC                    9
FEDERAL HOME LOAN MTG CORP TR                     9
HSBC BK USA NATL ASSN TR                          8
PHH MTG CORP                                      8
PNC BK NATL ASSN                                  8
CITIZENS BK NA                                    8


In [46]:
august['1st Grantee'].value_counts()

1st Grantee
CHICAGO TITLE LAND TRUST CO TR    10
DISTRICT 31 LLC                    4
DORETHA WARD ENTERPRISES LLC       3
BLACK SQ REAL ESTATE INC           3
JEBB PROP I LLC                    2
                                  ..
FRIAS DALIA                        1
KOLASKI ANTHONY PETER              1
BOGRIS ATHANASIA                   1
SMITH TARA Y                       1
GARRISON ASHLEY V                  1
Name: count, Length: 470, dtype: int64

In [47]:
df.to_csv('august_2023_foreclosures.csv')

In [None]:
# apr.sort_values(by='mortgage_amount_int',ascending=False)

In [49]:
print(f"August median mortgage foreclosure amount: ${august['mortgage_amount_int'].median():,}")

August median mortgage foreclosure amount: $165,000.0


## Boundary Analysis

In [51]:
boundaries = gpd.read_file('Boundaries - Neighborhoods.geojson')

In [52]:
# create Point objects from lat/lon columns in df
geometry = gpd.points_from_xy(august['lon'], august['lat'])

# create geodataframe from df with Point objects as geometry
gdf_filings_august = gpd.GeoDataFrame(august, geometry=geometry)

gdf_filings_august.crs = 'EPSG:4326'
gdf_filings_august = gdf_filings_august.to_crs(boundaries.crs)

# use contains method to get count of points within each neighborhood
counts_august = gpd.sjoin(gdf_filings_august, boundaries, predicate='within').groupby('pri_neigh').size().reset_index(name='count')

In [56]:
counts_august['count'].sum()

233