## Imports

In [77]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from tqdm import tqdm
import googlemaps
import numpy as np
import re
import os
import folium
from IPython.display import clear_output

## Pandas display options

In [78]:
# Raise pandas' display limits so wide frames and long cell values are not
# truncated when shown in the notebook.
display_limits = {
    'display.max_columns': 500,
    'display.max_colwidth': None,
    'display.max_rows': 800,
}
for option_name, option_value in display_limits.items():
    pd.set_option(option_name, option_value)

## Data read-in

In [79]:
# Read the source workbook into the working DataFrame.
source_workbook = 'Copy of 230209.2021CCAORetentionPINs.xlsx'
df = pd.read_excel(source_workbook)

## Data clean

In [80]:
# Strip the dashes so each PIN is the plain digit string used in the
# assessor URL. regex=False makes this a literal replacement regardless of
# the pandas version's default for `regex`.
df['PARID'] = df['PARID'].str.replace('-', '', regex=False)

## Test block

In [81]:
# Smoke-test the scrape against a single known PIN before running the full list.
sample_pin = '17162050160000'
# A timeout keeps the cell from hanging indefinitely if the site stops responding.
response = requests.get(
    f'https://www.cookcountyassessor.com/pin/{sample_pin}',
    timeout=30,
)
# Fail loudly on an HTTP error rather than trying to parse an error page.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
# `string=` is the current name for the filter bs4 previously called `text=`.
address = soup.find('span', string='Address').find_next('span').text
print(address)

10 S DEARBORN ST


## Set up scraper

In [109]:
def address_snagger(PARID):
    """Look up the address for one parcel on the Cook County Assessor site.

    Parameters
    ----------
    PARID : mapping
        One row of the parcel table (for example a dict produced by
        ``df.to_dict('records')``). Only its ``'PARID'`` entry, the PIN,
        is read.

    Returns
    -------
    The text of the ``<span>`` that follows the "Address" label on the PIN
    page. If the request or the parse fails for any reason (including a
    timeout), the PIN itself is returned so the caller can spot rows that
    still need an address.
    """
    # Clear the cell's previous output before each lookup.
    clear_output()
    PIN = PARID['PARID']
    try:
        # Without a timeout a single stalled connection blocks the whole run.
        response = requests.get(
            f'https://www.cookcountyassessor.com/pin/{PIN}',
            timeout=30,
        )
        soup = BeautifulSoup(response.content, 'html.parser')
        # `string=` replaces bs4's deprecated `text=` keyword.
        address = soup.find('span', string='Address').find_next('span').text
        return address
    except Exception:
        # Deliberate best-effort: one bad page must not abort a long scrape,
        # so fall back to the PIN as a visible marker of the failure.
        return PIN

In [83]:
# Scrape one address per parcel row, in row order, with a progress bar.
parcel_records = df.to_dict('records')
address_list = []
for parcel_record in tqdm(parcel_records):
    address_list.append(address_snagger(parcel_record))


 36%|█████████████▉                         | 200/559 [33:41<1:00:27, 10.11s/it][A

  0%|                                           | 1/559 [00:00<03:40,  2.53it/s][A
  0%|▏                                          | 2/559 [00:00<02:29,  3.73it/s][A
  1%|▏                                          | 3/559 [00:00<01:59,  4.67it/s][A
  1%|▎                                          | 4/559 [00:00<01:47,  5.18it/s][A
  1%|▍                                          | 5/559 [00:01<01:35,  5.77it/s][A
  1%|▍                                          | 6/559 [00:01<01:36,  5.70it/s][A
  1%|▌                                          | 7/559 [00:01<01:35,  5.77it/s][A
  1%|▌                                          | 8/559 [00:01<01:32,  5.93it/s][A
  2%|▋                                          | 9/559 [00:01<01:33,  5.88it/s][A
  2%|▊                                         | 10/559 [00:01<01:30,  6.03it/s][A
  2%|▊                                         | 11/559 [00:02<01:30,  6.0

 17%|███████▎                                  | 97/559 [00:15<01:10,  6.57it/s][A
 18%|███████▎                                  | 98/559 [00:15<01:06,  6.88it/s][A
 18%|███████▍                                  | 99/559 [00:15<01:10,  6.55it/s][A
 18%|███████▎                                 | 100/559 [00:15<01:10,  6.55it/s][A
 18%|███████▍                                 | 101/559 [00:16<01:07,  6.80it/s][A
 18%|███████▍                                 | 102/559 [00:16<01:05,  6.97it/s][A
 18%|███████▌                                 | 103/559 [00:16<01:07,  6.76it/s][A
 19%|███████▋                                 | 104/559 [00:16<01:03,  7.15it/s][A
 19%|███████▋                                 | 105/559 [00:16<01:02,  7.31it/s][A
 19%|███████▊                                 | 106/559 [00:16<01:00,  7.50it/s][A
 19%|███████▊                                 | 107/559 [00:16<01:01,  7.38it/s][A
 19%|███████▉                                 | 108/559 [00:16<01:03,  7.16i

 35%|██████████████▏                          | 194/559 [00:30<01:18,  4.65it/s][A
 35%|██████████████▎                          | 195/559 [00:30<01:08,  5.31it/s][A
 35%|██████████████▍                          | 196/559 [00:30<01:03,  5.76it/s][A
 35%|██████████████▍                          | 197/559 [00:30<00:57,  6.28it/s][A
 35%|██████████████▌                          | 198/559 [00:30<00:55,  6.52it/s][A
 36%|██████████████▌                          | 199/559 [00:30<00:52,  6.80it/s][A
 36%|█████████████▉                         | 200/559 [01:45<2:15:36, 22.66s/it][A
 36%|██████████████                         | 201/559 [01:46<1:35:00, 15.92s/it][A
 36%|██████████████                         | 202/559 [01:46<1:06:36, 11.20s/it][A
 36%|██████████████▉                          | 203/559 [01:46<46:45,  7.88s/it][A
 36%|██████████████▉                          | 204/559 [01:46<32:55,  5.57s/it][A
 37%|███████████████                          | 205/559 [01:46<23:15,  3.94s

 52%|█████████████████████▎                   | 291/559 [02:00<00:36,  7.26it/s][A
 52%|█████████████████████▍                   | 292/559 [02:00<00:36,  7.31it/s][A
 52%|█████████████████████▍                   | 293/559 [02:00<00:38,  6.86it/s][A
 53%|█████████████████████▌                   | 294/559 [02:01<00:40,  6.50it/s][A
 53%|█████████████████████▋                   | 295/559 [02:01<00:41,  6.43it/s][A
 53%|█████████████████████▋                   | 296/559 [02:01<00:40,  6.44it/s][A
 53%|█████████████████████▊                   | 297/559 [02:01<00:41,  6.34it/s][A
 53%|█████████████████████▊                   | 298/559 [02:01<00:39,  6.57it/s][A
 53%|█████████████████████▉                   | 299/559 [02:01<00:40,  6.47it/s][A
 54%|██████████████████████                   | 300/559 [02:02<00:37,  6.86it/s][A
 54%|██████████████████████                   | 301/559 [02:02<00:36,  7.07it/s][A
 54%|██████████████████████▏                  | 302/559 [02:02<00:36,  6.98i

 69%|████████████████████████████▍            | 388/559 [02:16<00:33,  5.15it/s][A
 70%|████████████████████████████▌            | 389/559 [02:16<00:29,  5.68it/s][A
 70%|████████████████████████████▌            | 390/559 [02:16<00:28,  5.92it/s][A
 70%|████████████████████████████▋            | 391/559 [02:16<00:27,  6.17it/s][A
 70%|████████████████████████████▊            | 392/559 [02:17<00:27,  6.16it/s][A
 70%|████████████████████████████▊            | 393/559 [02:17<00:24,  6.64it/s][A
 70%|████████████████████████████▉            | 394/559 [02:17<00:24,  6.80it/s][A
 71%|████████████████████████████▉            | 395/559 [02:17<00:22,  7.24it/s][A
 71%|█████████████████████████████            | 396/559 [02:17<00:22,  7.18it/s][A
 71%|█████████████████████████████            | 397/559 [02:17<00:22,  7.05it/s][A
 71%|█████████████████████████████▏           | 398/559 [02:18<01:14,  2.16it/s][A
 71%|█████████████████████████████▎           | 399/559 [02:19<00:58,  2.75i

 87%|███████████████████████████████████▌     | 485/559 [03:47<00:11,  6.34it/s][A
 87%|███████████████████████████████████▋     | 486/559 [03:47<00:11,  6.42it/s][A
 87%|███████████████████████████████████▋     | 487/559 [03:47<00:11,  6.48it/s][A
 87%|███████████████████████████████████▊     | 488/559 [03:48<00:11,  6.05it/s][A
 87%|███████████████████████████████████▊     | 489/559 [03:48<00:11,  6.03it/s][A
 88%|███████████████████████████████████▉     | 490/559 [03:48<00:11,  6.11it/s][A
 88%|████████████████████████████████████     | 491/559 [03:48<00:10,  6.43it/s][A
 88%|████████████████████████████████████     | 492/559 [03:48<00:10,  6.58it/s][A
 88%|████████████████████████████████████▏    | 493/559 [03:48<00:09,  6.90it/s][A
 88%|████████████████████████████████████▏    | 494/559 [03:49<00:09,  6.80it/s][A
 89%|████████████████████████████████████▎    | 495/559 [03:49<00:09,  6.66it/s][A
 89%|████████████████████████████████████▍    | 496/559 [03:49<00:09,  6.63i

In [84]:
# Attach the scraped values as a new column. Where a lookup failed,
# address_snagger returned the PIN, so that entry holds the PIN instead.
df['ADDRESS'] = address_list

In [85]:
# Save the frame, now including the ADDRESS column, to disk.
addresses_csv = 'Assessors_hit_list_with_addresses.csv'
df.to_csv(addresses_csv)

# Make Map

## Geocoder Setup

In [86]:
# Restore the API key previously saved with IPython's %store magic, so the
# key itself never appears in the notebook.
%store -r google_maps_API_Key
# Despite its name, gmaps_key is the Google Maps client object, not the key.
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

In [87]:
### add Cook County, IL to geo_address ###
# Append the county and state to each address to form the geocoder query.
county_suffix = ' Cook County, IL'
df['geo_address'] = df['ADDRESS'] + county_suffix

In [88]:
### Define and run geocoder ###
def geocode(add):
    """Geocode one address string with the Google Maps client.

    Parameters
    ----------
    add : str
        Address text passed to ``gmaps_key.geocode``.

    Returns
    -------
    tuple
        ``(lat, lng)`` taken from the first result's
        ``["geometry"]["location"]``. When the service returns no results,
        ``(nan, nan)`` is returned instead of raising ``IndexError``, so one
        unmatched address does not abort the whole batch. Rows with NaN
        coordinates should be corrected or dropped before plotting.
    """
    g = gmaps_key.geocode(add)
    if not g:
        # No match: NaN stays a float, so it survives the later string
        # round-trip and float conversion of the coordinates.
        return (float('nan'), float('nan'))
    location = g[0]["geometry"]["location"]
    return (location["lat"], location["lng"])

# Run the geocoder over every query string; each cell becomes a (lat, lng) tuple.
geo_queries = df['geo_address']
df['geocoded'] = geo_queries.map(geocode)

In [89]:
# Turn each "(lat, lng)" value into text, then peel off the parentheses and
# split on the comma to get two numeric columns.
geocoded_text = df['geocoded'].astype(str)
df['geocoded'] = geocoded_text
coord_parts = geocoded_text.str.strip('()').str.split(', ', expand=True)
df[['lat', 'lon']] = coord_parts.astype(float)

## HTML Popup Formatter

In [90]:
# Show the exact column labels (two contain an embedded newline); the
# formatting and popup cells index the frame by these labels verbatim.
df.columns

Index(['PARID', '2021 BOR Final \nAssessed Value',
       '2022 CCAO Initial \nAssessed Value', '$ Difference', '% Difference2',
       'Class', 'Township', 'ADDRESS', 'geo_address', 'geocoded', 'lat',
       'lon'],
      dtype='object')

In [91]:
# Replace the numeric columns with display strings: thousands separators and
# no decimals for dollar amounts, two decimals for the percentage.
# Note: this overwrites the numbers with text, so the cell cannot be re-run
# on the already-formatted frame.
display_formats = {
    '2021 BOR Final \nAssessed Value': '{:,.0f}',
    '2022 CCAO Initial \nAssessed Value': '{:,.0f}',
    '$ Difference': '{:,.0f}',
    '% Difference2': '{:.2f}',
}
for column_label, template in display_formats.items():
    df[column_label] = df[column_label].apply(template.format)

In [97]:
def popup_html(row):
    """Build the HTML shown in a marker popup for one parcel.

    Parameters
    ----------
    row : mapping
        A parcel row providing ``'ADDRESS'``,
        ``'2021 BOR Final \\nAssessed Value'``,
        ``'2022 CCAO Initial \\nAssessed Value'``, ``'$ Difference'`` and
        ``'% Difference2'``.

    Returns
    -------
    str
        An HTML fragment listing the address, both assessed values, the
        dollar difference and the percent difference, one per line.
    """
    # Local import: the function body uses no module-level name for this.
    from html import escape

    # The address is scraped from a web page, so escape it before embedding
    # it in markup; characters such as '&' or '<' would otherwise be read as
    # HTML. Ordinary addresses are unchanged.
    address = escape(str(row['ADDRESS']))
    bor_final_2021 = row['2021 BOR Final \nAssessed Value']
    ccao_initial_2022 = row['2022 CCAO Initial \nAssessed Value']
    difference = row['$ Difference']
    percent_difference = row['% Difference2']

    return (
        '<!DOCTYPE html>\n'
        '    <html>\n'
        f'    <strong>Address: </strong>{address}<br>\n'
        f'    <strong>Board of Review value: </strong>${bor_final_2021}<br>\n'
        f'    <strong>Assessor value: </strong>${ccao_initial_2022}<br>\n'
        f'    <strong>Difference: </strong>${difference}<br>\n'
        f'    <strong>Percent difference: </strong>{percent_difference}%<br>\n'
        '    </html>\n'
        '    '
    )

In [110]:
# Save the geocoded frame before any manual corrections are applied.
uncorrected_csv = 'assessor_hit_list_with_coords_before_corrections.csv'
df.to_csv(uncorrected_csv)

## Correction Section

In [124]:
### 100 N Riverside St Fix ###
# Overwrite the coordinates of the row labelled 26 with hand-picked values.
fix_row = 26
fix_lat, fix_lon = 41.884139963130444, -87.6384974876427
df.at[fix_row, 'lat'] = fix_lat
df.at[fix_row, 'lon'] = fix_lon

In [121]:
### Drop rows based on index ###
# The list holds row *positions*; they are translated to index labels and
# those labels are dropped in place. Re-running this cell would therefore
# drop whichever rows have moved into these positions.
index_drop_list = [175,214,432,454,463]
labels_to_drop = df.index[index_drop_list]
df.drop(index=labels_to_drop, inplace=True)

In [122]:
### Make sure only unknown addresses were dropped ###
# An empty result means no 'UNKNOWN' addresses remain in the frame.
unknown_mask = df['ADDRESS'].str.contains('UNKNOWN')
df.loc[unknown_mask]

Unnamed: 0,PARID,2021 BOR Final \nAssessed Value,2022 CCAO Initial \nAssessed Value,$ Difference,% Difference2,Class,Township,ADDRESS,geo_address,geocoded,lat,lon


In [125]:
### Create map container ###
# Centre the map on the mean of all parcel coordinates.
map_center = df[["lat", "lon"]].mean().to_list()
m = folium.Map(location=map_center, zoom_start=10)


### Create title ###
# NOTE: the title text is still the template placeholder.
title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format("INSERT TITLE HERE")

m.get_root().html.add_child(folium.Element(title_html))

# One marker per parcel, each carrying its formatted HTML popup.
for _, parcel in df.iterrows():
    parcel_popup = folium.Popup(popup_html(parcel), max_width=400)
    folium.Marker(
        location=[parcel['lat'], parcel['lon']],
        popup=parcel_popup,
    ).add_to(m)

# Display map
m

In [94]:
# Write the finished map to a standalone HTML file.
map_output_file = 'index.html'
m.save(map_output_file)

## Map URL snagger

In [95]:
# Root URL to which the project folder name is appended to form the map link
# (presumably the site where the saved maps are published; confirm).
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

In [96]:
# Build the map URL from the name of the folder the notebook is running in.
cwd = os.getcwd()

# os.path.basename honours the platform's path separator, whereas splitting
# on '/' returns the whole path on Windows.
final_name = base_name + os.path.basename(cwd)
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/assessor_hit_list
