## Imports

In [43]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from tqdm import tqdm
import googlemaps
import numpy as np
import re
import os
import folium

## Pandas display options

In [41]:
# Raise pandas' display limits so wide/long frames are not truncated in cell output.
# A max_colwidth of None removes the per-cell character limit.
for option_name, option_value in (
    ('display.max_columns', 500),
    ('display.max_colwidth', None),
    ('display.max_rows', 800),
):
    pd.set_option(option_name, option_value)

## Data read-in

In [31]:
# Workbook holding the parcel rows to process (one 'PARID' per row).
pins_workbook = 'Copy of 230209.2021CCAORetentionPINs.xlsx'
df = pd.read_excel(pins_workbook)

## Data clean

In [32]:
# Strip the dashes so each PIN is a bare digit string, as used in the assessor URL.
# regex=False: '-' is meant literally, and the default for `regex` differs between
# pandas versions, so state it explicitly.
df['PARID'] = df['PARID'].str.replace('-', '', regex=False)

## Test block

In [33]:
# Smoke test: fetch one known PIN and print its address before scraping everything.
sample_pin = '17162050160000'
response = requests.get(
    f'https://www.cookcountyassessor.com/pin/{sample_pin}',
    timeout=30,  # fail instead of hanging indefinitely if the site stalls
)
response.raise_for_status()  # surface HTTP errors rather than parsing an error page
soup = BeautifulSoup(response.content, 'html.parser')
# The address value is the <span> that follows the <span> labelled "Address".
# `string=` is the current name of the deprecated `text=` filter.
address = soup.find('span', string='Address').find_next('span').text
print(address)

10 S DEARBORN ST


## Set up scraper

In [34]:
def address_snagger(PARID, timeout=30):
    """Look up the street address for one parcel on the Cook County Assessor site.

    Parameters
    ----------
    PARID : mapping
        A row-like object (dict or pandas Series) with a 'PARID' key holding
        the PIN to look up.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up, so a single
        stalled request cannot hang the whole scrape.

    Returns
    -------
    str
        The text of the <span> following the "Address" label on the PIN page.
        If anything goes wrong (network error, HTTP error status, label not
        found on the page), the PIN itself is returned, so a value equal to
        the PIN marks a failed lookup.

    Raises
    ------
    KeyError
        If the row has no 'PARID' key (the lookup happens outside the ``try``).
    """
    PIN = PARID['PARID']
    try:
        response = requests.get(
            f'https://www.cookcountyassessor.com/pin/{PIN}',
            timeout=timeout,
        )
        # Treat HTTP error pages as failures instead of trying to parse them.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # `string=` is the current name of the deprecated `text=` filter.
        return soup.find('span', string='Address').find_next('span').text
    except Exception:
        # Deliberate best-effort: keep the batch running and return the PIN
        # so the failed row can be identified afterwards.
        return PIN

In [35]:
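# Alternative without a progress bar (not used; the next cell is the active path).
# Note that it would write to 'address', not the 'ADDRESS' column used later.
# df['address'] = df.apply(address_snagger, axis=1)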

In [36]:
# Scrape one address per parcel record, with tqdm reporting progress.
parcel_records = df.to_dict('records')
address_list = []
for parcel_record in tqdm(parcel_records):
    address_list.append(address_snagger(parcel_record))


  0%|                                                   | 0/559 [00:00<?, ?it/s][A
  0%|                                           | 1/559 [00:00<01:07,  8.24it/s][A
  0%|▏                                          | 2/559 [00:00<01:16,  7.25it/s][A
  1%|▏                                          | 3/559 [00:00<02:18,  4.02it/s][A
  1%|▎                                          | 4/559 [00:00<01:56,  4.76it/s][A
  1%|▍                                          | 5/559 [00:00<01:46,  5.22it/s][A
  1%|▍                                          | 6/559 [00:01<01:59,  4.63it/s][A
  1%|▌                                          | 7/559 [00:01<01:46,  5.20it/s][A
  1%|▌                                          | 8/559 [00:01<01:38,  5.59it/s][A
  2%|▋                                          | 9/559 [00:01<01:32,  5.97it/s][A
  2%|▊                                         | 10/559 [00:01<01:25,  6.42it/s][A
  2%|▊                                         | 11/559 [00:01<01:19,  6.90

 17%|███████▎                                  | 97/559 [01:22<06:51,  1.12it/s][A
 18%|███████▎                                  | 98/559 [01:24<07:09,  1.07it/s][A
 18%|███████▍                                  | 99/559 [01:24<05:21,  1.43it/s][A
 18%|███████▎                                 | 100/559 [01:25<06:12,  1.23it/s][A
 18%|███████▍                                 | 101/559 [01:26<06:48,  1.12it/s][A
 18%|███████▍                                 | 102/559 [01:27<06:57,  1.09it/s][A
 18%|███████▌                                 | 103/559 [01:28<07:01,  1.08it/s][A
 19%|███████▋                                 | 104/559 [01:29<07:08,  1.06it/s][A
 19%|███████▋                                 | 105/559 [01:30<07:33,  1.00it/s][A
 19%|███████▊                                 | 106/559 [01:31<07:36,  1.01s/it][A
 19%|███████▊                                 | 107/559 [01:32<08:18,  1.10s/it][A
 19%|███████▉                                 | 108/559 [01:34<08:47,  1.17s

 35%|██████████████▏                          | 194/559 [03:01<07:29,  1.23s/it][A
 35%|██████████████▎                          | 195/559 [03:02<07:18,  1.20s/it][A
 35%|██████████████▍                          | 196/559 [03:03<06:50,  1.13s/it][A
 35%|██████████████▍                          | 197/559 [03:04<06:26,  1.07s/it][A
 35%|██████████████▌                          | 198/559 [03:05<06:55,  1.15s/it][A
 36%|██████████████▌                          | 199/559 [03:06<06:31,  1.09s/it][A
 36%|██████████████▋                          | 200/559 [03:07<06:32,  1.09s/it][A
 36%|██████████████▋                          | 201/559 [03:08<06:22,  1.07s/it][A
 36%|██████████████▊                          | 202/559 [03:09<06:10,  1.04s/it][A
 36%|██████████████▉                          | 203/559 [03:10<06:04,  1.02s/it][A
 36%|██████████████▉                          | 204/559 [03:11<05:52,  1.01it/s][A
 37%|███████████████                          | 205/559 [03:12<05:54,  1.00s

 52%|█████████████████████▎                   | 291/559 [04:36<04:49,  1.08s/it][A
 52%|█████████████████████▍                   | 292/559 [04:37<04:36,  1.04s/it][A
 52%|█████████████████████▍                   | 293/559 [04:37<03:30,  1.26it/s][A
 53%|█████████████████████▌                   | 294/559 [04:38<03:34,  1.24it/s][A
 53%|█████████████████████▋                   | 295/559 [04:39<03:45,  1.17it/s][A
 53%|█████████████████████▋                   | 296/559 [04:40<03:46,  1.16it/s][A
 53%|█████████████████████▊                   | 297/559 [04:41<04:03,  1.07it/s][A
 53%|█████████████████████▊                   | 298/559 [04:42<04:06,  1.06it/s][A
 53%|█████████████████████▉                   | 299/559 [04:43<04:10,  1.04it/s][A
 54%|██████████████████████                   | 300/559 [04:44<04:14,  1.02it/s][A
 54%|██████████████████████                   | 301/559 [04:45<04:18,  1.00s/it][A
 54%|██████████████████████▏                  | 302/559 [04:46<04:07,  1.04i

 69%|████████████████████████████▍            | 388/559 [06:11<02:45,  1.04it/s][A
 70%|████████████████████████████▌            | 389/559 [06:12<02:43,  1.04it/s][A
 70%|████████████████████████████▌            | 390/559 [06:13<02:44,  1.02it/s][A
 70%|████████████████████████████▋            | 391/559 [06:14<02:42,  1.03it/s][A
 70%|████████████████████████████▊            | 392/559 [06:15<02:50,  1.02s/it][A
 70%|████████████████████████████▊            | 393/559 [06:16<02:47,  1.01s/it][A
 70%|████████████████████████████▉            | 394/559 [06:17<02:44,  1.00it/s][A
 71%|████████████████████████████▉            | 395/559 [06:18<02:40,  1.02it/s][A
 71%|█████████████████████████████            | 396/559 [06:19<02:40,  1.01it/s][A
 71%|█████████████████████████████            | 397/559 [06:20<02:37,  1.03it/s][A
 71%|█████████████████████████████▏           | 398/559 [06:20<02:33,  1.05it/s][A
 71%|█████████████████████████████▎           | 399/559 [06:21<02:30,  1.06i

 87%|███████████████████████████████████▌     | 485/559 [07:47<01:13,  1.00it/s][A
 87%|███████████████████████████████████▋     | 486/559 [07:48<01:11,  1.01it/s][A
 87%|███████████████████████████████████▋     | 487/559 [07:49<01:10,  1.01it/s][A
 87%|███████████████████████████████████▊     | 488/559 [07:50<01:08,  1.03it/s][A
 87%|███████████████████████████████████▊     | 489/559 [07:51<01:08,  1.02it/s][A
 88%|███████████████████████████████████▉     | 490/559 [07:52<01:07,  1.03it/s][A
 88%|████████████████████████████████████     | 491/559 [07:53<01:06,  1.02it/s][A
 88%|████████████████████████████████████     | 492/559 [07:54<01:06,  1.01it/s][A
 88%|████████████████████████████████████▏    | 493/559 [07:55<01:04,  1.03it/s][A
 88%|████████████████████████████████████▏    | 494/559 [07:56<01:04,  1.01it/s][A
 89%|████████████████████████████████████▎    | 495/559 [07:57<01:01,  1.03it/s][A
 89%|████████████████████████████████████▍    | 496/559 [07:58<01:00,  1.05i

In [37]:
# Attach the scraped addresses. address_snagger returns the PIN itself when a
# lookup fails, so entries equal to the row's PARID mark failed lookups.
df['ADDRESS'] = address_list

In [40]:
# Save the frame with its scraped addresses (the index is written as the first column).
addresses_csv = 'Assessors_hit_list_with_addresses.csv'
df.to_csv(addresses_csv)

# Make Map

## Geocoder Setup

In [44]:
# Restore the API key from IPython's %store database (it must have been saved
# beforehand with `%store google_maps_API_Key`), so the key is not hardcoded here.
%store -r google_maps_API_Key
# NOTE: despite its name, `gmaps_key` holds the googlemaps client object, not the key string.
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

In [46]:
### Build the geocoder query: street address followed by the county and state ###
county_suffix = ' Cook County, IL'
df['geo_address'] = df['ADDRESS'] + county_suffix

In [48]:
### Define and run geocoder ###
def geocode(add):
    """Geocode one address string with the module-level googlemaps client.

    Parameters
    ----------
    add : str
        The address query passed to ``gmaps_key.geocode``.

    Returns
    -------
    tuple
        ``(lat, lng)`` taken from the first result's geometry location, or
        ``(nan, nan)`` when the geocoder returns no results.
    """
    results = gmaps_key.geocode(add)
    if not results:
        # No match: return NaNs instead of raising IndexError on results[0],
        # so one unresolvable address does not abort the whole batch.
        return (float('nan'), float('nan'))
    location = results[0]["geometry"]["location"]
    return (location["lat"], location["lng"])

# Geocode every query string; geocode() is called once per row.
geocode_results = df['geo_address'].apply(geocode)
df['geocoded'] = geocode_results

In [50]:
# Store the coordinate pairs as text such as "(41.88, -87.63)", then parse the
# two numbers back out into separate float columns.
df['geocoded'] = df['geocoded'].astype(str)
coord_parts = df['geocoded'].str.strip('()').str.split(', ', expand=True)
df['lat'] = coord_parts[0].astype(float)
df['lon'] = coord_parts[1].astype(float)

## HTML Popup Formatter

In [51]:
# Show the column names; popup_html() looks rows up by these exact keys.
df.columns

Index(['PARID', '2021 BOR Final \nAssessed Value',
       '2022 CCAO Initial \nAssessed Value', '$ Difference', '% Difference2',
       'Class', 'Township', 'ADDRESS', 'geo_address', 'geocoded', 'lat',
       'lon'],
      dtype='object')

In [52]:
def popup_html(row):
    """Build the HTML shown in a map marker's popup for one parcel.

    Parameters
    ----------
    row : mapping
        A row-like object (dict or pandas Series) with the keys 'ADDRESS',
        '2021 BOR Final \\nAssessed Value', '2022 CCAO Initial \\nAssessed Value',
        '$ Difference', '% Difference2', 'Class', 'Township' and 'PARID'.

    Returns
    -------
    str
        An HTML fragment with one "<strong>label: </strong>value<br>" line per
        field. Values are HTML-escaped because the address is scraped from a
        web page and must not be able to inject markup into the popup.

    Raises
    ------
    KeyError
        If ``row`` lacks one of the expected keys.
    """
    from html import escape  # local import keeps this cell self-contained

    # (popup label, source column) in display order. Only the percentage
    # difference carries a "%" prefix in its label.
    fields = [
        ('Address', 'ADDRESS'),
        ('BOR Final 2021', '2021 BOR Final \nAssessed Value'),
        ('CCAO Initial Assessed 2022', '2022 CCAO Initial \nAssessed Value'),
        ('Difference', '$ Difference'),
        ('% Difference', '% Difference2'),
        ('Class', 'Class'),
        ('Township', 'Township'),
        ('PIN', 'PARID'),
    ]
    body_lines = [
        '    <strong>{}: </strong>{}<br>\n'.format(label, escape(str(row[column])))
        for label, column in fields
    ]
    return '<!DOCTYPE html>\n    <html>\n' + ''.join(body_lines) + '    </html>\n    '

In [53]:
### Create map container ###
# Centre the map on the mean coordinate (mean() skips missing values by default).
m = folium.Map(location=df[["lat", "lon"]].mean().to_list(), zoom_start=10)


### Create title ###
MAP_TITLE = "INSERT TITLE HERE"  # TODO: replace this placeholder with the real title
title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(MAP_TITLE)

m.get_root().html.add_child(folium.Element(title_html))

### Add one marker per parcel ###
# Rows without coordinates are skipped: folium cannot place a marker at NaN.
for index, row in df.dropna(subset=['lat', 'lon']).iterrows():
    folium.Marker(
        location=[row['lat'], row['lon']],
        popup=folium.Popup(popup_html(row), max_width=400),
    ).add_to(m)

# Display map
m

In [54]:
# Write the finished map to a standalone HTML file.
map_output_path = 'index.html'
m.save(map_output_path)

## Map URL snagger