## Import and Set Options

In [1]:
from geopy.geocoders import GoogleV3
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
import numpy as np
import urllib3
import re
from textwrap import shorten
import os
import geopandas as gpd

In [21]:
# Widen pandas' display limits so long article text and wide frames are shown in full.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 1600)
pd.set_option('display.max_colwidth', None)  # default is 50; None disables truncation

In [4]:
# Load the sample CSV (path is relative to the working directory) into the main DataFrame.
df = pd.read_csv('Chicago_sample.csv')

## Inspect Data and Extract Addresses

In [15]:
# Show the column labels currently present in df.
df.columns

Index(['URL', 'HED', 'DEK', 'PUB_DATE', 'LEDE', 'STORY_TEXT', 'ADDRESS'], dtype='object')

In [9]:
# 'Unnamed: 0' is the label pandas gives an unlabeled index column read back from a CSV.
# errors='ignore' keeps this cell safe to re-run: without it a second execution
# (or a CSV that never had the column) raises KeyError.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [11]:
# Cast every story body to str before the regex extraction below.
# NOTE(review): depending on the pandas version, missing values may become the
# literal text 'nan' after this cast - confirm that is acceptable.
df['STORY_TEXT'] = df['STORY_TEXT'].astype(str)

In [39]:
# Extract the first street-address-like phrase from each story:
#   house number, then 1-6 words, then a street-type suffix.
# The \b anchors are the fix: without a word boundary after the suffix, 'St', 'Dr',
# 'Way', ... also matched the *start* of a longer word (e.g. 'Dr' in 'Drew'), which
# produced non-addresses; the leading \b stops a match from starting mid-token.
# NOTE(review): 'Huron' is a street name rather than a suffix - kept as in the original.
ADDRESS_PATTERN = (
    r'(?P<ADDRESS>\b[1-9][0-9]* (?:\w+\W+){1,6}'
    r'(?:Road|Rd|Avenue|Ave|Boulevard|Blvd|Street|St|Place|Drive|Dr|Huron|Parkway|Way|Center|Lane|Court)\b)'
)
# expand=False returns a Series (one value per row) instead of a one-column DataFrame.
df['ADDRESS'] = df['STORY_TEXT'].str.extract(ADDRESS_PATTERN, expand=False)

In [40]:
# Count rows where no address was extracted (True) versus rows with a match (False).
df['ADDRESS'].isna().value_counts()

False    715
True     285
Name: ADDRESS, dtype: int64

In [41]:
# df.STORY_TEXT.iloc[570]

In [44]:
# Spot-check the extracted addresses next to their source URLs for rows 300-399.
df[['URL','ADDRESS']].iloc[300:400]

Unnamed: 0,URL,ADDRESS
300,https://therealdeal.com/chicago/2022/03/11/trump-tower-penthouse-sale-may-signal-chicago-luxury-condo-markets-peak/,30 million. St
301,https://therealdeal.com/chicago/2021/12/06/kimpton-chicago-hotel-to-be-sold-for-20m-less-than-appraised-value-eight-years-ago/,225 North Wabash Avenue
302,https://therealdeal.com/chicago/2019/01/13/studio-gang-to-design-new-university-of-chicago-campus-in-paris/,
303,https://therealdeal.com/chicago/2022/05/09/brookfield-buys-chicago-reit-watermark-for-3-8b/,
304,https://therealdeal.com/chicago/2019/01/15/lake-forest-mansion-latest-pricey-chicago-area-resi-listing/,55 E Onwentsia Road
305,https://therealdeal.com/chicago/2019/05/01/how-a-2b-redevelopment-site-in-chicago-landed-in-an-opportunity-zone-a-trd-investigation/,
306,https://therealdeal.com/chicago/2020/12/01/marijuana-dispensaries-level-up-with-sleek-design-larger-footprints/,
307,https://therealdeal.com/chicago/2019/04/19/chicagos-first-building-code-overhaul-in-70-years-could-boost-housing-market-experts-say/,
308,https://therealdeal.com/chicago/2022/10/21/amcap-picks-up-suburban-chicago-shopping-center-for-19m/,
309,https://therealdeal.com/chicago/2018/10/06/what-will-the-next-iconic-chicago-home-look-like/,


## Link Formatter

In [48]:
# Re-check the available columns before building the link fields.
df.columns

Index(['URL', 'HED', 'DEK', 'PUB_DATE', 'LEDE', 'STORY_TEXT', 'ADDRESS'], dtype='object')

In [58]:
LINK_TEXT_WIDTH = 20  # maximum characters of the lede used as the clickable link text


def _split_lede(lede, width=LINK_TEXT_WIDTH):
    """Split a lede into ``(link_text, remainder)`` at a word boundary.

    ``textwrap.shorten`` collapses whitespace before truncating, so its result is
    only guaranteed to be a prefix of the *whitespace-normalized* text. Normalizing
    first and slicing by length avoids the two failure modes of removing the prefix
    with ``str.replace``: the prefix not being found (text duplicated in the link and
    the remainder) and later repeats of the same words being deleted as well.

    Parameters
    ----------
    lede : str or missing value
        Opening paragraph of the article.
    width : int
        Maximum length of the link text.

    Returns
    -------
    tuple of (str, str)
        The link text and whatever follows it; two empty strings for a missing lede.
    """
    if pd.isna(lede):
        return '', ''
    normalized = ' '.join(str(lede).split())
    link_text = shorten(normalized, width=width, placeholder="")
    return link_text, normalized[len(link_text):]


lede_parts = [_split_lede(lede) for lede in df["LEDE"]]
df["short_description"] = [link_text for link_text, _ in lede_parts]
df["remaining_desc"] = [remainder for _, remainder in lede_parts]

# Only the first few words are wrapped in the anchor; the rest of the lede follows as plain text.
df["description_link"] = (
    '<a href="' + df["URL"] + '" target="_blank" rel="noopener noreferrer">'
    + df["short_description"] + "</a>" + df["remaining_desc"]
)

In [64]:
# Keep the first run of four digits found in each publication date as the year.
# expand=False hands back a Series directly rather than a one-column DataFrame.
df['PUB_YEAR'] = df['PUB_DATE'].str.extract(r'(\d{4})', expand=False)

In [68]:
# df[['PUB_DATE','PUB_YEAR']]

## Geocoder Setup

In [47]:
# Restore the API key previously saved with IPython's %store magic, so the key
# itself is never written in the notebook.
%store -r google_maps_API_Key
# Google geocoding client used by the geocoding cell below.
geolocator = GoogleV3(api_key=google_maps_API_Key)

In [51]:
def _geocode_or_none(address):
    """Geocode one address string, returning None when there is no address.

    Rows without an extracted address carry NaN here; passing that to the
    geocoder wastes a billable request and (presumably) sends the text 'nan'
    as the query, so those rows are skipped instead.
    """
    if pd.isna(address):
        return None
    return geolocator.geocode(address, timeout=10)


# Append the city so bare street addresses resolve inside Chicago.
df['geo_address'] = df['ADDRESS'] + ' Chicago, IL'
# NOTE(review): one request per row with no rate limiting or retry; geopy ships a
# RateLimiter helper if the service starts rejecting calls.
df['loc'] = df['geo_address'].apply(_geocode_or_none)
df['point'] = df['loc'].apply(lambda loc: tuple(loc.point) if loc else None)

# Expand each (lat, lon, altitude) tuple into three columns. Rows without a point
# get explicit NaNs so the frame always has exactly three columns, even when the
# first row - or every row - failed to geocode.
_NO_POINT = (np.nan, np.nan, np.nan)
df[['lat','lon','altitude']] = pd.DataFrame(
    [point if isinstance(point, tuple) else _NO_POINT for point in df['point']],
    index=df.index,
    columns=['lat', 'lon', 'altitude'],
)

In [72]:
# List the columns after the link, year and geocoding cells have added theirs.
df.columns

Index(['URL', 'HED', 'DEK', 'PUB_DATE', 'LEDE', 'STORY_TEXT', 'ADDRESS',
       'short_description', 'remaining_desc', 'description_link',
       'geo_address', 'loc', 'point', 'lat', 'lon', 'altitude', 'PUB_YEAR'],
      dtype='object')

In [126]:
# def popup_html(row):
#     i = row
#     URL = df2['URL'].iloc[i]
#     HED = df2['HED'].iloc[i]
#     DEK = df2['DEK'].iloc[i]
#     PUB_YEAR = df2['PUB_YEAR'].iloc[i]
#     LEDE = df2['LEDE'].iloc[i]
    
#     html = '''<!DOCTYPE html>
#     <html>
#     Folio: <strong>{}'''.format(URL) + '''</strong><br>
#     Address: <strong>{}'''.format(HED) + '''</strong><br>
#     Year Built: <strong>{}'''.format(DEK) + '''</strong><br>
#     Building Area: <strong>{}'''.format(PUB_YEAR) + '''</strong><br>
#     Lot Size: <strong>{}'''.format(LEDE) + '''</strong><br>
#     </html>
#     '''
#     return html

In [76]:
# df2 = df.dropna(subset='lat')

In [78]:
# df2 = df.dropna(subset='lon')

In [2]:
# Load the saved map data (it includes the lat/lon columns) from CSV.
df2 = pd.read_csv('sample_map_data.csv')

In [3]:
# index=False: do not write the row index. Writing it is what creates an
# 'Unnamed: 0' column when the file is read back, and each further
# read/write round trip would add another one.
df2.to_csv('sample_map_data.csv', index=False)

In [22]:
# Column labels of the reloaded map data.
df2.columns

Index(['Unnamed: 0', 'URL', 'HED', 'DEK', 'PUB_DATE', 'LEDE', 'STORY_TEXT',
       'ADDRESS', 'short_description', 'remaining_desc', 'description_link',
       'geo_address', 'loc', 'point', 'lat', 'lon', 'altitude', 'PUB_YEAR'],
      dtype='object')

In [53]:
# Number of rows plotted on the map; raise it to map more of the data.
MAP_SAMPLE_SIZE = 200
df3 = df2.head(MAP_SAMPLE_SIZE)

In [55]:
import folium
from folium import Map, FeatureGroup, Marker, LayerControl
import branca

# Popup markup, filled per article with headline, dek, linked lede and publication year.
# Defined once here instead of being re-assembled on every loop iteration.
popup_template = '''<!DOCTYPE html>
        <html>
        <strong>{}</strong><br><br>
        <em>{}</em><br><br>
        {}<br><br>
        <u>Published: {}</u><br>
        </html>
        '''

# A marker needs both coordinates; rows that never geocoded would otherwise make
# folium.Marker fail on a NaN location.
map_rows = df3.dropna(subset=['lat', 'lon'])

# Centre the map on the mean position of the plotted articles.
chi_map = folium.Map(location=map_rows[["lat", "lon"]].mean().to_list(),zoom_start=9,tiles=None)

# Base tiles: tiles=None above plus control=False here keeps the base map out of
# the layer control, so only the per-year groups are toggleable.
folium.TileLayer('OpenStreetMap',control=False).add_to(chi_map)

# One toggleable layer per publication year.
for grp_name, df_grp in map_rows.groupby('PUB_YEAR'):
    # Layer names are displayed as text; cast so years read back from CSV as
    # numbers behave the same as string years.
    feature_group = folium.FeatureGroup(str(grp_name))
    for row in df_grp.itertuples():
        popup_html = popup_template.format(row.HED, row.DEK, row.description_link, row.PUB_YEAR)
        # script=True makes folium insert the markup as raw HTML (the lede link must stay clickable).
        popup = folium.Popup(folium.Html(popup_html, script=True),min_width=200,max_width=200)
        folium.Marker(location=[row.lat,row.lon],popup=popup).add_to(feature_group)

    feature_group.add_to(chi_map)

# Added last, once every layer it should list is on the map.
folium.LayerControl().add_to(chi_map)
chi_map

In [24]:
# Save the rendered map to index.html.
chi_map.save('index.html')