## Imports

In [27]:
import googlemaps
import pandas as pd
import numpy as np
import re
import os
import geopandas as gpd
import folium
import tabula

## Read-in

In [28]:
# Gotta figure out how to get rid of the java error.
# SO: https://stackoverflow.com/questions/54817211/java-command-is-not-found-from-this-python-process-please-ensure-java-is-inst

dsf = tabula.read_pdf('condo.pdf', pages='all')

## Clean Data

In [29]:
df = dsf[0]
df.columns = df.iloc[0]
df = df[1:]
df = df.drop(columns=df.columns[0])
df = df.replace(np.nan, 'Not Available', regex=True)
df = df.rename(columns={'Building / Address / City': 'building_address_city','Days on\rMarket':'days_on_market',\
                        'Price /\rSq. Ft.':'price_per_sqft'})
df['building_address_city'] = df['building_address_city'].str.replace('^0', '', regex=True)
df['geo_address'] = df['building_address_city']
df['geo_address'] = df['geo_address'].map(lambda x: re.sub(r'\r', ' ', x))
df = df.replace('\n',' ',regex=True)
pattern = r'^(?P<building_name>[\d\s]*[A-Za-z\s]+)\s(?P<address>.+)\s(?P<city>[A-Za-z\s]+)$'
df2 = df['geo_address']
df2 = df2.to_frame()
df2[['building_name', 'address', 'city']] = df2['geo_address'].str.extract(pattern, expand=True)

Review this later (it's about how to properly merge DFs): https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html

## Geocode

In [30]:
result = pd.merge(df,df2,left_index=True, right_index=True)
df = result
df = df.drop(columns='city')

In [31]:
%store -r google_maps_API_Key
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

In [32]:
def geocode(add):
    g = gmaps_key.geocode(add)
    lat = g[0]["geometry"]["location"]["lat"]
    lng = g[0]["geometry"]["location"]["lng"]
    return (lat, lng)

df['geocoded'] = df['geo_address_x'].apply(geocode)

In [33]:
df['geocoded'] = df['geocoded'].astype(str)
df[['lat', 'lon']] = df['geocoded'].str.strip('()').str.split(', ', expand=True)
df['lat'] = df['lat'].astype(float)
df['lon'] = df['lon'].astype(float)

Folium documentation link: https://python-visualization.github.io/folium/index.html

Folium is the library that lets us use leaflet with Python, since it's meant to be used with JavaScript.

For adding titles: https://stackoverflow.com/questions/61928013/adding-a-title-or-text-to-a-folium-map

## Correction section

In [34]:
# df.at[index#,'col_name']

## Format Data

In [35]:
### Insert NaNs if needed ###
df = df.replace('N/A', np.nan)

In [36]:
df['int_Sale_Price'] = df['Sale Price'].str.replace('$','',regex=False)

In [37]:
df['int_Sale_Price'] = df['int_Sale_Price'].str.replace(',','',regex=False)

In [38]:
df['int_Sale_Price'] = pd.to_numeric(df['int_Sale_Price'])

In [39]:
df['price_per_sqft'] = df['price_per_sqft'].str.replace('$','',regex=False)
df['price_per_sqft'] = df['price_per_sqft'].str.replace(',','',regex=False)
df['price_per_sqft'] = pd.to_numeric(df['price_per_sqft'])

df['days_on_market'] = pd.to_numeric(df['days_on_market'])

## Color-code top sale

In [40]:
### Insert RANK values ###
df['RANK'] = range(1, len(df) + 1)
# use numpy to assign values to the 'COLOR' column
df['COLOR'] = np.where(df['RANK'] <= 1, 'orange', 'blue')

## HTML Popup Formatter

In [41]:
df.columns

Index(['building_address_city', 'Close Date', 'days_on_market', 'Sale Price',
       'Sq. Ft.', 'price_per_sqft', 'Agent', 'Listing Broker', 'Buyer Agent',
       'Buyer Broker', 'geo_address_x', 'geo_address_y', 'building_name',
       'address', 'geocoded', 'lat', 'lon', 'int_Sale_Price', 'RANK', 'COLOR'],
      dtype='object')

In [45]:
def popup_html(row):
    building_name = row['building_name']
    price = row['Sale Price']
    days_on_market = row['days_on_market']
    listing_agent = row['Agent']
    buyers_agent = row['Buyer Agent']
    psf = row['price_per_sqft']
    address = row['address']
    
    html = '''<!DOCTYPE html>
    <html>
    <strong>Building Name: </strong>{}'''.format(building_name) + '''<br>
    <strong>Sale Price: </strong>{}'''.format(price) + '''<br>
    <strong>Days on Market: </strong>{}'''.format(days_on_market) + '''<br>
    <strong>Listing Agent: </strong>{}'''.format(listing_agent) + '''<br>
    <strong>Buyer's Agent: </strong>{}'''.format(buyers_agent) + '''<br>
    <strong>Price sq ft: </strong>${}'''.format(psf) + '''<br>
    <strong>Address: </strong>{}'''.format(address) + '''<br>
    </html>
    '''
    return html

In [46]:
df

Unnamed: 0,building_address_city,Close Date,days_on_market,Sale Price,Sq. Ft.,price_per_sqft,Agent,Listing Broker,Buyer Agent,Buyer Broker,geo_address_x,geo_address_y,building_name,address,geocoded,lat,lon,int_Sale_Price,RANK,COLOR
1,Fendi Chateau\r9349 Collins Ave 406\rSurfside,6/21/2023,67,"$19,750,000.00",5720,3452.8,Ximena Penuela,Clarte Realty,Michele Redlich,Coldwell Banker Realty,Fendi Chateau 9349 Collins Ave 406 Surfside,Fendi Chateau 9349 Collins Ave 406 Surfside,Fendi Chateau,9349 Collins Ave 406,"(25.883, -80.12183999999999)",25.883,-80.12184,19750000.0,1,orange
2,Oceana Bal Harbour\r10201 Collins Ave 806\rBal...,6/22/2023,15,"$7,875,000.00",3070,3000.0,Jeremy Milgroom,Grafeno Realty,Toni Schrager,Brown Harris Stevens,Oceana Bal Harbour 10201 Collins Ave 806 Bal H...,Oceana Bal Harbour 10201 Collins Ave 806 Bal H...,Oceana Bal Harbour,10201 Collins Ave 806 Bal,"(25.8951514, -80.1230855)",25.895151,-80.123086,7875000.0,2,blue
3,Porto Vita\r19955 NE 38th Ct 2804\rAventura,6/20/2023,29,"$3,000,000.00",3950,759.49,Diane Lieberman,One Sotheby's International Realty,Beata Hill,Optimar International Realty,Porto Vita 19955 NE 38th Ct 2804 Aventura,Porto Vita 19955 NE 38th Ct 2804 Aventura,Porto Vita,19955 NE 38th Ct 2804,"(25.9614457, -80.12616750000001)",25.961446,-80.126168,3000000.0,3,blue
4,Lake Villa 2\r733 Crandon Blvd PH6\rKey Biscayne,6/21/2023,459,"$2,752,000.00",2470,1114.17,Lucrecia Arana Lindemann,Destaca,Yunayka Martin,Luxe Properties,Lake Villa 2 733 Crandon Blvd PH6 Key Biscayne,Lake Villa 2 733 Crandon Blvd PH6 Key Biscayne,Lake Villa,2 733 Crandon Blvd PH6 Key,"(25.693713, -80.1628248)",25.693713,-80.162825,2752000.0,4,blue
5,Anastasia Coral Gables\r521 Anastasia Ave 521\...,6/20/2023,7,"$2,100,000.00",2976,705.65,Elena Kemper,BHHS EWM Realty,Ashley Cusack,BHHS EWM Realty,Anastasia Coral Gables 521 Anastasia Ave 521 C...,Anastasia Coral Gables 521 Anastasia Ave 521 C...,Anastasia Coral Gables,521 Anastasia Ave 521 Coral,"(25.7420923, -80.2651236)",25.742092,-80.265124,2100000.0,5,blue
6,The Point\r21200 Point Pl 2504\rAventura,6/23/2023,91,"$2,080,000.00",3173,655.53,Sharon Weissman Harari,One Sotheby's International Realty,Richard Goihman,Douglas Elliman,The Point 21200 Point Pl 2504 Aventura,The Point 21200 Point Pl 2504 Aventura,The Point,21200 Point Pl 2504,"(25.9731214, -80.1251248)",25.973121,-80.125125,2080000.0,6,blue
7,Bellamare\r6000 Island Blvd 2304\rAventura,6/22/2023,18,"$1,900,000.00",2030,935.96,Johanna Bassols,"Oceanica Real Estate, LLC",Coromoto Mora Ortega,Beachfront Realty Inc,Bellamare 6000 Island Blvd 2304 Aventura,Bellamare 6000 Island Blvd 2304 Aventura,Bellamare,6000 Island Blvd 2304,"(25.9426083, -80.1357173)",25.942608,-80.135717,1900000.0,7,blue
8,Aria on the Bay\r488 NE 18th St 1515\rMiami,6/23/2023,192,"$1,800,000.00",2018,891.97,Serena De Marta,Canvas Real Estate,Nicole Pellon,Blokhaus Real Estate,Aria on the Bay 488 NE 18th St 1515 Miami,Aria on the Bay 488 NE 18th St 1515 Miami,Aria on the Bay,488 NE 18th St 1515,"(25.7935581, -80.1870727)",25.793558,-80.187073,1800000.0,8,blue
9,Millenium Tower\r1435 Brickell Ave 3207\rMiami,6/23/2023,104,"$1,785,000.00",1801,991.12,Monique Pennen-Wills PA,BHHS EWM Realty,Jason Samuels,Coldwell Banker Realty,Millenium Tower 1435 Brickell Ave 3207 Miami,Millenium Tower 1435 Brickell Ave 3207 Miami,Millenium Tower,1435 Brickell Ave 3207,"(25.7588046, -80.1922639)",25.758805,-80.192264,1785000.0,9,blue
10,Trump Tower\r16001 Collins Ave 3104\rSunny Isl...,6/21/2023,15,"$1,750,000.00",2327,752.04,Michael Shir,GMT International Realty Inc,Jaine Gitelman,Trust Invest Real Estate Corp,Trump Tower 16001 Collins Ave 3104 Sunny Isles...,Trump Tower 16001 Collins Ave 3104 Sunny Isles...,Trump Tower,16001 Collins Ave 3104 Sunny Isles,"(25.9244562, -80.1216846)",25.924456,-80.121685,1750000.0,10,blue


In [47]:
import folium
from folium.plugins import MarkerCluster

m = folium.Map(location=df[["lat", "lon"]].mean().to_list(), zoom_start=10)

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(f'Recent Miami-Dade Condo Sales ')

caption_html = '''
                <p align="center" style="vertical-align: bottom; font-size:13px"><i>{}</i></p>
                '''.format('June 18th - June 24th')


### Create map container ###
m = folium.Map(location=df[["lat", "lon"]].mean().to_list(),zoom_start=9.5,tiles=None)

# Create two FeatureGroups for different color pins
fg_blue = folium.FeatureGroup(name='All other sales')
fg_orange = folium.FeatureGroup(name='Top Sale')

for index, row in df.iterrows():
    # Add the markers to the appropriate FeatureGroup based on the color
    if row['COLOR'] == 'blue':
        marker = folium.Marker(
            location=[row['lat'], row['lon']],
            radius=5,
            fill=True,
            icon=folium.Icon(color=row['COLOR']),
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_blue)
    else:
        marker = folium.Marker(
            location=[row['lat'], row['lon']],
            radius=5,
            fill=True,
            icon=folium.Icon(color=row['COLOR']),
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_orange)

# Add the FeatureGroups to the map
fg_orange.add_to(m)
fg_blue.add_to(m)

folium.TileLayer('OpenStreetMap',control=False).add_to(m)

# Add LayerControl to the map
folium.map.LayerControl(collapsed=False).add_to(m)
m.get_root().html.add_child(folium.Element(title_html))
m.get_root().html.add_child(folium.Element(caption_html))
            
# Display map
m

In [48]:
m.save('index.html')

## Data snagger

In [49]:
### Set up formatting ###
BR = '\n'

ME = '\033[1m' + 'Most Expensive' + '\033[0m'
LE = '\033[1m' + 'Least Expensive' + '\033[0m'

MAX_PSF = '\033[1m' + 'Highest Price Per Square Foot' + '\033[0m'
MIN_PSF = '\033[1m' + 'Lowest Price Per Square Foot' + '\033[0m'

DAYS_MAX = '\033[1m' + 'Most Days on Market' + '\033[0m'
DAYS_MIN = '\033[1m' + 'Fewest Days on Market' + '\033[0m'

In [52]:
### Highest and lowest sale price ###
print(f"{ME}{BR}{df.loc[df['int_Sale_Price'].idxmax()]['building_name']}, {df.loc[df['int_Sale_Price'].idxmax()]['address']} | Price ${df.loc[df['int_Sale_Price'].idxmax()]['int_Sale_Price']:,.0f} | ${df.loc[df['int_Sale_Price'].idxmax()]['price_per_sqft']:,.0f} psf | Listing agent: {df.loc[df['int_Sale_Price'].idxmax()]['Agent']} with {df.loc[df['int_Sale_Price'].idxmax()]['Listing Broker']} | Buyer's agent: {df.loc[df['int_Sale_Price'].idxmax()]['Buyer Agent']} with {df.loc[df['int_Sale_Price'].idxmax()]['Buyer Broker']} | Days on market: {df.loc[df['int_Sale_Price'].idxmax()]['days_on_market']}")
print(f"{LE}{BR}{df.loc[df['int_Sale_Price'].idxmin()]['building_name']}, {df.loc[df['int_Sale_Price'].idxmin()]['address']} | Price ${df.loc[df['int_Sale_Price'].idxmin()]['int_Sale_Price']:,.0f} | ${df.loc[df['int_Sale_Price'].idxmin()]['price_per_sqft']:,.0f} psf | Listing agent: {df.loc[df['int_Sale_Price'].idxmin()]['Agent']} with {df.loc[df['int_Sale_Price'].idxmin()]['Listing Broker']} | Buyer's agent: {df.loc[df['int_Sale_Price'].idxmin()]['Buyer Agent']} with {df.loc[df['int_Sale_Price'].idxmin()]['Buyer Broker']} | Days on market: {df.loc[df['int_Sale_Price'].idxmin()]['days_on_market']}")
### Highest and lowest psf ###
print(f"{MAX_PSF}{BR}{df.loc[df['price_per_sqft'].idxmax()]['building_name']}, {df.loc[df['price_per_sqft'].idxmax()]['address']} | Price ${df.loc[df['price_per_sqft'].idxmax()]['int_Sale_Price']:,.0f} | ${df.loc[df['price_per_sqft'].idxmax()]['price_per_sqft']:,.0f} psf | Listing agent: {df.loc[df['price_per_sqft'].idxmax()]['Agent']} with {df.loc[df['price_per_sqft'].idxmax()]['Listing Broker']} | Buyer's agent: {df.loc[df['price_per_sqft'].idxmax()]['Buyer Agent']} with {df.loc[df['price_per_sqft'].idxmax()]['Buyer Broker']} | Days on market: {df.loc[df['price_per_sqft'].idxmax()]['days_on_market']}")
print(f"{MIN_PSF}{BR}{df.loc[df['price_per_sqft'].idxmin()]['building_name']}, {df.loc[df['price_per_sqft'].idxmin()]['address']} | Price ${df.loc[df['price_per_sqft'].idxmin()]['int_Sale_Price']:,.0f} | ${df.loc[df['price_per_sqft'].idxmin()]['price_per_sqft']:,.0f} psf | Listing agent: {df.loc[df['price_per_sqft'].idxmin()]['Agent']} with {df.loc[df['price_per_sqft'].idxmin()]['Listing Broker']} | Buyer's agent: {df.loc[df['price_per_sqft'].idxmin()]['Buyer Agent']} with {df.loc[df['price_per_sqft'].idxmin()]['Buyer Broker']} | Days on market: {df.loc[df['price_per_sqft'].idxmin()]['days_on_market']}")
### Highest and lowest days on market ###
print(f"{DAYS_MAX}{BR}{df.loc[df['days_on_market'].idxmax()]['building_name']}, {df.loc[df['days_on_market'].idxmax()]['address']} | Price ${df.loc[df['days_on_market'].idxmax()]['int_Sale_Price']:,.0f} | ${df.loc[df['days_on_market'].idxmax()]['price_per_sqft']:,.0f} psf | Listing agent: {df.loc[df['days_on_market'].idxmax()]['Agent']} with {df.loc[df['days_on_market'].idxmax()]['Listing Broker']} | Buyer's agent: {df.loc[df['days_on_market'].idxmax()]['Buyer Agent']} with {df.loc[df['days_on_market'].idxmax()]['Buyer Broker']} | Days on market: {df.loc[df['days_on_market'].idxmax()]['days_on_market']}")
print(f"{DAYS_MIN}{BR}{df.loc[df['days_on_market'].idxmin()]['building_name']}, {df.loc[df['days_on_market'].idxmin()]['address']} | Price ${df.loc[df['days_on_market'].idxmin()]['int_Sale_Price']:,.0f} | ${df.loc[df['days_on_market'].idxmin()]['price_per_sqft']:,.0f} psf | Listing agent: {df.loc[df['days_on_market'].idxmin()]['Agent']} with {df.loc[df['days_on_market'].idxmin()]['Listing Broker']} | Buyer's agent: {df.loc[df['days_on_market'].idxmin()]['Buyer Agent']} with {df.loc[df['days_on_market'].idxmin()]['Buyer Broker']} | Days on market: {df.loc[df['days_on_market'].idxmin()]['days_on_market']}")

[1mMost Expensive[0m
Fendi Chateau, 9349 Collins Ave 406 | Price $19,750,000 | $3,453 psf | Listing agent: Ximena Penuela with Clarte Realty | Buyer's agent: Michele Redlich with Coldwell Banker Realty | Days on market: 67
[1mLeast Expensive[0m
Trump Tower, 16001 Collins Ave 3104 Sunny Isles | Price $1,750,000 | $752 psf | Listing agent: Michael Shir with GMT International Realty Inc | Buyer's agent: Jaine Gitelman with Trust Invest Real Estate Corp | Days on market: 15
[1mHighest Price Per Square Foot[0m
Fendi Chateau, 9349 Collins Ave 406 | Price $19,750,000 | $3,453 psf | Listing agent: Ximena Penuela with Clarte Realty | Buyer's agent: Michele Redlich with Coldwell Banker Realty | Days on market: 67
[1mLowest Price Per Square Foot[0m
The Point, 21200 Point Pl 2504 | Price $2,080,000 | $656 psf | Listing agent: Sharon Weissman Harari with One Sotheby's International Realty | Buyer's agent: Richard Goihman with Douglas Elliman | Days on market: 91
[1mMost Days on Market[0m


## Map URL snagger

Map template URL: `https://trd-digital.github.io/trd-news-interactive-maps/{map-folder-name}`

In [53]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

In [54]:
cwd = os.getcwd()

cwd = cwd.split('/')

final_name = base_name + cwd[-1]
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/condo_sales_week_ending_06262023
