## Imports

In [56]:
import pandas as pd
import numpy as np
import re
import os
import folium
import geopandas as gpd

## Data Read-in

In [57]:
df = pd.read_csv('redfin_2023-09-08-10-32-58.csv')
BC_gf = gpd.read_file('countyboundary')

## Data Clean

In [58]:
df = df.rename(columns={'URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)':'URL'})

In [59]:
df = df.dropna(subset=['SOLD DATE'])

In [60]:
# Define list of desired months (excluding current month)
desired_months = ['August']

# Filter DataFrame to include only entries from desired months
df_filtered = df[df['SOLD DATE'].str.split('-', expand=True)[0].isin(desired_months)]

# Reset the index
df_filtered = df_filtered.reset_index(drop=True)

In [61]:
# Data checks
print(df_filtered['PRICE'].isna().value_counts())
print('-------')
print(df_filtered['$/SQUARE FEET'].isna().value_counts())
print('-------')
print(df_filtered['YEAR BUILT'].isna().value_counts())
print('-------')

PRICE
False    926
Name: count, dtype: int64
-------
$/SQUARE FEET
False    910
True      16
Name: count, dtype: int64
-------
YEAR BUILT
False    926
Name: count, dtype: int64
-------


In [62]:
# sorted_df = df_filtered.sort_values(by='YEAR BUILT', ascending=False)
# second_newest_building = sorted_df.iloc[2]
# print(second_newest_building['URL'])

In [63]:
df_filtered.loc[df_filtered['PRICE'] == '0']

Unnamed: 0,SALE TYPE,SOLD DATE,PROPERTY TYPE,ADDRESS,CITY,STATE OR PROVINCE,ZIP OR POSTAL CODE,PRICE,BEDS,BATHS,...,STATUS,NEXT OPEN HOUSE START TIME,NEXT OPEN HOUSE END TIME,URL,SOURCE,MLS#,FAVORITE,INTERESTED,LATITUDE,LONGITUDE


In [64]:
df_filtered['PRICE'] = pd.to_numeric(df_filtered['PRICE'])
df_filtered['$/SQUARE FEET'] = pd.to_numeric(df_filtered['$/SQUARE FEET'])
df_filtered['YEAR BUILT'] = pd.to_numeric(df_filtered['YEAR BUILT'])
df_filtered['LATITUDE'] = pd.to_numeric(df_filtered['LATITUDE'])
df_filtered['LONGITUDE'] = pd.to_numeric(df_filtered['LONGITUDE'])

In [65]:
df_filtered.sort_values(by='PRICE',ascending=True).head(20)

Unnamed: 0,SALE TYPE,SOLD DATE,PROPERTY TYPE,ADDRESS,CITY,STATE OR PROVINCE,ZIP OR POSTAL CODE,PRICE,BEDS,BATHS,...,STATUS,NEXT OPEN HOUSE START TIME,NEXT OPEN HOUSE END TIME,URL,SOURCE,MLS#,FAVORITE,INTERESTED,LATITUDE,LONGITUDE
115,PAST SALE,August-15-2023,Condo/Co-op,111 Golden Isles Dr Unit G 5,Hallandale Beach,FL,33009.0,155.0,1.0,1.0,...,Sold,,,https://www.redfin.com/FL/Hallandale-Beach/111...,Beaches MLS,RX-10903582,N,Y,25.98287,-80.12323
587,PAST SALE,August-31-2023,Condo/Co-op,1400 Saint Charles Pl #207,Pembroke Pines,FL,33026.0,160.0,2.0,2.0,...,Sold,,,https://www.redfin.com/FL/Pembroke-Pines/1400-...,Beaches MLS,F10384686,N,Y,26.019847,-80.28174
757,PAST SALE,August-31-2023,Condo/Co-op,3750 NW 115th Way Unit 3-1,Coral Springs,FL,33065.0,229.0,3.0,2.0,...,Sold,,,https://www.redfin.com/FL/Coral-Springs/3750-N...,MARMLS,A11364398,N,Y,26.27706,-80.281218
324,PAST SALE,August-18-2023,Condo/Co-op,420 S Park Rd Unit 2-110,Hollywood,FL,33021.0,235.0,1.0,1.0,...,Sold,,,https://www.redfin.com/FL/Hollywood/420-S-Park...,MARMLS,A11416959,N,Y,26.007869,-80.176652
36,PAST SALE,August-8-2023,Condo/Co-op,9234 Wedgewood Ln Unit D7,Tamarac,FL,33321.0,290.0,2.0,2.0,...,Sold,,,https://www.redfin.com/FL/Tamarac/9234-Wedgewo...,Beaches MLS,F10387316,N,Y,26.202908,-80.273637
796,PAST SALE,August-23-2023,Condo/Co-op,2800 NW 56th Ave Unit G102,Lauderhill,FL,33313.0,1400.0,2.0,1.0,...,Sold,,,https://www.redfin.com/FL/Lauderhill/2800-NW-5...,MARMLS,A11341987,N,Y,26.162024,-80.225762
885,PAST SALE,August-29-2023,Condo/Co-op,3801 Environ Blvd #115,Lauderhill,FL,33319.0,65000.0,2.0,2.0,...,Sold,,,https://www.redfin.com/FL/Lauderhill/3801-Envi...,MARMLS,A11338802,N,Y,26.173523,-80.24446
457,PAST SALE,August-23-2023,Condo/Co-op,4751 NW 21st St #501,Lauderhill,FL,33313.0,74000.0,1.0,1.5,...,Sold,,,https://www.redfin.com/FL/Lauderhill/4751-NW-2...,Beaches MLS,F10383455,N,Y,26.153221,-80.213813
692,PAST SALE,August-17-2023,Condo/Co-op,441 NW 76th Ave #105,Margate,FL,33063.0,85000.0,1.0,1.0,...,Sold,,,https://www.redfin.com/FL/Margate/441-NW-76th-...,Beaches MLS,F10380250,N,Y,26.234444,-80.22739
504,PAST SALE,August-7-2023,Condo/Co-op,3130 Holiday Springs Blvd #306,Margate,FL,33063.0,86000.0,1.0,1.0,...,Sold,,,https://www.redfin.com/FL/Margate/3130-Holiday...,Beaches MLS,F10384224,N,Y,26.264539,-80.233845


In [66]:
print(df_filtered['URL'].iloc[796])

https://www.redfin.com/FL/Lauderhill/2800-NW-56th-Ave-33313/unit-G102/home/41603436


In [67]:
# Correct the prices, if needed
df_filtered.at[115,'PRICE']=(250000)
df_filtered.at[587,'PRICE']=(160000)
df_filtered.at[757,'PRICE']=(229000)
df_filtered.at[324,'PRICE']=(235000)
df_filtered.at[36,'PRICE']=(290000)
df_filtered.at[796,'PRICE']=(140000)

In [68]:
# Find problem psf by searching for a '0' value
df_filtered.loc[df_filtered['$/SQUARE FEET'] == '0'][['SOLD DATE','ADDRESS','CITY','$/SQUARE FEET','PRICE','SQUARE FEET']]

Unnamed: 0,SOLD DATE,ADDRESS,CITY,$/SQUARE FEET,PRICE,SQUARE FEET


In [69]:
# Corrections, if needed
df_filtered.at[115,'$/SQUARE FEET']=(250000/825)
df_filtered.at[587,'$/SQUARE FEET']=(160000/1295)
df_filtered.at[757,'$/SQUARE FEET']=(229000/1440)
df_filtered.at[324,'$/SQUARE FEET']=(235000/659)
df_filtered.at[36,'$/SQUARE FEET']=(290000/1060)
df_filtered.at[796,'$/SQUARE FEET']=(140000/878)

In [70]:
# Find problem psf by searching for low values
df_filtered.sort_values(by='$/SQUARE FEET',ascending=True).head(20)[['PRICE','ADDRESS','CITY','$/SQUARE FEET']]

Unnamed: 0,PRICE,ADDRESS,CITY,$/SQUARE FEET
885,65000.0,3801 Environ Blvd #115,Lauderhill,59.0
457,74000.0,4751 NW 21st St #501,Lauderhill,78.0
792,100000.0,3301 Spanish Moss Ter #3301,Lauderhill,90.0
739,95000.0,2840 Somerset Dr Unit 204M,Lauderdale Lakes,94.0
871,160000.0,3841 Environ Blvd #536,Lauderhill,101.0
427,90000.0,7960 Sunrise Lakes Dr N #304,Sunrise,106.0
439,119900.0,3801 Environ Blvd #219,Lauderhill,109.0
183,110000.0,3330 Spanish Moss Ter #406,Lauderhill,110.0
122,120000.0,5570 NW 44th St Unit 216A,Lauderhill,111.0
136,115000.0,9310 Sunrise Lakes Blvd #108,Sunrise,112.0


In [71]:
print(df_filtered.URL.iloc[8])

https://www.redfin.com/FL/Lauderhill/2930-NW-55th-Ave-33313/unit-1A/home/41601716


In [72]:
# # Drop sales that aren't condos but labeled as such
# df_filtered = df_filtered.drop(1320)

## Make Maps

In [73]:
### Create a price column formatted as currency ###
df_filtered['PRICE_AS_CURRENCY'] = df_filtered['PRICE'].apply(lambda x: "${:,.0f}".format(x))
### Set formatting for Beds, Baths ###
df_filtered['YEAR BUILT DISPLAY'] = df_filtered['YEAR BUILT'].apply(lambda x: '{:.0f}'.format(x))
df_filtered['PRICE_SQUARE_FEET_AS_CURRENCY'] = df_filtered['$/SQUARE FEET'].apply(lambda x: '${:,.0f}'.format(x))

In [74]:
df_filtered = df_filtered.sort_values(by=['PRICE'], ascending=False)
### Insert different colors for top 10 sales vs. the rest ###
df_filtered['COLOR'] = ''
### Create RANK column ###
df_filtered['RANK'] = 0
### Insert RANK values ###
df_filtered['RANK'] = range(1, len(df_filtered) + 1)
# use numpy to assign values to the 'COLOR' column
df_filtered['COLOR'] = np.where(df_filtered['RANK'] <= 10, 'orange', 'blue')

## HTML Popup Formatter

In [75]:
### Define list of columns to drop from DF ###
columns_drop = ['SALE TYPE','PROPERTY TYPE','STATE OR PROVINCE','ZIP OR POSTAL CODE','HOA/MONTH','STATUS','NEXT OPEN HOUSE START TIME','NEXT OPEN HOUSE END TIME','SOURCE','MLS#','FAVORITE','INTERESTED','SQUARE FEET','LOT SIZE']

In [76]:
### Drop the columns ###
df_filtered = df_filtered.drop(columns=columns_drop)

In [77]:
def popup_html(row):
    Price = row['PRICE_AS_CURRENCY']
    Address = row['ADDRESS']
    City = row['CITY']
    sold_date = row['SOLD DATE']
    beds = row['BEDS']
    baths = row['BATHS']
    psf = row['PRICE_SQUARE_FEET_AS_CURRENCY']
    year_built = row['YEAR BUILT DISPLAY']
    rank = row['RANK']
    
    html = '''<!DOCTYPE html>
    <html>
    <strong>Price: </strong>{}'''.format(Price) + '''<br>
    <strong>Address: </strong>{}'''.format(Address) + '''<br>
    <strong>City: </strong>{}'''.format(City) + '''<br>
    <strong>Sold: </strong>{}'''.format(sold_date) + '''<br>
    <strong>Beds: </strong>{}'''.format(beds) + '''<br>
    <strong>Baths: </strong>{}'''.format(baths) + '''<br>
    <strong>Price per sf: </strong>{}'''.format(psf) + '''<br>
    <strong>Year Built: </strong>{}'''.format(year_built) + '''<br>
    <strong>Price Rank: </strong>{}'''.format(rank) + '''
    </html>
    '''
    return html

In [78]:
### Create map container ###
m = folium.Map(location=df_filtered[["LATITUDE", "LONGITUDE"]].mean().to_list(),zoom_start=10,tiles=None)

### Create title ###
title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(f"August 2023 Condo Sales")

m.get_root().html.add_child(folium.Element(title_html))

# Create two FeatureGroups for different color pins
fg_blue = folium.FeatureGroup(name='All other sales')
fg_orange = folium.FeatureGroup(name='Top 10 Sales')

folium.GeoJson(BC,tooltip='Broward County',name='Broward County').add_to(m)

for index, row in df_filtered.iterrows():
    # Add the markers to the appropriate FeatureGroup based on the color
    if row['COLOR'] == 'blue':
        marker = folium.Marker(
            location=[row['LATITUDE'], row['LONGITUDE']],
            radius=5,
            fill=True,
            icon=folium.Icon(color=row['COLOR']),
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_blue)
    else:
        marker = folium.Marker(
            location=[row['LATITUDE'], row['LONGITUDE']],
            radius=5,
            fill=True,
            icon=folium.Icon(color=row['COLOR']),
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_orange)

# Add the FeatureGroups to the map
fg_orange.add_to(m)
fg_blue.add_to(m)

folium.TileLayer('OpenStreetMap',control=False).add_to(m)

# Add LayerControl to the map
folium.map.LayerControl(collapsed=False).add_to(m)

# Display map
m

NameError: name 'BC' is not defined

In [None]:
m.save('index.html')

## Summary Info

In [None]:
BR = '\n'

ME = '\033[1m' + 'Most Expensive' + '\033[0m'
LE = '\033[1m' + 'Least Expensive' + '\033[0m'

MAX_PSF = '\033[1m' + 'Highest Price Per Square Foot' + '\033[0m'
MIN_PSF = '\033[1m' + 'Lowest Price Per Square Foot' + '\033[0m'

Newest = '\033[1m' + 'Newest' + '\033[0m'
Oldest = '\033[1m' + 'Oldest' + '\033[0m'

In [None]:
df_filtered.columns

In [None]:
df_filtered['FULL_ADDRESS'] = df_filtered['ADDRESS'] + ' ' + df_filtered['CITY']

In [None]:
print(df_filtered.loc[df_filtered['PRICE'].idxmin()]['URL'])

In [None]:
print(f"{ME}{BR}{df_filtered.loc[df_filtered['PRICE'].idxmax()]['LOCATION']}, {df_filtered.loc[df_filtered['PRICE'].idxmax()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['PRICE'].idxmax()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['PRICE'].idxmax()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['PRICE'].idxmax()]['YEAR BUILT']:.0f}")
print(f"{LE}{BR}{df_filtered.loc[df_filtered['PRICE'].idxmin()]['LOCATION']}, {df_filtered.loc[df_filtered['PRICE'].idxmin()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['PRICE'].idxmin()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['PRICE'].idxmin()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['PRICE'].idxmin()]['YEAR BUILT']:.0f}")

print(f"{MAX_PSF}{BR}{df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmax()]['LOCATION']}, {df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmax()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmax()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmax()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmax()]['YEAR BUILT']:.0f}")
print(f"{MIN_PSF}{BR}{df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmin()]['LOCATION']}, {df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmin()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmin()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmin()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmin()]['YEAR BUILT']:.0f}")

print(f"{Newest}{BR}{df_filtered.loc[df_filtered['YEAR BUILT'].idxmax()]['LOCATION']}, {df_filtered.loc[df_filtered['YEAR BUILT'].idxmax()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['YEAR BUILT'].idxmax()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['YEAR BUILT'].idxmax()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['YEAR BUILT'].idxmax()]['YEAR BUILT']:.0f}")
print(f"{Oldest}{BR}{df_filtered.loc[df_filtered['YEAR BUILT'].idxmin()]['LOCATION']}, {df_filtered.loc[df_filtered['YEAR BUILT'].idxmin()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['YEAR BUILT'].idxmin()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['YEAR BUILT'].idxmin()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['YEAR BUILT'].idxmin()]['YEAR BUILT']:.0f}")

## Time on Market Calculator

In [None]:
print(df_filtered.loc[df_filtered['PRICE'].idxmin()]['URL'])

In [None]:
from datetime import datetime, timedelta

date1 = datetime(2023, 7, 8) ## List (Earlier) date
date2 = datetime(2023, 8, 17) ## Close (Later) date

delta = date2 - date1
num_days = delta.days

print(num_days)

## Map URL Snagger

In [None]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

In [None]:
cwd = os.getcwd()

cwd = cwd.split('/')

final_name = base_name + cwd[-1]
print(final_name)

## Get Summary Data

In [None]:
print('SALES INFO')
print(f'Number of sales: {len(df_filtered)}')
print('--------')
print(f'Total sale price: ${df_filtered["PRICE"].sum():,.0f}')
print('--------')
print(f'Median sale price: ${df_filtered["PRICE"].median():,.0f}')
print('--------')
print(f'Max sale price: ${df_filtered["PRICE"].max():,.0f}')
print('--------')
print(f'Min sale price: ${df_filtered["PRICE"].min():,.0f}')
print('------------------------------------------------')
print('PSF INFO')
print(f'Max price per square foot: ${df_filtered["$/SQUARE FEET"].max():,.0f}')
print('--------')
print(f'Min price per square foot: ${df_filtered["$/SQUARE FEET"].min():,.0f}')
print('--------')
print(f'Median price per square foot: ${df_filtered["$/SQUARE FEET"].median():,.0f}')
print('------------------------------------------------')
print('CONDO AGES')
print(f'Newest building: {df_filtered["YEAR BUILT"].max()}')
print('----------')
print(f'Oldest building: {df_filtered["YEAR BUILT"].min()}')
print('----------')
print(f'Average building age: {df_filtered["YEAR BUILT"].mean()}')