## Imports

In [18]:
import pandas as pd
import numpy as np
import re
import os
import folium

## Data Read-in

In [19]:
df = pd.read_csv('redfin_2023-05-04-10-13-35.csv')

## Data Clean

In [20]:
df = df.rename(columns={'URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)':'URL'})

In [21]:
df = df.dropna(subset=['SOLD DATE'])

In [22]:
# Define list of desired months (excluding current month)
desired_months = ['April']

# Filter DataFrame to include only entries from desired months
df_filtered = df[df['SOLD DATE'].str.split('-', expand=True)[0].isin(desired_months)]

# Reset the index
df_filtered = df_filtered.reset_index(drop=True)

In [23]:
# Data checks
print(df_filtered['PRICE'].isna().value_counts())
print('-------')
print(df_filtered['$/SQUARE FEET'].isna().value_counts())
print('-------')
print(df_filtered['YEAR BUILT'].isna().value_counts())
print('-------')

False    929
True       1
Name: PRICE, dtype: int64
-------
False    925
True       5
Name: $/SQUARE FEET, dtype: int64
-------
False    930
Name: YEAR BUILT, dtype: int64
-------


In [24]:
df_filtered.loc[df_filtered['PRICE'] == '0']

Unnamed: 0,SALE TYPE,SOLD DATE,PROPERTY TYPE,ADDRESS,CITY,STATE OR PROVINCE,ZIP OR POSTAL CODE,PRICE,BEDS,BATHS,...,STATUS,NEXT OPEN HOUSE START TIME,NEXT OPEN HOUSE END TIME,URL,SOURCE,MLS#,FAVORITE,INTERESTED,LATITUDE,LONGITUDE


In [25]:
df_filtered['PRICE'] = pd.to_numeric(df_filtered['PRICE'])
df_filtered['$/SQUARE FEET'] = pd.to_numeric(df_filtered['$/SQUARE FEET'])
df_filtered['YEAR BUILT'] = pd.to_numeric(df_filtered['YEAR BUILT'])
df_filtered['LATITUDE'] = pd.to_numeric(df_filtered['LATITUDE'])
df_filtered['LONGITUDE'] = pd.to_numeric(df_filtered['LONGITUDE'])

In [26]:
df_filtered.sort_values(by='PRICE',ascending=True).head(20)

Unnamed: 0,SALE TYPE,SOLD DATE,PROPERTY TYPE,ADDRESS,CITY,STATE OR PROVINCE,ZIP OR POSTAL CODE,PRICE,BEDS,BATHS,...,STATUS,NEXT OPEN HOUSE START TIME,NEXT OPEN HOUSE END TIME,URL,SOURCE,MLS#,FAVORITE,INTERESTED,LATITUDE,LONGITUDE
6,PAST SALE,April-26-2023,Condo/Co-op,501 NE 31st St #1106,Miami,FL,33137.0,742.0,3.0,2.0,...,Sold,,,https://www.redfin.com/FL/Miami/501-NE-31st-St...,MARMLS,A11370847,N,Y,25.806882,-80.187314
708,PAST SALE,April-20-2023,Condo/Co-op,3131 NE 7th Ave #302,Miami,FL,33137.0,749.0,1.0,1.5,...,Sold,,,https://www.redfin.com/FL/Miami/3131-NE-7th-Av...,MARMLS,A11287274,N,Y,25.807047,-80.185578
858,PAST SALE,April-7-2023,Condo/Co-op,900 4th St #2,Miami Beach,FL,33139.0,1100.0,2.0,2.0,...,Sold,,,https://www.redfin.com/FL/Miami-Beach/900-4th-...,MARMLS,A11261208,N,Y,25.773439,-80.137381
49,PAST SALE,April-17-2023,Condo/Co-op,10863 NW 7th St Unit 14-20,Miami,FL,33172.0,31000.0,3.0,2.0,...,Sold,,,https://www.redfin.com/FL/Miami/10863-NW-7th-S...,MARMLS,A11347841,N,Y,25.778187,-80.373743
152,PAST SALE,April-19-2023,Condo/Co-op,12500 NE 15th Ave #615,North Miami,FL,33161.0,65000.0,1.0,1.0,...,Sold,,,https://www.redfin.com/FL/North-Miami/12500-NE...,MARMLS,A11342731,N,Y,25.890878,-80.168586
682,PAST SALE,April-3-2023,Condo/Co-op,303 NE 187th St #725,Miami,FL,33179.0,85000.0,1.0,1.5,...,Sold,,,https://www.redfin.com/FL/Miami/303-NE-187th-S...,MARMLS,A11298359,N,Y,25.946916,-80.195093
472,PAST SALE,April-27-2023,Condo/Co-op,13390 NE 7th Ave #304,North Miami,FL,33161.0,104500.0,1.0,1.0,...,Sold,,,https://www.redfin.com/FL/North-Miami/13390-NE...,MARMLS,A11313361,N,Y,25.898291,-80.185458
647,PAST SALE,April-24-2023,Condo/Co-op,20330 NE 2nd Ave #9,Miami Gardens,FL,33179.0,107000.0,1.0,1.0,...,Sold,,,https://www.redfin.com/FL/Miami-Gardens/20330-...,MARMLS,A11327734,N,Y,25.961636,-80.198534
836,PAST SALE,April-6-2023,Condo/Co-op,5300 NW 87th Ave #1202,Doral,FL,33178.0,110000.0,1.0,1.0,...,Sold,,,https://www.redfin.com/FL/Doral/5300-NW-87th-A...,MARMLS,A11277606,N,Y,25.820668,-80.338239
273,PAST SALE,April-14-2023,Condo/Co-op,20490 NW 7th Ave #15,Miami Gardens,FL,33169.0,110000.0,1.0,1.0,...,Sold,,,https://www.redfin.com/FL/Miami/20490-NW-7th-A...,MARMLS,A11355515,N,Y,25.96291,-80.214321


In [32]:
print(df_filtered['URL'].iloc[682])

https://www.redfin.com/FL/Miami/303-NE-187th-St-33179/unit-725/home/43010542


In [33]:
# Correct the prices, if needed
df_filtered.at[6,'PRICE']=(742000)
df_filtered.at[708,'PRICE']=(749000)
df_filtered.at[858,'PRICE']=(1100000)
df_filtered.at[49,'PRICE']=(310000)
df_filtered.at[152,'PRICE']=(650000)

In [36]:
# Find problem psf by searching for a '0' value
df_filtered.loc[df_filtered['$/SQUARE FEET'] == '0'][['SOLD DATE','ADDRESS','CITY','$/SQUARE FEET','PRICE','SQUARE FEET']]

Unnamed: 0,SOLD DATE,ADDRESS,CITY,$/SQUARE FEET,PRICE,SQUARE FEET


In [61]:
# # Corrections, if needed
df_filtered.at[708,'$/SQUARE FEET']=(749000/959)
df_filtered.at[858,'$/SQUARE FEET']=(1100000/1251)
df_filtered.at[6,'$/SQUARE FEET']=(742000/1152)
df_filtered.at[49,'$/SQUARE FEET']=(310000/1215)
df_filtered.at[152,'$/SQUARE FEET']=(650000/1487)
df_filtered.at[682,'$/SQUARE FEET']=(85000/902)

In [62]:
# Find problem psf by searching for low values
df_filtered.sort_values(by='$/SQUARE FEET',ascending=True).head(20)[['PRICE','ADDRESS','CITY','$/SQUARE FEET']]

Unnamed: 0,PRICE,ADDRESS,CITY,$/SQUARE FEET
682,85000.0,303 NE 187th St #725,Miami,94.235033
145,172500.0,1690 NE 191st St Unit 302-1,Miami,103.0
83,176000.0,1690 NE 191 St Unit 414-1,Miami,105.0
718,250000.0,5300 NW 87th Ave #612,Doral,121.0
580,170000.0,1750 NE 191st St Unit 603-1,Miami,129.0
647,107000.0,20330 NE 2nd Ave #9,Miami Gardens,138.0
8,140000.0,460 NE 18th Ave #112,Homestead,139.0
482,167000.0,496 NW 165th St Rd Unit D-612,Miami,142.0
13,148000.0,460 NE 18th Ave #117,Homestead,147.0
169,210000.0,17901 NW 68th Ave Unit T208,Hialeah,148.0


In [63]:
print(df_filtered.URL.iloc[682])

https://www.redfin.com/FL/Miami-Gardens/6705-NW-169th-St-33015/unit-C312/home/42962628


In [17]:
# # Drop sales that aren't condos but labeled as such
# df_filtered = df_filtered.drop(1320)

## Make Maps

In [64]:
### Create a price column formatted as currency ###
df_filtered['PRICE_AS_CURRENCY'] = df_filtered['PRICE'].apply(lambda x: "${:,.0f}".format(x))
### Set formatting for Beds, Baths ###
df_filtered['YEAR BUILT DISPLAY'] = df_filtered['YEAR BUILT'].apply(lambda x: '{:.0f}'.format(x))
df_filtered['PRICE_SQUARE_FEET_AS_CURRENCY'] = df_filtered['$/SQUARE FEET'].apply(lambda x: '${:,.0f}'.format(x))

In [65]:
df_filtered = df_filtered.sort_values(by=['PRICE'], ascending=False)
### Insert different colors for top 10 sales vs. the rest ###
df_filtered['COLOR'] = ''
### Create RANK column ###
df_filtered['RANK'] = 0
### Insert RANK values ###
df_filtered['RANK'] = range(1, len(df_filtered) + 1)
# use numpy to assign values to the 'COLOR' column
df_filtered['COLOR'] = np.where(df_filtered['RANK'] <= 10, 'orange', 'blue')

## HTML Popup Formatter

In [66]:
### Define list of columns to drop from DF ###
columns_drop = ['SALE TYPE','PROPERTY TYPE','STATE OR PROVINCE','ZIP OR POSTAL CODE','HOA/MONTH','STATUS','NEXT OPEN HOUSE START TIME','NEXT OPEN HOUSE END TIME','SOURCE','MLS#','FAVORITE','INTERESTED','SQUARE FEET','LOT SIZE']

In [67]:
### Drop the columns ###
df_filtered = df_filtered.drop(columns=columns_drop)

KeyError: "['SALE TYPE', 'PROPERTY TYPE', 'STATE OR PROVINCE', 'ZIP OR POSTAL CODE', 'HOA/MONTH', 'STATUS', 'NEXT OPEN HOUSE START TIME', 'NEXT OPEN HOUSE END TIME', 'SOURCE', 'MLS#', 'FAVORITE', 'INTERESTED', 'SQUARE FEET', 'LOT SIZE'] not found in axis"

In [None]:
def popup_html(row):
    Price = row['PRICE_AS_CURRENCY']
    Address = row['ADDRESS']
    City = row['CITY']
    sold_date = row['SOLD DATE']
    beds = row['BEDS']
    baths = row['BATHS']
    psf = row['PRICE_SQUARE_FEET_AS_CURRENCY']
    year_built = row['YEAR BUILT DISPLAY']
    rank = row['RANK']
    
    html = '''<!DOCTYPE html>
    <html>
    <strong>Price: </strong>{}'''.format(Price) + '''<br>
    <strong>Address: </strong>{}'''.format(Address) + '''<br>
    <strong>City: </strong>{}'''.format(City) + '''<br>
    <strong>Sold: </strong>{}'''.format(sold_date) + '''<br>
    <strong>Beds: </strong>{}'''.format(beds) + '''<br>
    <strong>Baths: </strong>{}'''.format(baths) + '''<br>
    <strong>Price per sf: </strong>{}'''.format(psf) + '''<br>
    <strong>Year Built: </strong>{}'''.format(year_built) + '''<br>
    <strong>Price Rank: </strong>{}'''.format(rank) + '''
    </html>
    '''
    return html

In [68]:
### Create map container ###
m = folium.Map(location=df_filtered[["LATITUDE", "LONGITUDE"]].mean().to_list(),zoom_start=10,tiles=None)

### Create title ###
title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(f"April 2023 Condo Sales")

m.get_root().html.add_child(folium.Element(title_html))

# Create two FeatureGroups for different color pins
fg_blue = folium.FeatureGroup(name='All other sales')
fg_orange = folium.FeatureGroup(name='Top 10 Sales')

for index, row in df_filtered.iterrows():
    # Add the markers to the appropriate FeatureGroup based on the color
    if row['COLOR'] == 'blue':
        marker = folium.Marker(
            location=[row['LATITUDE'], row['LONGITUDE']],
            radius=5,
            fill=True,
            icon=folium.Icon(color=row['COLOR']),
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_blue)
    else:
        marker = folium.Marker(
            location=[row['LATITUDE'], row['LONGITUDE']],
            radius=5,
            fill=True,
            icon=folium.Icon(color=row['COLOR']),
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_orange)

# Add the FeatureGroups to the map
fg_orange.add_to(m)
fg_blue.add_to(m)

folium.TileLayer('OpenStreetMap',control=False).add_to(m)

# Add LayerControl to the map
folium.map.LayerControl(collapsed=False).add_to(m)

# Display map

<folium.map.LayerControl at 0x7fb9bb187fd0>

In [69]:
m.save('index.html')

## Summary Info

In [70]:
BR = '\n'

ME = '\033[1m' + 'Most Expensive' + '\033[0m'
LE = '\033[1m' + 'Least Expensive' + '\033[0m'

MAX_PSF = '\033[1m' + 'Highest Price Per Square Foot' + '\033[0m'
MIN_PSF = '\033[1m' + 'Lowest Price Per Square Foot' + '\033[0m'

Newest = '\033[1m' + 'Newest' + '\033[0m'
Oldest = '\033[1m' + 'Oldest' + '\033[0m'

In [71]:
df_filtered.columns

Index(['SOLD DATE', 'ADDRESS', 'CITY', 'PRICE', 'BEDS', 'BATHS', 'LOCATION',
       'YEAR BUILT', 'DAYS ON MARKET', '$/SQUARE FEET', 'URL', 'LATITUDE',
       'LONGITUDE', 'PRICE_AS_CURRENCY', 'YEAR BUILT DISPLAY',
       'PRICE_SQUARE_FEET_AS_CURRENCY', 'COLOR', 'RANK', 'FULL_ADDRESS'],
      dtype='object')

In [72]:
df_filtered['FULL_ADDRESS'] = df_filtered['ADDRESS'] + ' ' + df_filtered['CITY']

In [81]:
print(df_filtered.loc[df_filtered['PRICE'].idxmin()]['URL'])

https://www.redfin.com/FL/Miami/303-NE-187th-St-33179/unit-725/home/43010542


In [73]:
print(f"{ME}{BR}{df_filtered.loc[df_filtered['PRICE'].idxmax()]['LOCATION']}, {df_filtered.loc[df_filtered['PRICE'].idxmax()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['PRICE'].idxmax()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['PRICE'].idxmax()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['PRICE'].idxmax()]['YEAR BUILT']:.0f}")
print(f"{LE}{BR}{df_filtered.loc[df_filtered['PRICE'].idxmin()]['LOCATION']}, {df_filtered.loc[df_filtered['PRICE'].idxmin()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['PRICE'].idxmin()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['PRICE'].idxmin()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['PRICE'].idxmin()]['YEAR BUILT']:.0f}")

print(f"{MAX_PSF}{BR}{df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmax()]['LOCATION']}, {df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmax()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmax()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmax()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmax()]['YEAR BUILT']:.0f}")
print(f"{MIN_PSF}{BR}{df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmin()]['LOCATION']}, {df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmin()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmin()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmin()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['$/SQUARE FEET'].idxmin()]['YEAR BUILT']:.0f}")

print(f"{Newest}{BR}{df_filtered.loc[df_filtered['YEAR BUILT'].idxmax()]['LOCATION']}, {df_filtered.loc[df_filtered['YEAR BUILT'].idxmax()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['YEAR BUILT'].idxmax()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['YEAR BUILT'].idxmax()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['YEAR BUILT'].idxmax()]['YEAR BUILT']:.0f}")
print(f"{Oldest}{BR}{df_filtered.loc[df_filtered['YEAR BUILT'].idxmin()]['LOCATION']}, {df_filtered.loc[df_filtered['YEAR BUILT'].idxmin()]['FULL_ADDRESS']} | Price ${df_filtered.loc[df_filtered['YEAR BUILT'].idxmin()]['PRICE']:,.0f} | ${df_filtered.loc[df_filtered['YEAR BUILT'].idxmin()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_filtered.loc[df_filtered['YEAR BUILT'].idxmin()]['YEAR BUILT']:.0f}")

[1mMost Expensive[0m
GROVE AT GRAND BAY, 2675 S Bayshore Dr Ph -01S Miami | Price $17,750,000 | $1,860 psf | Year built: 2016
[1mLeast Expensive[0m
STAR LAKES ESTATES NO 7 C, 303 NE 187th St #725 Miami | Price $85,000 | $94 psf | Year built: 1969
[1mHighest Price Per Square Foot[0m
APOGEE CONDO, 800 S Pointe Dr #1201 Miami Beach | Price $16,200,000 | $3,900 psf | Year built: 2008
[1mLowest Price Per Square Foot[0m
STAR LAKES ESTATES NO 7 C, 303 NE 187th St #725 Miami | Price $85,000 | $94 psf | Year built: 1969
[1mNewest[0m
Estates At Acqualina, 17901 Collins Ave #1605 Sunny Isles Beach | Price $11,625,000 | $2,226 psf | Year built: 2022
[1mOldest[0m
VENETIAN MANOR CONDO, 934 Michigan Ave #108 Miami Beach | Price $285,000 | $528 psf | Year built: 1925


## Time on Market Calculator

In [29]:
# print(df_Current.loc[df_Current['YEAR BUILT'].idxmin()])

In [82]:
from datetime import datetime, timedelta

date1 = datetime(2022, 2, 23) ## List (Earlier) date
date2 = datetime(2023, 4, 11) ## Close (Later) date

delta = date2 - date1
num_days = delta.days

print(num_days)

412


## Map URL Snagger

In [74]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

In [75]:
cwd = os.getcwd()

cwd = cwd.split('/')

final_name = base_name + cwd[-1]
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/MD_condo_sales_month_ending_april_2023


## Get Summary Data

In [76]:
print('SALES INFO')
print(f'Number of sales: {len(df_filtered)}')
print('--------')
print(f'Total sale price: ${df_filtered["PRICE"].sum():,.0f}')
print('--------')
print(f'Median sale price: ${df_filtered["PRICE"].median():,.0f}')
print('--------')
print(f'Max sale price: ${df_filtered["PRICE"].max():,.0f}')
print('--------')
print(f'Min sale price: ${df_filtered["PRICE"].min():,.0f}')
print('------------------------------------------------')
print('PSF INFO')
print(f'Max price per square foot: ${df_filtered["$/SQUARE FEET"].max():,.0f}')
print('--------')
print(f'Min price per square foot: ${df_filtered["$/SQUARE FEET"].min():,.0f}')
print('--------')
print(f'Median price per square foot: ${df_filtered["$/SQUARE FEET"].median():,.0f}')
print('------------------------------------------------')
print('CONDO AGES')
print(f'Newest building: {df_filtered["YEAR BUILT"].max()}')
print('----------')
print(f'Oldest building: {df_filtered["YEAR BUILT"].min()}')
print('----------')
print(f'Average building age: {df_filtered["YEAR BUILT"].mean()}')

SALES INFO
Number of sales: 930
--------
Total sale price: $646,429,006
--------
Median sale price: $410,000
--------
Max sale price: $17,750,000
--------
Min sale price: $85,000
------------------------------------------------
PSF INFO
Max price per square foot: $3,900
--------
Min price per square foot: $94
--------
Median price per square foot: $432
------------------------------------------------
CONDO AGES
Newest building: 2022.0
----------
Oldest building: 1925.0
----------
Average building age: 1989.1204301075268
