## Imports

In [206]:
import pandas as pd
import numpy as np
import re
import os
import folium

## Data Read-in

In [207]:
df = pd.read_csv('redfin_2023-04-04-10-04-08.csv',dtype='str')

## Data Clean

In [208]:
df = df.rename(columns={'URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)':'URL'})

In [209]:
df = df.dropna(subset=['SOLD DATE'])

In [210]:
# Define list of desired months (excluding current month)
desired_months = ['February', 'March']

# Filter DataFrame to include only entries from desired months
df_filtered = df[df['SOLD DATE'].str.split('-', expand=True)[0].isin(desired_months)]

# Reset the index
df_filtered = df_filtered.reset_index(drop=True)

In [211]:
# Data checks
print(df_filtered['PRICE'].isna().value_counts())
print('-------')
print(df_filtered['$/SQUARE FEET'].isna().value_counts())
print('-------')

False    1812
Name: PRICE, dtype: int64
-------
False    1797
True       15
Name: $/SQUARE FEET, dtype: int64
-------


In [212]:
df_filtered.loc[df_filtered['PRICE'] == '0']

Unnamed: 0,SALE TYPE,SOLD DATE,PROPERTY TYPE,ADDRESS,CITY,STATE OR PROVINCE,ZIP OR POSTAL CODE,PRICE,BEDS,BATHS,...,STATUS,NEXT OPEN HOUSE START TIME,NEXT OPEN HOUSE END TIME,URL,SOURCE,MLS#,FAVORITE,INTERESTED,LATITUDE,LONGITUDE


In [213]:
df_filtered['PRICE'] = pd.to_numeric(df_filtered['PRICE'])
df_filtered['$/SQUARE FEET'] = pd.to_numeric(df_filtered['$/SQUARE FEET'])
df_filtered['YEAR BUILT'] = pd.to_numeric(df_filtered['YEAR BUILT'])
df_filtered['LATITUDE'] = pd.to_numeric(df_filtered['LATITUDE'])
df_filtered['LONGITUDE'] = pd.to_numeric(df_filtered['LONGITUDE'])

In [214]:
df_filtered.sort_values(by='PRICE',ascending=True).head(20)

Unnamed: 0,SALE TYPE,SOLD DATE,PROPERTY TYPE,ADDRESS,CITY,STATE OR PROVINCE,ZIP OR POSTAL CODE,PRICE,BEDS,BATHS,...,STATUS,NEXT OPEN HOUSE START TIME,NEXT OPEN HOUSE END TIME,URL,SOURCE,MLS#,FAVORITE,INTERESTED,LATITUDE,LONGITUDE
995,PAST SALE,March-28-2023,Condo/Co-op,2750 NE 183rd St #104,Aventura,FL,33160,255,2,2.0,...,Sold,,,https://www.redfin.com/FL/Aventura/2750-NE-183...,MARMLS,A11305707,N,Y,25.943908,-80.145145
800,PAST SALE,February-16-2023,Condo/Co-op,9441 SW 4th St #101,Miami,FL,33174,315,3,2.0,...,Sold,,,https://www.redfin.com/FL/Miami/9441-SW-4th-St...,MARMLS,A11316416,N,Y,25.765477,-80.350522
1291,PAST SALE,February-15-2023,Condo/Co-op,1351 NE Miami Gardens Dr Unit 922E,Miami,FL,33179,15500,1,2.0,...,Sold,,,https://www.redfin.com/FL/Miami/1351-NE-Miami-...,MARMLS,A11297694,N,Y,25.945827,-80.17481
582,PAST SALE,March-3-2023,Condo/Co-op,13480 NE 6th Ave #310,North Miami,FL,33161,40000,1,1.0,...,Sold,,,https://www.redfin.com/FL/North-Miami/13480-NE...,MARMLS,A11317232,N,Y,25.89894,-80.187168
906,PAST SALE,March-9-2023,Condo/Co-op,16425 Collins Ave #2,Sunny Isles Beach,FL,33160,40000,0,1.0,...,Sold,,,https://www.redfin.com/FL/Sunny-Isles-Beach/16...,MARMLS,A11313999,N,Y,25.927799,-80.121458
467,PAST SALE,March-29-2023,Condo/Co-op,20281 E Country Club Dr Unit CAB1,Aventura,FL,33180,50000,0,,...,Sold,,,https://www.redfin.com/FL/Aventura/20281-E-Cou...,MARMLS,A11342462,N,Y,25.965506,-80.125855
1686,PAST SALE,February-10-2023,Condo/Co-op,13480 NE 6th Ave #205,North Miami,FL,33161,50000,2,2.0,...,Sold,,,https://www.redfin.com/FL/North-Miami/13480-NE...,MARMLS,A11228625,N,Y,25.89894,-80.187168
531,PAST SALE,March-10-2023,Condo/Co-op,1401 NE Miami Gardens Dr #197,Miami,FL,33179,60000,0,1.0,...,Sold,,,https://www.redfin.com/FL/Miami/1401-NE-Miami-...,MARMLS,A11305770,N,Y,25.945427,-80.172641
1811,PAST SALE,March-9-2023,Condo/Co-op,251 NE 187th St #827,Miami,FL,33179,65500,1,1.0,...,Sold,,,https://www.redfin.com/FL/Miami/251-NE-187th-S...,MARMLS,A10680799,N,Y,25.946879,-80.196152
362,PAST SALE,February-24-2023,Condo/Co-op,19051 NE 2nd Ave #1505,Miami,FL,33179,89000,1,1.0,...,Sold,,,https://www.redfin.com/FL/Miami/19051-NE-2nd-A...,MARMLS,A11327317,N,Y,25.949734,-80.197047


In [215]:
print(df_filtered['URL'].iloc[1291])

https://www.redfin.com/FL/Miami/1351-NE-Miami-Gardens-Dr-33179/unit-922E/home/43011337


In [216]:
# Correct the prices, if needed
df_filtered.at[995,'PRICE']=(255000)
df_filtered.at[800,'PRICE']=(315000)
df_filtered.at[1291,'PRICE']=(155000)

In [217]:
# Find problem psf by searching for a '0' value
df_filtered.loc[df_filtered['$/SQUARE FEET'] == '0'][['SOLD DATE','ADDRESS','CITY','$/SQUARE FEET','PRICE','SQUARE FEET']]

Unnamed: 0,SOLD DATE,ADDRESS,CITY,$/SQUARE FEET,PRICE,SQUARE FEET


In [218]:
# Corrections, if needed
df_filtered.at[800,'$/SQUARE FEET']=(315000/1266)
df_filtered.at[995,'$/SQUARE FEET']=(255000/1073)

In [219]:
# Find problem psf by searching for low values
df_filtered.sort_values(by='$/SQUARE FEET',ascending=True).head(20)[['ADDRESS','CITY','$/SQUARE FEET']]

Unnamed: 0,ADDRESS,CITY,$/SQUARE FEET
1320,6770 Indian Creek Dr Unit CU-46,Miami Beach,0.0
1291,1351 NE Miami Gardens Dr Unit 922E,Miami,18.0
582,13480 NE 6th Ave #310,North Miami,55.0
1686,13480 NE 6th Ave #205,North Miami,55.0
1811,251 NE 187th St #827,Miami,77.0
212,1551 NE Miami Gardens Dr #336,Miami,100.0
362,19051 NE 2nd Ave #1505,Miami,105.0
947,245 NE 191st St #3002,Miami,114.0
842,1710 NE 191st St Unit 109-3,Miami,114.0
845,1750 NE 191st St Unit 400-1,Miami,116.0


In [220]:
print(df_filtered.URL.iloc[582])

https://www.redfin.com/FL/North-Miami/13480-NE-6th-Ave-33161/unit-310/home/43434030


In [221]:
# Corrections, if needed
#
#
#

In [222]:
# Drop sales that aren't condos but labeled as such
df_filtered = df_filtered.drop(1320)

## Make Maps

In [223]:
df_Current = df_filtered.loc[df_filtered['SOLD DATE'].str.contains('March')]

In [224]:
### Create a price column formatted as currency ###
df_Current['PRICE_AS_CURRENCY'] = df_Current['PRICE'].apply(lambda x: "${:,.0f}".format(x))
### Set formatting for Beds, Baths ###
df_Current['YEAR BUILT'] = df_Current['YEAR BUILT'].apply(lambda x: '{:.0f}'.format(x))
df_Current['PRICE_SQUARE_FEET_AS_CURRENCY'] = df_Current['$/SQUARE FEET'].apply(lambda x: '${:,.0f}'.format(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Current['PRICE_AS_CURRENCY'] = df_Current['PRICE'].apply(lambda x: "${:,.0f}".format(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Current['YEAR BUILT'] = df_Current['YEAR BUILT'].apply(lambda x: '{:.0f}'.format(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Current['PRICE_SQUARE_

In [225]:
### Insert different colors for top 10 sales vs. the rest ###
df_Current['COLOR'] = ''
### Create RANK column ###
df_Current['RANK'] = 0
### Insert RANK values ###
df_Current['RANK'] = range(1, len(df_Current) + 1)
# use numpy to assign values to the 'COLOR' column
df_Current['COLOR'] = np.where(df_Current['RANK'] <= 10, 'orange', 'blue')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Current['COLOR'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Current['RANK'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Current['RANK'] = range(1, len(df_Current) + 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer]

## HTML Popup Formatter

In [226]:
### Define list of columns to drop from DF ###
columns_drop = ['SALE TYPE','PROPERTY TYPE','STATE OR PROVINCE','ZIP OR POSTAL CODE','HOA/MONTH','STATUS','NEXT OPEN HOUSE START TIME','NEXT OPEN HOUSE END TIME','URL','SOURCE','MLS#','FAVORITE','INTERESTED','SQUARE FEET','LOT SIZE']

In [227]:
### Drop the columns ###
df_Current = df_Current.drop(columns=columns_drop)

In [228]:
def popup_html(row):
    Price = row['PRICE_AS_CURRENCY']
    Address = row['ADDRESS']
    City = row['CITY']
    sold_date = row['SOLD DATE']
    beds = row['BEDS']
    baths = row['BATHS']
    psf = row['PRICE_SQUARE_FEET_AS_CURRENCY']
    year_built = row['YEAR BUILT']
    rank = row['RANK']
    
    html = '''<!DOCTYPE html>
    <html>
    <strong>Price: </strong>{}'''.format(Price) + '''<br>
    <strong>Address: </strong>{}'''.format(Address) + '''<br>
    <strong>City: </strong>{}'''.format(City) + '''<br>
    <strong>Sold: </strong>{}'''.format(sold_date) + '''<br>
    <strong>Beds: </strong>{}'''.format(beds) + '''<br>
    <strong>Baths: </strong>{}'''.format(baths) + '''<br>
    <strong>Price per sf: </strong>{}'''.format(psf) + '''<br>
    <strong>Year Built: </strong>{}'''.format(year_built) + '''<br>
    <strong>Price Rank: </strong>{}'''.format(rank) + '''
    </html>
    '''
    return html

In [233]:
### Create map container ###
m = folium.Map(location=df_March[["LATITUDE", "LONGITUDE"]].mean().to_list(),zoom_start=10.5,tiles=None)

### Create title ###
title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(f"March 2023 Condo Sales")

m.get_root().html.add_child(folium.Element(title_html))

# Create two FeatureGroups for different color pins
fg_blue = folium.FeatureGroup(name='All other sales')
fg_orange = folium.FeatureGroup(name='Top 10 Sales')

for index, row in df_Current.iterrows():
    # Add the markers to the appropriate FeatureGroup based on the color
    if row['COLOR'] == 'blue':
        marker = folium.Marker(
            location=[row['LATITUDE'], row['LONGITUDE']],
            radius=5,
            fill=True,
            icon=folium.Icon(color=row['COLOR']),
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_blue)
    else:
        marker = folium.Marker(
            location=[row['LATITUDE'], row['LONGITUDE']],
            radius=5,
            fill=True,
            icon=folium.Icon(color=row['COLOR']),
            popup=folium.Popup(popup_html(row), max_width=400))
        marker.add_to(fg_orange)

# Add the FeatureGroups to the map
fg_orange.add_to(m)
fg_blue.add_to(m)

folium.TileLayer('OpenStreetMap',control=False).add_to(m)

# Add LayerControl to the map
folium.map.LayerControl(collapsed=False).add_to(m)

# Display map

<folium.map.LayerControl at 0x7fda5eafd130>

## Summary Info

In [230]:
BR = '\n'

ME = '\033[1m' + 'Most Expensive' + '\033[0m'
LE = '\033[1m' + 'Least Expensive' + '\033[0m'

MAX_PSF = '\033[1m' + 'Highest Price Per Square Foot' + '\033[0m'
MIN_PSF = '\033[1m' + 'Lowest Price Per Square Foot' + '\033[0m'

Newest = '\033[1m' + 'Newest' + '\033[0m'
Oldest = '\033[1m' + 'Oldest' + '\033[0m'

In [231]:
### Convert 'YEAR BUILT' back to integer ###
df_Current['YEAR BUILT'] = pd.to_numeric(df_Current['YEAR BUILT'])

In [232]:
print(f"{ME}{BR}{df_Current.loc[df_Current['PRICE'].idxmax()]['LOCATION']}, {df_Current.loc[df_Current['PRICE'].idxmax()]['ADDRESS']} | Price ${df_Current.loc[df_Current['PRICE'].idxmax()]['PRICE']:,.0f} | ${df_Current.loc[df_Current['PRICE'].idxmax()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_Current.loc[df_Current['PRICE'].idxmax()]['YEAR BUILT']}")
print(f"{LE}{BR}{df_Current.loc[df_Current['PRICE'].idxmin()]['LOCATION']}, {df_Current.loc[df_Current['PRICE'].idxmin()]['ADDRESS']} | Price ${df_Current.loc[df_Current['PRICE'].idxmin()]['PRICE']:,.0f} | ${df_Current.loc[df_Current['PRICE'].idxmin()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_Current.loc[df_Current['PRICE'].idxmin()]['YEAR BUILT']}")

print(f"{MAX_PSF}{BR}{df_Current.loc[df_Current['$/SQUARE FEET'].idxmax()]['LOCATION']}, {df_Current.loc[df_Current['$/SQUARE FEET'].idxmax()]['ADDRESS']} | Price ${df_Current.loc[df_Current['$/SQUARE FEET'].idxmax()]['PRICE']:,.0f} | ${df_Current.loc[df_Current['$/SQUARE FEET'].idxmax()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_Current.loc[df_Current['$/SQUARE FEET'].idxmax()]['YEAR BUILT']}")
print(f"{MIN_PSF}{BR}{df_Current.loc[df_Current['$/SQUARE FEET'].idxmin()]['LOCATION']}, {df_Current.loc[df_Current['$/SQUARE FEET'].idxmin()]['ADDRESS']} | Price ${df_Current.loc[df_Current['$/SQUARE FEET'].idxmin()]['PRICE']:,.0f} | ${df_Current.loc[df_Current['$/SQUARE FEET'].idxmin()]['$/SQUARE FEET']:,.0f} psf | Year built: {df_Current.loc[df_Current['$/SQUARE FEET'].idxmin()]['YEAR BUILT']}")

print(f"{Newest}{BR}{df_Current.loc[df_Current['YEAR BUILT'].idxmax()]['LOCATION']}, {df_Current.loc[df_Current['YEAR BUILT'].idxmax()]['ADDRESS']} | Price ${df_Current.loc[df_Current['YEAR BUILT'].idxmax()]['PRICE']:,.0f} | ${df_Current.loc[df_Current['YEAR BUILT'].idxmax()]['$/SQUARE FEET']} psf | Year built: {df_Current.loc[df_Current['YEAR BUILT'].idxmax()]['YEAR BUILT']}")
print(f"{Oldest}{BR}{df_Current.loc[df_Current['YEAR BUILT'].idxmin()]['LOCATION']}, {df_Current.loc[df_Current['YEAR BUILT'].idxmin()]['ADDRESS']} | Price ${df_Current.loc[df_Current['YEAR BUILT'].idxmin()]['PRICE']:,.0f} | ${df_Current.loc[df_Current['YEAR BUILT'].idxmin()]['$/SQUARE FEET']} psf | Year built: {df_Current.loc[df_Current['YEAR BUILT'].idxmin()]['YEAR BUILT']}")

[1mMost Expensive[0m
OCEANSIDE @ FISHER ISL CO, 7954 Fisher Island Dr #7954 | Price $21,000,000 | $3,078 psf | Year built: 1989
[1mLeast Expensive[0m
ZURICH CONDO, 13480 NE 6th Ave #310 | Price $40,000 | $55 psf | Year built: 1974
[1mHighest Price Per Square Foot[0m
CONTINUUM ON SOUTH BEACH, 50 S Pointe Dr #2802 | Price $10,150,000 | $4,055 psf | Year built: 2008
[1mLowest Price Per Square Foot[0m
ZURICH CONDO, 13480 NE 6th Ave #310 | Price $40,000 | $55 psf | Year built: 1974
[1mNewest[0m
Estates at Acqualina, 17975 Collins Ave #1202 | Price $8,950,000 | $nan psf | Year built: 2023
[1mOldest[0m
PALM GARDEN CONDO, 760 Meridian Ave #4 | Price $240,000 | $471.0 psf | Year built: 1923


## Time on Market Calculator

In [None]:
from datetime import datetime, timedelta

date1 = datetime(2022, 10, 24) ## List (Earlier) date
date2 = datetime(2023, 2, 14) ## Close (Later) date

delta = date2 - date1
num_days = delta.days

print(num_days)

## Map URL Snagger

In [234]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

In [None]:
cwd = os.getcwd()

cwd = cwd.split('/')

final_name = base_name + cwd[-1]
print(final_name)