In [1]:
import pandas as pd
import geopandas as gpd
import geopy

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import matplotlib.pyplot as plt

import folium
from folium.plugins import FastMarkerCluster

## Update real estate transactions
Web-scraped on 6 Jan 2021; however, the transactions had only been updated through 25 Sep 2020.

In [2]:
# Nominatim geocoder client; it is wrapped in a rate limiter before any address lookups are made.
locator = Nominatim(user_agent="my_Geocoder")

In [3]:
# Load the scraped real estate transactions from the CSV export.
df = pd.read_csv("real_est_6Jan21.csv")

In [4]:
# The CSV starts with two leftover index columns; discard them by name
# and keep every remaining column.
df = df.drop(columns=df.columns[:2])

In [5]:
# Show the frame after the leading index columns have been removed.
df

Unnamed: 0,City,Address,Zip,Price,Date,Seller,Buyer,Transaction Type
0,Ithaca,"1108 N Aurora St(Ithaca, None)",,"$333,000",September-25-2020,"Osherow, David","Zussman-Dobbins, Zachariah",Standard (arm's length)
1,Ithaca,"312 E Falls St(Ithaca, None)",,"$297,500",September-24-2020,"Nell, Marika R","McKinven, Mary Jane",Standard (arm's length)
2,Ithaca,"403 Warren Rd(Ithaca, None)",,"$252,000",September-24-2020,"Correll, Andrew J","Jones, Laurine D",Standard (arm's length)
3,Ithaca,"109 Pine Tree Rd(Ithaca, None)",,"$319,000",September-24-2020,"Oland, Helen S","Servay, Elsbet",Standard (arm's length)
4,Ithaca,"402-404 W Court St(Ithaca, None)",,"$100,000",September-23-2020,"Wells, John","Wells, Charles",Buyer is seller
...,...,...,...,...,...,...,...,...
145,Ithaca,"212 Ridgedale Road(Ithaca, 14850)",14850,"$270,000",June-30-2020,"Hoellrich, Cameron M.","Chen, Guopeng",Standard (arm's length)
146,Ithaca,"107 Whitetail Dr(Ithaca, None)",,$1,June-29-2020,"Reichert, Michele","Michele S. Reichert, Living Trust",Buyer is seller
147,Ithaca,"211 Cobb Street(Ithaca, 14850)",14850,"$395,605",June-29-2020,"Garner, Matthew Ryan","Kapustin, Max",Standard (arm's length)
148,Ithaca,"205 Eastern Heights Drive(Ithaca, 14850)",14850,"$226,000",June-29-2020,"Salamon, Barbara","Madsen, Rachel M.",Standard (arm's length)


### Drop irrelevant sale types

In [6]:
# List the distinct transaction types present before deciding which to keep.
df['Transaction Type'].unique()

array(["Standard (arm's length)", 'Buyer is seller', nan,
       'Buyer is seller, Not a sale deed, Sale between relatives',
       'Buyer is seller, Not a sale deed', 'Not a sale deed',
       'Interest conveyed', 'Not a sale deed, Sale between relatives',
       'Related company sale', 'Buyer is seller, Sale between relatives',
       'Sale between relatives', 'Buyer is seller, Related company sale',
       'Related company sale, Not a sale deed',
       'Buyer is seller, Related company sale, Sale between relatives',
       'Not a sale deed, Governement sale'], dtype=object)

In [7]:
# Keep only rows whose transaction type is one of the three retained
# categories; rows with a missing type are dropped as well.
df = df[df['Transaction Type'].isin([
    "Standard (arm's length)",
    'Interest conveyed',
    "Related company sale",
])]

In [8]:
# Discard rows whose recorded price is exactly the string '$1'.
df = df.loc[df['Price'].ne('$1')]

In [9]:
# Convert prices such as "$333,000" to integers by stripping '$' and ','.
# The pattern is a raw string so that "\$" is not treated as an (invalid)
# Python string escape, and `assign` builds a new frame rather than writing
# into the filtered slice (which would raise SettingWithCopyWarning).
df = df.assign(Price=df['Price'].replace(r'[\$,]', '', regex=True).astype(int))

### Clean Address Column for geocoding

In [10]:
# Keep only the street part of each address (everything before the first
# '('), then build the full query string that is sent to the geocoder.
# `assign` returns a new frame instead of writing into the filtered slice,
# which avoids pandas' SettingWithCopyWarning.
df = df.assign(Address=df['Address'].str.split('(').str[0])
df = df.assign(ADDRESS=df['Address'] + ', Tompkins County, NY, USA')
df.head(10)

Unnamed: 0,City,Address,Zip,Price,Date,Seller,Buyer,Transaction Type,ADDRESS
0,Ithaca,1108 N Aurora St,,333000,September-25-2020,"Osherow, David","Zussman-Dobbins, Zachariah",Standard (arm's length),"1108 N Aurora St, Tompkins County, NY, USA"
1,Ithaca,312 E Falls St,,297500,September-24-2020,"Nell, Marika R","McKinven, Mary Jane",Standard (arm's length),"312 E Falls St, Tompkins County, NY, USA"
2,Ithaca,403 Warren Rd,,252000,September-24-2020,"Correll, Andrew J","Jones, Laurine D",Standard (arm's length),"403 Warren Rd, Tompkins County, NY, USA"
3,Ithaca,109 Pine Tree Rd,,319000,September-24-2020,"Oland, Helen S","Servay, Elsbet",Standard (arm's length),"109 Pine Tree Rd, Tompkins County, NY, USA"
7,Ithaca,17 Chase Ln,,455000,September-22-2020,"McCarthy, Maris M","Sweet, Jai",Standard (arm's length),"17 Chase Ln, Tompkins County, NY, USA"
8,Ithaca,115 Auburn St,,342500,September-22-2020,"Colby, Phillip J","Barken Family Realty, LLC",Standard (arm's length),"115 Auburn St, Tompkins County, NY, USA"
11,Ithaca,105 Briarwood Dr,,404000,September-21-2020,Lucente Homes LLC,"Luh, Tabitha",Standard (arm's length),"105 Briarwood Dr, Tompkins County, NY, USA"
12,Ithaca,22 John St,,455000,September-18-2020,"Spreng, Robert Nathan","Longchamps, Louis",Standard (arm's length),"22 John St, Tompkins County, NY, USA"
14,Ithaca,303 Richard Pl,,270000,September-17-2020,"Thompson, A Quay","Green, Judith Joanna",Standard (arm's length),"303 Richard Pl, Tompkins County, NY, USA"
15,Ithaca,169 Snyder Hill Rd,,232500,September-17-2020,"Altes, Dean","Nunziata, Laurie",Standard (arm's length),"169 Snyder Hill Rd, Tompkins County, NY, USA"


In [11]:
# Geocode every query string, waiting at least one second between requests.
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
df = df.assign(location=df['ADDRESS'].apply(geocode))

# Addresses that could not be geocoded have a falsy location; store None
# as their point so they can be filtered out afterwards.
df = df.assign(
    point=df['location'].apply(lambda loc: tuple(loc.point) if loc else None)
)

# Pull latitude / longitude / altitude out of each point one row at a time.
# Building a DataFrame from the raw list of points only yields three columns
# when the first entry is a tuple, so a failed lookup on the first row (or
# on every row) would break the column assignment. Missing points become NaN.
df = df.assign(
    latitude=df['point'].apply(
        lambda pt: pt[0] if isinstance(pt, tuple) else float('nan')
    ),
    longitude=df['point'].apply(
        lambda pt: pt[1] if isinstance(pt, tuple) else float('nan')
    ),
    altitude=df['point'].apply(
        lambda pt: pt[2] if isinstance(pt, tuple) else float('nan')
    ),
)

In [12]:
# Keep only the rows that have a latitude value.
df = df[df['latitude'].notna()]

## Mapping out points

In [13]:
# Base map for the sale markers, using the light CartoDB Positron tiles.
# NOTE(review): the centre coordinates are presumably Ithaca, NY — confirm.
map2 = folium.Map(
    location=[42.4440, -76.5019],
    zoom_start=12,
    tiles='cartodbpositron',
)

In [14]:
# Add one small crimson circle to the map per row, positioned at the row's
# latitude/longitude. The apply result (the created Circle objects) is the
# cell's displayed value.
df.apply(
    lambda sale: folium.Circle(
        location=[sale['latitude'], sale['longitude']],
        radius=2,
        color='crimson',
    ).add_to(map2),
    axis=1,
)

0      <folium.vector_layers.Circle object at 0x1410c...
1      <folium.vector_layers.Circle object at 0x1410c...
2      <folium.vector_layers.Circle object at 0x1410c...
3      <folium.vector_layers.Circle object at 0x1410c...
7      <folium.vector_layers.Circle object at 0x1410c...
                             ...                        
143    <folium.vector_layers.Circle object at 0x1410e...
144    <folium.vector_layers.Circle object at 0x1410e...
145    <folium.vector_layers.Circle object at 0x1410e...
147    <folium.vector_layers.Circle object at 0x1410e...
148    <folium.vector_layers.Circle object at 0x1410e...
Length: 105, dtype: object

In [15]:
# Render the map as the cell output.
map2