In [1]:
from homeharvest import scrape_property
from datetime import datetime
import pandas as pd

In [2]:
# Build a timestamped CSV name (YYYYMMDD_HHMMSS) so every run writes to a new file.
# NOTE(review): the later read_data(...) call uses a hard-coded file name rather than
# `filename`, so the file written here is not the one that cell loads - confirm intent.
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"HomeHarvest_{current_timestamp}.csv"

# Scrape listings for the location/filters below. The result is used as a
# DataFrame-like object in this cell (len(), .to_csv(), .head()).
properties = scrape_property(
  location="New York City, NY",
  listing_type="for_sale",  # alternatives per the original note: for_rent, pending (presumably also sold - TODO confirm)
  past_days=30,  # per the original note: listed in last 30 days for for_sale/for_rent; sold in last 30 days for sold listings

  # Optional filters, currently disabled:
  # property_type=['single_family','multi_family'],
  # date_from="2023-05-01", # alternative to past_days
  # date_to="2023-05-28",
  # foreclosure=True,
  # mls_only=True,  # only fetch MLS listings
)
print(f"Number of properties: {len(properties)}")

# Export to csv (index=False keeps the row index out of the file)
properties.to_csv(filename, index=False)
# Plain-text preview of the first rows
print(properties.head())

Number of properties: 6442
                                        property_url property_id  listing_id  \
0  https://www.realtor.com/realestateandhomes-det...  4077245933  2981002761   
1  https://www.realtor.com/realestateandhomes-det...  3223695413  2981002745   
2  https://www.realtor.com/realestateandhomes-det...  4920617255  2981003116   
3  https://www.realtor.com/realestateandhomes-det...  3832627333  2981009567   
4  https://www.realtor.com/realestateandhomes-det...  3013845144  2980999135   

    mls                                mls_id    status  \
0  UCNY                   1822026691706750481  FOR_SALE   
1  TENY                               9970158  FOR_SALE   
2  CCRN  9da130d3-acbd-4c26-bec1-9f28a608a856  FOR_SALE   
3  UCNY                   1826392623938942081  FOR_SALE   
4  LINY                                853461  FOR_SALE   

                                                text         style  \
0                                        Coming Soon          COOP 

In [3]:
def read_data(file_name):
    """
    Load a CSV file into a Pandas DataFrame.

    file_name: str, path of the CSV file to load
    return: Pandas DataFrame with the parsed contents of the file
    """
    # Parsing is delegated entirely to pandas with its default options.
    loaded_frame = pd.read_csv(file_name)
    return loaded_frame

# Load a previously exported CSV by its literal file name and render it.
# NOTE(review): this name is hard-coded and is not the `filename` produced by the
# scrape cell, so a fresh Restart & Run All needs this exact file to already exist.
# Confirm whether pinning to this snapshot is intentional.
data = read_data("HomeHarvest_20250425_154044.csv")
display(data)

Unnamed: 0,property_url,property_id,listing_id,mls,mls_id,status,text,style,full_street_line,street,...,builder_id,builder_name,office_id,office_mls_set,office_name,office_email,office_phones,nearby_schools,primary_photo,alt_photos
0,https://www.realtor.com/realestateandhomes-det...,9970797479,2981007048,STNY,11483599,PENDING,SHORT SALE SUBJECT TO THIRD PARTY APPROVAL. HO...,SINGLE_FAMILY,749 E 223rd St,749 E 223rd St,...,,,,O-STNY-22748,REALTY MASTERS INTERNATIONAL INC,paulrawh.nybroker@gmail.com,"[{'number': '9179127827', 'type': None, 'prima...","New York City Geographic District #11, Bronx C...",http://ap.rdcpix.com/1d834b288f5871263c20ac0fb...,http://ap.rdcpix.com/1d834b288f5871263c20ac0fb...
1,https://www.realtor.com/realestateandhomes-det...,9763769548,2981007299,SINY,2502294,FOR_SALE,,LAND,110 Brook St,110 Brook St,...,,,100454741.0,O-SINY-O11396,Keller Williams Realty Staten Island,valerievargas@kwsiny.com,"[{'number': '7187667159', 'type': 'Office', 'p...","NEW VENTURES CHARTER SCHOOL, New York City Geo...",http://ap.rdcpix.com/312537c21c46ded88337acde7...,http://ap.rdcpix.com/312537c21c46ded88337acde7...
2,https://www.realtor.com/realestateandhomes-det...,4373587091,2981008113,CCRN,b18ab9f8-0563-4416-ae76-bbc64fdec716,FOR_SALE,Sprawling TURN-KEY office suite with 6 separat...,COOP,98-120 Queens Blvd Apt 1F,98-120 Queens Blvd,...,,,1825634.0,O-CCRN-851B747B-7D83-40D4-857E-829767561B86,Corcoran Brooklyn Heights,yael.streit@corcoran.com,"[{'number': '7188529050', 'type': 'Office', 'p...","New York City Special Schools - District 75, N...",http://ap.rdcpix.com/95b16ee7fc5fa31e39ec8569f...,http://ap.rdcpix.com/95b16ee7fc5fa31e39ec8569f...
3,https://www.realtor.com/realestateandhomes-det...,3999961443,2981006202,UCNY,1826970711563433089,FOR_SALE,Perched on the 5th floor of a boutique pre-war...,COOP,35 E 10th St Apt 5A,35 E 10th St,...,,,2930336.0,O-UCNY-COMPASS,Compass,agentexperience@compass.com,"[{'number': '2129139058', 'type': 'Office', 'p...","SUCCESS ACADEMY CHARTER SCHOOL-UNION SQUARE, N...",http://ap.rdcpix.com/06886c6851564d9ff20dcac6c...,http://ap.rdcpix.com/06886c6851564d9ff20dcac6c...
4,https://www.realtor.com/realestateandhomes-det...,3948723737,2981007965,SINY,2502295,FOR_SALE,Welcome a beautifully maintained and upgraded ...,CONDOS,423 Willow Rd E Unit 2,423 Willow Rd,...,,,,O-SINY-O11355,EXP Realty,ny.broker@exprealty.net,,"NEW VENTURES CHARTER SCHOOL, New York City Geo...",http://ap.rdcpix.com/5dfa39cc76bcc2457b4729f03...,http://ap.rdcpix.com/5dfa39cc76bcc2457b4729f03...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6410,https://www.realtor.com/realestateandhomes-det...,3904036492,2979786201,NRNY,a2b2e052-0064-4262-9fc2-2e536f85bfb4,FOR_SALE,Exceptional living space! This 20 feet wide ho...,TOWNHOMES,239 E 48th St,239 E 48th St,...,,,2166206.0,O-NRNY-715F01A2-7CE4-41C9-A9BC-26478E281D52,Sotheby's International Realty - East Side Man...,marissa.ghesquiere@sothebyshomes.com,"[{'number': '2126067660', 'type': 'Office', 'p...",New York City Geographic District # 2,http://ap.rdcpix.com/f927b46a5cf7c5b7a26bcfd44...,http://ap.rdcpix.com/f927b46a5cf7c5b7a26bcfd44...
6411,https://www.realtor.com/realestateandhomes-det...,9434052616,2979787661,LINY,839269,FOR_SALE,Charming 1-Bedroom Co-op with Private Entrance...,COOP,61-15 98th St Unit ST-F,61-15 98th St,...,,,3908617.0,O-LINY-EXP05,EXP Realty,nybroker@exprealty.net,"[{'number': '8882760630', 'type': 'Office', 'p...","New York City Special Schools - District 75, N...",http://ap.rdcpix.com/38455524dbd211be64139efb7...,http://ap.rdcpix.com/38455524dbd211be64139efb7...
6412,https://www.realtor.com/realestateandhomes-det...,4377385282,2979785076,RPNY,9913102,FOR_SALE,,MULTI_FAMILY,137A 22nd St,137A 22nd St,...,,,,O-RPNY-DECO,DECO - Decode Real Estate REBNY,,,"New York City Special Schools - District 75, H...",http://ap.rdcpix.com/fda5752ca5fe57406b6a9c75c...,http://ap.rdcpix.com/fda5752ca5fe57406b6a9c75c...
6413,https://www.realtor.com/realestateandhomes-det...,4901716421,2979786416,LINY,840377,FOR_SALE,Spacious 1 bed 1 bath with double terrace sunn...,COOP,98-10 64th Ave Unit 4D,98-10 64th Ave,...,,,2216822.0,O-LINY-IGRA01,E Z Sell Realty,olga@ezsellrealty.com,"[{'number': '7183967653', 'type': 'Office', 'p...","New York City Special Schools - District 75, N...",http://ap.rdcpix.com/89723b9129a083274f3be153a...,http://ap.rdcpix.com/89723b9129a083274f3be153a...


In [None]:
def pre_process(data):
    """
    Remove the columns listed below and discard rows that have no 'sqft' value.

    data: Pandas DataFrame; must contain every column named in the list below
          as well as 'sqft' (a missing column raises KeyError)
    return: new Pandas DataFrame; the input frame is left unmodified and the
            surviving rows keep their original index labels
    """
    columns_to_drop = [
        'property_url', 'property_id', 'listing_id', 'mls', 'mls_id', 'status',
        'text', 'full_street_line', 'street', 'unit', 'city', 'state',
        'zip_code', 'days_on_mls', 'list_price_min', 'list_price_max',
        'sold_price', 'last_sold_date', 'assessed_value', 'estimated_value',
        'tax', 'tax_history', 'new_construction', 'lot_sqft', 'price_per_sqft',
        'latitude', 'longitude', 'neighborhoods', 'county', 'fips_code',
        'hoa_fee', 'agent_id', 'agent_name', 'agent_email', 'agent_phones',
        'agent_mls_set', 'agent_nrds_id', 'broker_id', 'broker_name',
        'builder_id', 'builder_name', 'office_id', 'office_mls_set',
        'office_name', 'office_email', 'office_phones', 'primary_photo',
        'alt_photos',
    ]
    # drop() and dropna() each return a new frame, so the caller's frame is untouched.
    return data.drop(columns=columns_to_drop).dropna(subset=['sqft'])
# Apply the preprocessing to the loaded frame under a new name (`data` itself is
# left as loaded) and render the result.
clean_data = pre_process(data)
display(clean_data)

Unnamed: 0,style,beds,full_baths,half_baths,sqft,year_built,list_price,list_date,stories,parking_garage,nearby_schools
0,SINGLE_FAMILY,3.0,2.0,,1060.0,1901.0,180000.0,2025-04-25,,,"New York City Geographic District #11, Bronx C..."
2,COOP,0.0,1.0,,877.0,1939.0,300000.0,2025-04-25,,,"New York City Special Schools - District 75, N..."
4,CONDOS,2.0,1.0,1.0,816.0,1979.0,498888.0,2025-04-25,2.0,,"NEW VENTURES CHARTER SCHOOL, New York City Geo..."
6,COOP,2.0,2.0,,1370.0,1929.0,1675000.0,2025-04-25,,,New York City Geographic District # 3
7,CONDOS,3.0,3.0,,2041.0,2006.0,5595000.0,2025-04-25,2.0,,New York City Geographic District # 2
...,...,...,...,...,...,...,...,...,...,...,...
6410,TOWNHOMES,6.0,5.0,2.0,4441.0,1860.0,7750000.0,2025-03-26,,,New York City Geographic District # 2
6411,COOP,1.0,1.0,,728.0,1960.0,279000.0,2025-03-26,1.0,,"New York City Special Schools - District 75, N..."
6412,MULTI_FAMILY,5.0,5.0,,2400.0,,1900000.0,2025-03-26,2.0,,"New York City Special Schools - District 75, H..."
6413,COOP,1.0,1.0,,750.0,1949.0,289000.0,2025-03-26,,,"New York City Special Schools - District 75, N..."
