In [32]:
from datetime import datetime
from math import radians, cos, sin, asin, sqrt

import pandas as pd
from homeharvest import scrape_property
from IPython.display import HTML, display

In [2]:
# Notebook-wide pandas display settings, applied in one pass:
#   max_columns / max_rows = None -> show all columns and all rows
#   width = None                  -> auto-adjust display width
#   max_colwidth = 50             -> limit each column to 50 characters
display_settings = {
    'display.max_columns': None,
    'display.max_rows': None,
    'display.width': None,
    'display.max_colwidth': 50,
}
for option_name, option_value in display_settings.items():
    pd.set_option(option_name, option_value)

In [18]:
# Stamp the export path with the run time (YYYYMMDD_HHMMSS) so that separate
# runs of the notebook write to separate CSV files.
run_started = datetime.now()
current_timestamp = run_started.strftime("%Y%m%d_%H%M%S")
filename = "D:/Niket/HomeHarvest_{}_2.csv".format(current_timestamp)

In [None]:
# Fetch rental listings for Shadyside, Pittsburgh.
#
# listing_type is one of: for_sale, for_rent, pending.
#
# Optional filters that are currently not applied:
#   past_days=30            sold in last 30 days - listed in last 30 days if (for_sale, for_rent)
#   date_from="2023-05-01"  alternative to past_days
#   date_to="2023-05-28"
#   foreclosure=True
#   mls_only=True           only fetch MLS listings
properties = scrape_property(
    location="shadyside, pittsburgh, PA",
    listing_type="for_rent",
)
display(properties)

In [None]:
# Render the Shadyside listings as an HTML table with clickable listing URLs.
#
# The anchor markup is built on a copy (`properties_linked`) so `properties`
# keeps its raw URLs. Writing the anchors back into `properties` would wrap
# them in another <a> tag every time this cell is re-run.
properties_linked = properties.assign(
    property_url=lambda listings: (
        '<a href="' + listings['property_url'] + '" target="_blank">' + listings['property_url'] + '</a>'
    )
)

# NOTE: escape=False is needed for the anchors to render, but it turns off
# escaping for every cell, so any markup in the scraped text is emitted as-is.
html = properties_linked.to_html(escape=False)
# Prepend a style rule that truncates long cell content with an ellipsis.
truncate_width = f'<style>.dataframe td {{ max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }}</style>{html}'
display(HTML(truncate_width))

In [20]:
# Fetch rental listings around Carnegie Mellon University.
#
# listing_type is one of: for_sale, for_rent, pending.
# radius limits the search to an area around `location`
# (NOTE(review): unit is presumably miles - confirm against the homeharvest docs).
#
# Optional filters that are currently not applied:
#   past_days=30            sold in last 30 days - listed in last 30 days if (for_sale, for_rent)
#   date_from="2023-05-01"  alternative to past_days
#   date_to="2023-05-28"
#   foreclosure=True
#   mls_only=True           only fetch MLS listings
properties_cmu = scrape_property(
    location="carnegie mellon university",
    listing_type="for_rent",
    radius=3,
)
#display(properties)

In [17]:
# Sanity check: show which type the scraper returned.
result_type = type(properties_cmu)
print(result_type)

<class 'pandas.core.frame.DataFrame'>


In [None]:
# Render the CMU-radius listings as an HTML table with clickable listing URLs.
#
# The anchor markup is built on a copy (`properties_cmu_linked`) so that
# `properties_cmu` keeps its raw URLs. Overwriting the column in place would
# leak the <a> markup into the CSV exports and into every frame derived from
# `properties_cmu`, and would nest the anchors again on each re-run.
properties_cmu_linked = properties_cmu.assign(
    property_url=lambda listings: (
        '<a href="' + listings['property_url'] + '" target="_blank">' + listings['property_url'] + '</a>'
    )
)

# NOTE: escape=False is needed for the anchors to render, but it turns off
# escaping for every cell, so any markup in the scraped text is emitted as-is.
html = properties_cmu_linked.to_html(escape=False)
# Prepend a style rule that truncates long cell content with an ellipsis.
truncate_width = f'<style>.dataframe td {{ max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }}</style>{html}'
display(HTML(truncate_width))

In [16]:
# Save the full CMU-radius result set (without the index column), then echo
# its first rows as a quick check of what was written.
properties_cmu.to_csv(filename, index=False)
cmu_preview = properties_cmu.head()
print(cmu_preview)

                                        property_url   mls   mls_id    status  \
0  https://www.realtor.com/realestateandhomes-det...  PPPA  1646288  FOR_RENT   
1  https://www.realtor.com/realestateandhomes-det...  TRBO  1439210  FOR_RENT   
2  https://www.realtor.com/realestateandhomes-det...  TRBO  1173412  FOR_RENT   
3  https://www.realtor.com/realestateandhomes-det...  RNTR  9042513  FOR_RENT   
4  https://www.realtor.com/realestateandhomes-det...  PPPA  1646235  FOR_RENT   

                        style                   street     unit        city  \
0         PropertyType.CONDOS      1700  Grandview Ave  Apt 703  Pittsburgh   
1         PropertyType.CONDOS        908  Bryn Mawr Rd    Apt 3  Pittsburgh   
2  PropertyType.SINGLE_FAMILY        3147  Brighton Rd     None  Pittsburgh   
3      PropertyType.APARTMENT     2425  Beechwood Blvd   Unit 1  Pittsburgh   
4         PropertyType.CONDOS  320  Fort Duquesne Blvd  Apt 26O  Pittsburgh   

  state zip_code beds full_baths half_

In [None]:
# Keep only the listings whose ZIP code is one of the three target codes.
target_zip_codes = ['15206', '15213', '15232']
in_target_zip = properties_cmu['zip_code'].isin(target_zip_codes)
filtered_properties_shadyside = properties_cmu[in_target_zip]
filtered_properties_shadyside

In [26]:
# Save the ZIP-filtered subset to its own file. Writing it to `filename` would
# overwrite the full `properties_cmu` export that was saved to that same path.
filename_filtered = f"D:/Niket/HomeHarvest_{current_timestamp}_2_filtered.csv"
filtered_properties_shadyside.to_csv(filename_filtered, index=False)
print(filtered_properties_shadyside.head())

                                         property_url   mls    mls_id  \
12  https://www.realtor.com/realestateandhomes-det...  AVAL  60304868   
14  https://www.realtor.com/realestateandhomes-det...  AVAL  60970384   
16  https://www.realtor.com/realestateandhomes-det...  AVAL  61041877   
20  https://www.realtor.com/realestateandhomes-det...  RLXB   6441990   
21  https://www.realtor.com/realestateandhomes-det...  RLXB   5680101   

      status                   style               street     unit  \
12  FOR_RENT  PropertyType.APARTMENT       260  Shady Ave   Apt 10   
14  FOR_RENT  PropertyType.APARTMENT    5432  Stanton Ave  Unit 1F   
16  FOR_RENT  PropertyType.APARTMENT       265  Shady Ave   Apt 11   
20  FOR_RENT  PropertyType.APARTMENT      732  Filbert St     None   
21  FOR_RENT  PropertyType.APARTMENT  5806  Ellsworth Ave     None   

          city state zip_code  beds full_baths half_baths  sqft year_built  \
12  Pittsburgh    PA    15206     1          1       None  Non

In [28]:
# Reference point for the distance calculation: the CMU location as
# (latitude, longitude) in decimal degrees.
cmu_lat, cmu_lon = 40.443798, -79.945321

In [34]:
def haversine_distance(lat1, lon1, lat2, lon2, radius=6371):
    """
    Calculate the great circle distance between two points
    on the surface of a sphere (coordinates in decimal degrees).

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 6371, the radius of the Earth in kilometers.

    Returns
    -------
    float
        Distance between the two points along the surface of the sphere.
        NaN coordinates produce a NaN result.
    """
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Rounding can leave `a` marginally above 1 for (near-)antipodal points,
    # which would make asin() raise a domain error. A plain comparison is used
    # instead of min() so that a NaN value is left untouched and propagates.
    if a > 1.0:
        a = 1.0

    c = 2 * asin(sqrt(a))
    return c * radius

In [39]:
# Distance of every listing from the CMU reference point, computed with
# haversine_distance. Iterating over the two coordinate columns directly
# (instead of a row-wise DataFrame.apply) also works when the frame is empty.
distances_from_cmu = [
    haversine_distance(lat, lon, cmu_lat, cmu_lon)
    for lat, lon in zip(
        filtered_properties_shadyside['latitude'],
        filtered_properties_shadyside['longitude'],
    )
]

# .assign returns a new frame instead of writing a column into the filtered
# slice of properties_cmu, which avoids pandas' SettingWithCopyWarning and
# makes this cell safe to re-run.
filtered_properties_shadyside = filtered_properties_shadyside.assign(
    Distance_from_CMU=distances_from_cmu
)

# Sort the entries by distance, nearest first.
shadyside = filtered_properties_shadyside.sort_values(by='Distance_from_CMU')

In [40]:
# Save the distance-sorted listings to a separate timestamped CSV (without the
# index column) and echo the first rows of the sorted frame.
filename2 = "D:/Niket/HomeHarvest_{}_3.csv".format(current_timestamp)
shadyside.to_csv(filename2, index=False)
nearest_preview = shadyside.head()
print(nearest_preview)

                                           property_url   mls     mls_id  \
135   https://www.realtor.com/realestateandhomes-det...  RNTR    9023977   
1315  https://www.realtor.com/realestateandhomes-det...  RENT  100029944   
850   https://www.realtor.com/realestateandhomes-det...  ABOD       2268   
845   https://www.realtor.com/realestateandhomes-det...  ABOD       2267   
667   https://www.realtor.com/realestateandhomes-det...  ABOD    4656639   

        status                       style           street  unit        city  \
135   FOR_RENT  PropertyType.SINGLE_FAMILY   3  Boundary St  None  Pittsburgh   
1315  FOR_RENT      PropertyType.APARTMENT   135 N Craig St  None  Pittsburgh   
850   FOR_RENT      PropertyType.APARTMENT   147 N Craig St  None  Pittsburgh   
845   FOR_RENT      PropertyType.APARTMENT   151 N Craig St  None  Pittsburgh   
667   FOR_RENT      PropertyType.APARTMENT  4628  Bayard St  None  Pittsburgh   

     state zip_code  beds full_baths half_baths  sqft ye