# Find all relevant information near a Zonaprop location
- https://www.machinelearnear.com/
- https://www.youtube.com/@machinelearnear

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample Zonaprop data: one reference point given as (latitude, longitude) in degrees
zonaprop_listings = pd.DataFrame(
    data=[(-34.5835146, -58.4537686)],
    columns=['latitude', 'longitude'],
)

## `Airbnb` listings & reviews

In [3]:
from sklearn.neighbors import BallTree

def find_within_radius(reference_df, target_df, radius_km=1):
    """Return the rows of ``target_df`` that lie within ``radius_km`` of ``reference_df`` points.

    Both frames must have ``latitude`` and ``longitude`` columns expressed in
    degrees. Distances are great-circle (haversine) distances on a sphere of
    radius 6371 km.

    Parameters
    ----------
    reference_df : pandas.DataFrame
        Points to search around (one radius query per row).
    target_df : pandas.DataFrame
        Candidate rows; the result is a positional selection from this frame,
        so its columns, dtypes and index labels are preserved.
    radius_km : float, default 1
        Search radius in kilometres.

    Returns
    -------
    pandas.DataFrame
        Matching rows of ``target_df``, grouped in the order of the rows of
        ``reference_df``. A target row that is within range of several
        reference rows appears once per match. If either input frame is empty,
        an empty frame with the columns of ``target_df`` is returned.
    """
    coord_cols = ['latitude', 'longitude']

    # Nothing to search from, or nothing to search in: return an empty frame that
    # still carries the target columns so downstream column lookups keep working
    # (BallTree cannot be built from zero samples).
    if len(reference_df) == 0 or len(target_df) == 0:
        return target_df.iloc[0:0].copy()

    # The haversine metric works on a unit sphere: coordinates are passed in
    # radians and the radius as an angle (distance / Earth's radius in km).
    radius_rad = radius_km / 6371

    tree = BallTree(
        np.deg2rad(target_df[coord_cols].to_numpy(dtype=float)),
        metric='haversine',
    )

    # One batched query for all reference points; the result holds one array of
    # positional target indices per reference row.
    matches = tree.query_radius(
        np.deg2rad(reference_df[coord_cols].to_numpy(dtype=float)),
        r=radius_rad,
    )

    # Select all matches in a single positional lookup instead of growing a
    # DataFrame with pd.concat inside a loop.
    return target_df.iloc[np.concatenate(list(matches))]

### Find closest listings

In [4]:
# Load the pre-processed Airbnb listings (path is relative to the notebook's working directory)
airbnb_listings = pd.read_csv(filepath_or_buffer='processed/airbnb_listings.csv')

In [5]:
# Example: collect every Airbnb listing within 0.3 km of the Zonaprop location
closest_listings = find_within_radius(
    reference_df=zonaprop_listings,
    target_df=airbnb_listings,
    radius_km=0.3,
)
closest_listings.head()

Unnamed: 0,id,listing_url,last_scraped,neighbourhood_cleansed,latitude,longitude,room_type,beds,price,number_of_reviews_l30d,review_scores_rating,review_scores_location,review_scores_value,estimated_nights_booked_l30d,bathrooms,estimated_price_per_night_in_USD
1035,3140077,https://www.airbnb.com/rooms/3140077,2023-12-27,Chacarita,-34.58347,-58.45243,Entire home/apt,3.0,43731.0,1,5.0,4.72,4.83,low,1.5,55.0
8659,40677601,https://www.airbnb.com/rooms/40677601,2023-12-27,Chacarita,-34.58407,-58.4527,Entire home/apt,1.0,18367.0,1,4.62,4.67,4.43,low,1.0,23.0
6298,32665979,https://www.airbnb.com/rooms/32665979,2023-12-29,Chacarita,-34.58369,-58.45257,Entire home/apt,1.0,,0,4.77,4.82,4.86,low,1.5,
13822,660221314715899969,https://www.airbnb.com/rooms/660221314715899969,2023-12-27,Chacarita,-34.58368,-58.45293,Entire home/apt,1.0,27550.0,1,4.89,5.0,4.78,low,1.0,35.0
6187,32378359,https://www.airbnb.com/rooms/32378359,2023-12-27,Chacarita,-34.58556,-58.45258,Entire home/apt,1.0,23300.0,0,5.0,5.0,5.0,low,1.0,29.0


In [6]:
# Keep only listings that have a price estimate, all three review scores and a room type
required_columns = [
    'estimated_price_per_night_in_USD',
    'review_scores_rating',
    'review_scores_location',
    'review_scores_value',
    'room_type',
]
filtered_df = closest_listings.dropna(subset=required_columns)

# Mean estimated nightly price (USD), split by room type
nightly_price = filtered_df['estimated_price_per_night_in_USD']
is_entire_home = filtered_df['room_type'] == 'Entire home/apt'
is_private_room = filtered_df['room_type'] == 'Private room'
average_price_entire_home = nightly_price[is_entire_home].mean()
average_price_private_room = nightly_price[is_private_room].mean()

# Number of listings per "estimated nights booked in the last 30 days" category
booking_counts = filtered_df['estimated_nights_booked_l30d'].value_counts()
low_count = booking_counts.get('low', 0)
medium_count = booking_counts.get('medium', 0)
high_count = booking_counts.get('high', 0)

# "more likely" only when the 'high' category strictly outnumbers both other categories
rental_outlook = 'more likely' if high_count > max(low_count, medium_count) else 'less likely'

# Mean review scores
average_review_score_rating = filtered_df['review_scores_rating'].mean()
average_review_score_location = filtered_df['review_scores_location'].mean()
average_review_score_value = filtered_df['review_scores_value'].mean()

# Assemble the markdown report
markdown_summary = f"""
### Summary of Airbnb Listings Near Zonaprop Location

- **Average Estimated Price Per Night in USD**:
    - Entire home/apt: {average_price_entire_home:.2f} USD
    - Private room: {average_price_private_room:.2f} USD (if applicable)

- **Booking Likelihood (Last 30 Days)**:
    - Low: {low_count} listings
    - Medium: {medium_count} listings
    - High: {high_count} listings
    - The data suggests that an apartment in this neighbourhood is {rental_outlook} to get rented.

- **Average Review Scores**:
    - Overall Rating: {average_review_score_rating:.2f}/5
    - Location Rating: {average_review_score_location:.2f}/5
    - Value Rating: {average_review_score_value:.2f}/5
"""

In [7]:
# Print the summary as plain text (the markdown markup is shown raw, not rendered)
print(markdown_summary)


### Summary of Airbnb Listings Near Zonaprop Location

- **Average Estimated Price Per Night in USD**:
    - Entire home/apt: 41.62 USD
    - Private room: 18.29 USD (if applicable)

- **Booking Likelihood (Last 30 Days)**:
    - Low: 48 listings
    - Medium: 6 listings
    - High: 8 listings
    - The data suggests that an apartment in this neighbourhood is less likely to get rented.

- **Average Review Scores**:
    - Overall Rating: 4.84/5
    - Location Rating: 4.83/5
    - Value Rating: 4.79/5

