In [None]:
import math
import os
from getpass import getpass

import folium
import requests
from IPython.display import display

# Google Places API key.
# The key is deliberately not stored in the notebook. It is read from the
# GOOGLE_MAPS_API_KEY environment variable, falling back to a hidden prompt,
# so it never ends up in saved cell source or output.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY") or getpass("Google Maps API key: ")
if not API_KEY:
    raise ValueError("A Google Maps API key is required to search for places.")

# Create map centered on Austin
m = folium.Map(location=[30.2672, -97.7431], zoom_start=12)
# Clicking the map pops up the clicked coordinates so they can be typed in below.
m.add_child(folium.LatLngPopup())

display(m)
print("Click on the map to get coordinates, then use them below:")

# Get location from user
lat = float(input("Enter latitude: "))
lng = float(input("Enter longitude: "))

# Fail early on impossible coordinates (this also catches latitude and
# longitude being entered in the wrong order for most locations).
if not (-90 <= lat <= 90 and -180 <= lng <= 180):
    raise ValueError(
        f"Coordinates out of range: latitude={lat}, longitude={lng}. "
        "Latitude must be within [-90, 90] and longitude within [-180, 180]."
    )

# Place types to request from the Places search, grouped by theme.
# Each entry is sent as the `type` parameter of one search request.
categories = [
    # Essential services
    'grocery_or_supermarket', 'pharmacy', 'hospital',
    'doctor', 'dentist', 'veterinary_care',
    # Food & social
    'restaurant', 'cafe', 'bar',
    # Fitness & wellness
    'gym', 'park', 'spa',
    # Shopping
    'shopping_mall', 'convenience_store',
    # Transportation
    'transit_station', 'bus_station', 'subway_station', 'gas_station',
    # Entertainment & culture
    'movie_theater', 'museum', 'library', 'night_club',
    # Education
    'school', 'university',
    # Convenience
    'bank', 'atm', 'post_office', 'laundry',
    # Safety
    'police', 'fire_station',
]

# Build the search centres: the user's location plus 4 points in a square
# pattern around it.
#
# Each search below uses an 8000 m (~5 mile) radius. The four corner points are
# offset_miles away from the centre both north/south AND east/west, i.e. about
# offset_miles * sqrt(2) (~7 miles) from it, so on their own they leave the
# area immediately around the user's location unsearched. Searching the centre
# point as well closes that gap.
offset_miles = 5.0
MILES_PER_DEGREE_LAT = 69.0

# A degree of longitude shrinks with cos(latitude), so derive it from the
# chosen latitude instead of hard-coding a value that only fits one latitude.
# The lower bound on the cosine only guards against division by ~0 at the poles.
miles_per_degree_lng = MILES_PER_DEGREE_LAT * max(math.cos(math.radians(lat)), 0.01)

lat_offset = offset_miles / MILES_PER_DEGREE_LAT
lng_offset = offset_miles / miles_per_degree_lng

search_points = [
    (lat, lng),                            # Center (user's location)
    (lat + lat_offset, lng + lng_offset),  # Northeast
    (lat + lat_offset, lng - lng_offset),  # Northwest
    (lat - lat_offset, lng + lng_offset),  # Southeast
    (lat - lat_offset, lng - lng_offset),  # Southwest
]

url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
REQUEST_TIMEOUT_SECONDS = 10  # never let one stalled request hang the whole cell

# place_id -> place dict. Keying on place_id de-duplicates places that come
# back from more than one search point or more than one category.
all_results = {}

print(f"\nSearching {len(categories)} categories across {len(search_points)} locations...")
print("This may take a minute...\n")

for category in categories:
    category_count = 0
    for search_lat, search_lng in search_points:
        params = {
            "location": f"{search_lat},{search_lng}",
            "radius": 8000,  # metres (~5 miles) per search point
            "type": category,
            "key": API_KEY
        }

        try:
            response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Report only the exception type: the full message can include the
            # request URL, and the URL carries the API key.
            print(f"  ! {category}: request failed ({type(exc).__name__})")
            continue

        status = payload.get('status')
        if status != 'OK':
            # An empty area is normal; anything else (bad key, quota, ...) would
            # otherwise look exactly like "0 places found", so surface it.
            if status != 'ZERO_RESULTS':
                print(f"  ! {category}: API returned {status} {payload.get('error_message', '')}")
            continue

        # NOTE(review): only this response's `results` are read; any
        # `next_page_token` in the response is ignored.
        for place in payload.get('results', []):
            place_id = place['place_id']
            if place_id not in all_results:
                all_results[place_id] = place
                category_count += 1

    print(f"✓ {category}: {category_count} new places found")

# Convert back to the {'results': [...]} shape the later cells consume
data = {'results': list(all_results.values())}

print(f"\n{'='*60}")
print(f"Total unique places found: {len(data['results'])}")
print(f"{'='*60}")
# The full payload is intentionally not printed: it floods the cell output.
# Inspect a slice instead, e.g. data['results'][:3].

Click on the map to get coordinates, then use them below:
Enter latitude: 30.1985
Enter longitude: -97.7377

Searching 30 categories across 4 locations...
This may take a minute...

✓ grocery_or_supermarket: 37 new places found
✓ pharmacy: 61 new places found
✓ hospital: 48 new places found
✓ doctor: 63 new places found
✓ dentist: 58 new places found
✓ veterinary_care: 51 new places found
✓ restaurant: 80 new places found
✓ cafe: 56 new places found
✓ bar: 51 new places found
✓ gym: 63 new places found
✓ park: 80 new places found
✓ spa: 56 new places found
✓ shopping_mall: 39 new places found
✓ convenience_store: 71 new places found
✓ transit_station: 72 new places found
✓ bus_station: 21 new places found
✓ subway_station: 0 new places found
✓ gas_station: 48 new places found
✓ movie_theater: 12 new places found
✓ museum: 34 new places found
✓ library: 22 new places found
✓ night_club: 42 new places found
✓ school: 67 new places found
✓ university: 38 new places found
✓ bank: 60 new pl

In [None]:
# Print a readable summary of every place returned by the search.
#
# All loop-local names carry a `place_` prefix on purpose. Notebook cells share
# one namespace, and `lat`, `lng` and `categories` already hold the user's
# chosen location and the search category list; the distance filter further
# down reads `lat`/`lng`, so reusing those names here would silently move the
# "user location" to the last place printed.
for place in data['results']:
    place_name = place['name']
    place_rating = place.get('rating', 'N/A')

    # Get coordinates
    place_lat = place['geometry']['location']['lat']
    place_lng = place['geometry']['location']['lng']

    # Get categories/types (Google returns a list of types)
    place_types = ', '.join(place.get('types', []))

    print(f"{place_name}")
    print(f"  Rating: {place_rating}")
    print(f"  Coordinates: ({place_lat}, {place_lng})")
    print(f"  Categories: {place_types}")
    print("-" * 50)

Fiesta Mart #25
  Rating: 4.2
  Coordinates: (30.2960976, -97.7174677)
  Categories: grocery_or_supermarket, supermarket, bakery, store, food, point_of_interest, establishment
--------------------------------------------------
JD’s Super Market #7
  Rating: 4
  Coordinates: (30.2904173, -97.6293529)
  Categories: supermarket, gas_station, florist, grocery_or_supermarket, store, restaurant, food, point_of_interest, establishment
--------------------------------------------------
Royal Blue Grocery
  Rating: 4.3
  Coordinates: (30.2624553, -97.7252084)
  Categories: grocery_or_supermarket, convenience_store, cafe, supermarket, liquor_store, store, food, point_of_interest, health, establishment
--------------------------------------------------
Los Vaqueros Supermercado
  Rating: 4.2
  Coordinates: (30.3202307, -97.69038220000002)
  Categories: supermarket, grocery_or_supermarket, bakery, store, restaurant, food, point_of_interest, establishment
-------------------------------------------

In [None]:
# This cell computes the neighborhood-level average sentiment from the Reddit
# posts and attaches it to both the Reddit posts and the apartment listings.

import pandas as pd

apt_df = pd.read_csv('/content/apartments_with_neighborhood.csv')
reddit_df = pd.read_csv('/content/reddit_posts_with_neighborhoods (1).csv')

# Mean sentiment per neighborhood: one row per `neighborhood_assigned`
rgrouped = reddit_df.groupby('neighborhood_assigned')['sentiment_score'].mean().reset_index()

# Put the neighborhood average next to every post. Both frames have a
# `sentiment_score` column, so the merge suffixes them: `_x` is the post's own
# score and `_y` is the neighborhood mean, which is renamed for clarity.
reddit_df = pd.merge(reddit_df, rgrouped, on='neighborhood_assigned', how='left')
reddit_df = reddit_df.rename(columns={'sentiment_score_y': 'Average_Sentiment'})

# Join apartment data with neighborhood sentiment scores
apt_df = pd.merge(
    apt_df,
    rgrouped,
    left_on='neighborhood_group',
    right_on='neighborhood_assigned',
    how='left'
)

# Drop the duplicate neighborhood key brought in by the merge
apt_df = apt_df.drop(columns=['neighborhood_assigned'], errors='ignore')

# Count the real matches BEFORE filling gaps; counting after fillna() would
# always report every apartment as having a score.
matched_count = apt_df['sentiment_score'].notna().sum()

# Normalize sentiment scores to 0-100 scale
# Assuming sentiment_score is between -1 and 1 (adjust if different)
apt_df['sentiment_score'] = ((apt_df['sentiment_score'] + 1) / 2) * 100
apt_df['sentiment_score'] = apt_df['sentiment_score'].fillna(50)  # Fill missing with neutral

print(f"Apartments with sentiment scores: {matched_count}/{len(apt_df)}")
apt_df.head()

Apartments with sentiment scores: 150/150


Unnamed: 0,listing_id,name,address,price,bedrooms,bathrooms,sqft,url,image_url,zip,neighborhood_group,sentiment_score
0,apt_1,The Summit at The Reserve,"725 University Blvd, Round Rock, TX 78665","$1,399 - $2,800",1-2 Beds,,,https://www.apartments.com/the-summit-at-the-r...,https://images1.apartments.com/i2/Xdm0jW8F5j9X...,78665,Round Rock / Pflugerville / Hutto,61.060714
1,apt_2,Luxe of Buda,"1664 Hillside Ter, Buda, TX 78610","$1,297 - $7,599",1-4 Beds,,,https://www.apartments.com/luxe-of-buda-buda-t...,https://images1.apartments.com/i2/Pj5KKd536h1m...,78610,Buda / Kyle / Creedmoor,45.575
2,apt_3,Del Via Apartments,"11600 Spirit Dr, Del Valle, TX 78617","$1,185 - $2,355",Studio - 3 Beds,,,https://www.apartments.com/del-via-apartments-...,https://images1.apartments.com/i2/ZOMQeQnrNgzt...,78617,Bastrop / Del Valle / Manor,50.84375
3,apt_4,Woodview,"691 Union Chapel Rd, Cedar Creek, TX 78612","$1,329 - $2,259",Studio - 3 Beds,,,https://www.apartments.com/woodview-cedar-cree...,https://images1.apartments.com/i2/d86O5UCtOc5C...,78612,Bastrop / Del Valle / Manor,50.84375
4,apt_5,Ellwood at Lake Travis,"7655 N Ranch Road 620, Austin, TX 78726","$995 - $3,085",1-3 Beds,,,https://www.apartments.com/ellwood-at-lake-tra...,https://images1.apartments.com/i2/Eb86_um5jntA...,78726,Northwest Austin / Lake Travis,70.894659


In [None]:
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time

# Geocoder client shared by every lookup in this cell
geolocator = Nominatim(user_agent="apartment_geocoder")


def geocode_address(address):
    """Look up `address` with the shared geocoder.

    Returns:
        (latitude, longitude) tuple when the geocoder finds a match,
        otherwise None. Lookup errors are printed and also yield None,
        so a single bad address never stops a batch run.
    """
    try:
        hit = geolocator.geocode(address, timeout=10)
        return (hit.latitude, hit.longitude) if hit else None
    except (GeocoderTimedOut, GeocoderServiceError) as err:
        print(f"Error geocoding {address}: {err}")
    except Exception as err:
        print(f"Unexpected error for {address}: {err}")
    return None

# Geocode every apartment address, pausing between lookups
# (Nominatim requires 1 second between requests).
total_addresses = len(apt_df)
coords_list = []
for position, address in enumerate(apt_df['address'], start=1):
    print(f"Geocoding {position}/{total_addresses}: {address}")
    coords_list.append(geocode_address(address))
    time.sleep(1)  # Required for Nominatim - 1 request per second

apt_df['coords'] = coords_list


def _coord_part(index):
    """Return a function that picks element `index` of a coords tuple (None stays None)."""
    return lambda pair: pair[index] if pair else None


# Optional: split the coords tuple into separate lat/lon columns
apt_df['latitude'] = apt_df['coords'].apply(_coord_part(0))
apt_df['longitude'] = apt_df['coords'].apply(_coord_part(1))

print(apt_df[['address', 'coords', 'latitude', 'longitude']].head())


# Remove records whose address could not be geocoded

apt_df = apt_df.dropna(subset=['latitude', 'longitude'])
apt_df.head()

Geocoding 1/150: 725 University Blvd, Round Rock, TX 78665
Geocoding 2/150: 1664 Hillside Ter, Buda, TX 78610
Geocoding 3/150: 11600 Spirit Dr, Del Valle, TX 78617
Geocoding 4/150: 691 Union Chapel Rd, Cedar Creek, TX 78612
Geocoding 5/150: 7655 N Ranch Road 620, Austin, TX 78726
Geocoding 6/150: 12820 W Parmer Ln, Cedar Park, TX 78613
Geocoding 7/150: 141 Opal Ln, Kyle, TX 78640
Geocoding 8/150: 8110 Blue Goose Rd, Manor, TX 78653
Geocoding 9/150: 801 Steel Timber Dr, Manchaca, TX 78652
Geocoding 10/150: 4711 E Riverside Dr, Austin, TX 78741
Geocoding 11/150: 5511 Caprock Summit Dr, Bee Cave, TX 78738
Geocoding 12/150: 2001 Plateau Vista Blvd, Round Rock, TX 78665
Geocoding 13/150: 10525 Wildhorse Ranch Trl, Manor, TX 78724
Geocoding 14/150: 8721 Eastern Heights Blvd, Austin, TX 78724
Geocoding 15/150: 8421 183A Toll Rd, Leander, TX 78641
Geocoding 16/150: 2711 W Anderson Ln, Austin, TX 78757
Geocoding 17/150: 149 Larkspur Park Blvd, Leander, TX 78641
Geocoding 18/150: 476 Suffield Dr

Unnamed: 0,listing_id,name,address,price,bedrooms,bathrooms,sqft,url,image_url,zip,neighborhood_group,sentiment_score,coords,latitude,longitude
0,apt_1,The Summit at The Reserve,"725 University Blvd, Round Rock, TX 78665","$1,399 - $2,800",1-2 Beds,,,https://www.apartments.com/the-summit-at-the-r...,https://images1.apartments.com/i2/Xdm0jW8F5j9X...,78665,Round Rock / Pflugerville / Hutto,61.060714,"(30.5643281, -97.6732967)",30.564328,-97.673297
1,apt_2,Luxe of Buda,"1664 Hillside Ter, Buda, TX 78610","$1,297 - $7,599",1-4 Beds,,,https://www.apartments.com/luxe-of-buda-buda-t...,https://images1.apartments.com/i2/Pj5KKd536h1m...,78610,Buda / Kyle / Creedmoor,45.575,"(30.0587853, -97.8057098)",30.058785,-97.80571
4,apt_5,Ellwood at Lake Travis,"7655 N Ranch Road 620, Austin, TX 78726","$995 - $3,085",1-3 Beds,,,https://www.apartments.com/ellwood-at-lake-tra...,https://images1.apartments.com/i2/Eb86_um5jntA...,78726,Northwest Austin / Lake Travis,70.894659,"(30.4653474, -97.8059061)",30.465347,-97.805906
5,apt_6,Bexley at Silverado,"12820 W Parmer Ln, Cedar Park, TX 78613","$1,049 - $2,285",1-3 Beds,,,https://www.apartments.com/bexley-at-silverado...,https://images1.apartments.com/i2/VXP3x4mGTWK3...,78613,Cedar Park / Leander / Liberty Hill,38.705625,"(30.5324006, -97.7788757)",30.532401,-97.778876
6,apt_7,Opal Point at Kyle,"141 Opal Ln, Kyle, TX 78640","$1,363 - $2,981",Studio - 2 Beds,,,https://www.apartments.com/opal-point-at-kyle-...,https://images1.apartments.com/i2/G7K4QY5cFBbQ...,78640,Buda / Kyle / Creedmoor,45.575,"(29.9754614, -97.8836218)",29.975461,-97.883622


In [None]:
# Filter for distance to apartment
# 5 Mile threshold

from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """
    Great-circle distance in miles between two points given in decimal degrees.

    Note the argument order: longitude first, then latitude, for each point.
    """
    # Work in radians from here on
    lam1, phi1, lam2, phi2 = (radians(deg) for deg in (lon1, lat1, lon2, lat2))

    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1

    # Haversine formula: `a` is the squared half-chord between the two points
    a = sin(delta_phi/2)**2 + cos(phi1) * cos(phi2) * sin(delta_lam/2)**2
    central_angle = 2 * asin(sqrt(a))

    # 3956 = radius of earth in miles
    return 3956 * central_angle

def _distance_from_user(row):
    """Miles from the user's chosen point (globals `lat`/`lng`) to one apartment row."""
    if pd.isna(row['latitude']) or pd.isna(row['longitude']):
        return None
    return haversine(row['longitude'], row['latitude'], lng, lat)


# Work on a copy so apt_df itself is left untouched.
# `lat` and `lng` must still hold the coordinates the user entered.
nearby_apartments = apt_df.copy()
nearby_apartments['distance_miles'] = nearby_apartments.apply(_distance_from_user, axis=1)

# Keep only apartments within 5 miles, closest first
within_range = nearby_apartments['distance_miles'] <= 5.0
nearby_apartments = (
    nearby_apartments[within_range]
    .sort_values('distance_miles')
    .reset_index(drop=True)
)

print(f"\nFound {len(nearby_apartments)} apartments within 5 miles")
print(nearby_apartments[['address', 'distance_miles']])

# Original apt_df is unchanged


Found 16 apartments within 5 miles
                                      address  distance_miles
0            8600 Brodie Ln, Austin, TX 78745        0.295957
1        5701 S MO PAC Expy, Austin, TX 78749        2.433124
2         7631 US Hwy 290 W, Austin, TX 78736        3.212083
3   739 William Cannon Dr W, Austin, TX 78745        3.342548
4     211 Ralph Ablanedo Dr, Austin, TX 78748        3.642176
5            10001 S 1st St, Austin, TX 78748        3.672490
6       8200 S Congress Ave, Austin, TX 78745        3.896503
7     8600 State Highway 71, Austin, TX 78735        4.143081
8       5900 S Congress Ave, Austin, TX 78745        4.167721
9           4009 Victory Dr, Austin, TX 78704        4.283284
10    7330 Bluff Springs Rd, Austin, TX 78744        4.637583
11    8340 Bluff Springs Rd, Austin, TX 78744        4.665307
12     2000 Onion Creek Pky, Austin, TX 78748        4.729858
13    6307 Bluff Springs Rd, Austin, TX 78744        4.787972
14     1900 Onion Creek Pky, Austi

Claude Categories:
Essential Services:

grocery_or_supermarket - Grocery stores
pharmacy - Pharmacies
hospital - Hospitals/Emergency care
doctor - Medical clinics
dentist - Dental offices
veterinary_care - Vet clinics (if you have pets)

Food & Social:

restaurant - Restaurants
cafe - Coffee shops
bar - Bars/Pubs

Fitness & Wellness:

gym - Gyms/Fitness centers
park - Parks & green spaces
spa - Spas/Wellness centers

Shopping:

shopping_mall - Shopping centers
grocery_or_supermarket - (already listed above)
convenience_store - Quick shopping

Transportation:

transit_station - Public transit stops
bus_station - Bus stops
subway_station - Metro/Subway
gas_station - Gas stations

Entertainment & Culture:

movie_theater - Cinemas
museum - Museums
library - Libraries
night_club - Nightlife

Education (if relevant):

school - Schools
university - Colleges/Universities

Convenience:

bank - Banks
atm - ATMs
post_office - Post offices
laundry - Laundromats

Safety:

police - Police stations
fire_station - Fire stations

In [None]:
# scoring system.

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Load a pretrained sentence transformer model for semantic similarity.
# `model` is a notebook-level global: match_user_input_to_categories and
# calculate_amenity_score both call model.encode() on it, so this line must
# run before either function is used.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Google Places categories that user amenity text is matched against.
# Same values, in the same order, as the category list used for the API call.
google_categories = [
    # Essential services
    'grocery_or_supermarket', 'pharmacy', 'hospital',
    'doctor', 'dentist', 'veterinary_care',
    # Food & social
    'restaurant', 'cafe', 'bar',
    # Fitness & wellness
    'gym', 'park', 'spa',
    # Shopping
    'shopping_mall', 'convenience_store',
    # Transportation
    'transit_station', 'bus_station', 'subway_station', 'gas_station',
    # Entertainment & culture
    'movie_theater', 'museum', 'library', 'night_club',
    # Education
    'school', 'university',
    # Convenience
    'bank', 'atm', 'post_office', 'laundry',
    # Safety
    'police', 'fire_station',
]

def match_user_input_to_categories(user_input, categories,
                                   low_threshold=0.5,
                                   high_threshold=0.7,
                                   super_threshold=0.85):
    """
    Rank `categories` by semantic similarity to the free-text `user_input`.

    Both the input and the category names are embedded with the global
    `model`; categories whose cosine similarity falls below `low_threshold`
    are dropped.

    Returns:
        list of tuples: (category, confidence_score, bonus_multiplier),
        highest confidence first. The multiplier is 1.5 at or above
        `super_threshold`, 1.2 at or above `high_threshold`, otherwise 1.0.
    """
    def bonus_for(similarity):
        # Most confident tier first
        if similarity >= super_threshold:
            return 1.5
        if similarity >= high_threshold:
            return 1.2
        return 1.0

    query_vec = model.encode([user_input])
    category_vecs = model.encode(categories)

    # One similarity value per category, in the same order as `categories`
    scores = cosine_similarity(query_vec, category_vecs)[0]

    kept = [
        (name, similarity, bonus_for(similarity))
        for name, similarity in zip(categories, scores)
        if not similarity < low_threshold  # ignore low confidence
    ]
    kept.sort(key=lambda match: match[1], reverse=True)
    return kept


def calculate_amenity_score(apt_coords, places_data, user_input, matched_categories,
                            max_distance_miles=5.0,
                            category_weight=0.7,
                            name_weight=0.3):
    """
    Calculate amenity score based on proximity to matched places.
    Uses BOTH category matching AND name similarity for better classification.

    A place counts when one of its `types` is in `matched_categories` and it
    lies within `max_distance_miles` of the apartment. Each such place gets
    distance_score * bonus * combined_confidence, and the result is the mean
    of the three best places, capped at 100.

    Args:
        apt_coords: (lat, lng) tuple for apartment
        places_data: dict with 'results' containing Google Places results
        user_input: original user query (e.g., "burger", "italian food")
        matched_categories: list of (category, confidence, bonus) tuples; the
            first tuple whose category appears in a place's types is used
        max_distance_miles: maximum distance to consider
        category_weight: how much category match matters (default 0.7)
        name_weight: how much name match matters (default 0.3)

    Returns:
        amenity score, at most 100; 0 when there are no matched categories or
        no matching place in range
    """
    from math import radians, cos, sin, asin, sqrt

    def haversine(lon1, lat1, lon2, lat2):
        """Great-circle distance in miles between two (lon, lat) points in degrees."""
        lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
        c = 2 * asin(sqrt(a))
        return 3956 * c  # miles

    if not matched_categories:
        return 0

    apt_lat, apt_lng = apt_coords

    # The user query is embedded at most once, and only when some place is
    # actually in range, so apartments with nothing nearby cost no model calls.
    user_embedding = None

    weighted_scores = []
    for place in places_data['results']:
        place_types = place.get('types', [])

        # Check if place matches any of our categories (first match wins)
        category_match = None
        for category, confidence, bonus in matched_categories:
            if category in place_types:
                category_match = (confidence, bonus)
                break
        if category_match is None:
            continue

        # Distance filter first: it is cheap, while the name embedding below
        # is a model call that would be wasted on out-of-range places.
        location = place['geometry']['location']
        distance = haversine(apt_lng, apt_lat, location['lng'], location['lat'])
        if not distance <= max_distance_miles:
            continue

        if user_embedding is None:
            user_embedding = model.encode([user_input])

        # Calculate name similarity
        place_name = place.get('name', '')
        name_embedding = model.encode([place_name])
        name_similarity = cosine_similarity(user_embedding, name_embedding)[0][0]

        # Combined confidence: mix category confidence and name similarity
        category_confidence, category_bonus = category_match
        combined_confidence = (
            category_confidence * category_weight +
            name_similarity * name_weight
        )

        # Bonus multiplier: higher if both name and category match well
        if name_similarity > 0.7 and category_confidence > 0.7:
            final_bonus = category_bonus * 1.5  # Both match really well
        elif name_similarity > 0.5:
            final_bonus = category_bonus * 1.2  # Name matches decently
        else:
            final_bonus = category_bonus  # Just category match

        # 100 right at the apartment, falling linearly to 0 at max_distance_miles
        distance_score = (max_distance_miles - distance) / max_distance_miles * 100
        weighted_scores.append(distance_score * final_bonus * combined_confidence)

    if not weighted_scores:
        return 0

    # Average the three best places
    top_scores = sorted(weighted_scores, reverse=True)[:3]
    amenity_score = sum(top_scores) / len(top_scores)
    return min(amenity_score, 100)  # Cap at 100


def calculate_final_apartment_score(apt_row, places_data, user_amenities,
                                    sentiment_weight=0.4,
                                    distance_weight=0.3,
                                    amenity_weight=0.3,
                                    max_distance_miles=5.0):
    """
    Calculate final apartment score combining sentiment, distance, and amenities.

    Args:
        apt_row: DataFrame row (or mapping) with 'sentiment_score',
            'distance_miles' and 'coords' entries
        places_data: Google Places API results
        user_amenities: list of amenities user cares about (e.g., ['elementary schools', 'parks'])
        sentiment_weight: weight for sentiment score
        distance_weight: weight for distance score
        amenity_weight: weight for amenities score
        max_distance_miles: max distance for scoring

    Returns:
        dict with breakdown of scores: 'final_score', 'sentiment_score',
        'distance_score', 'amenity_score', 'individual_amenity_scores'
    """
    # 1. Sentiment score (assume already 0-100).
    # A missing value in a DataFrame row is NaN rather than an absent key, so
    # the .get() default alone would let NaN through and turn the final score
    # into NaN; treat both cases as neutral.
    sentiment_score = apt_row.get('sentiment_score', 50)
    if sentiment_score is None or pd.isna(sentiment_score):
        sentiment_score = 50

    # 2. Distance score: 100 at the user's location, 0 at max_distance_miles.
    # Clamped so an apartment beyond max_distance_miles scores 0 instead of
    # going negative and dragging the weighted total below the other parts.
    distance = apt_row['distance_miles']
    distance_score = (max_distance_miles - distance) / max_distance_miles * 100
    distance_score = min(max(distance_score, 0), 100)

    # 3. Amenity scores
    apt_coords = apt_row['coords']
    amenity_scores = []

    for amenity in user_amenities:
        # Match user amenity to Google categories
        matches = match_user_input_to_categories(amenity, google_categories)

        # An amenity with no category match is left out of the average
        # entirely; it does not count as a 0.
        if matches:
            # Calculate score for this amenity (includes user input for name matching)
            score = calculate_amenity_score(apt_coords, places_data, amenity, matches, max_distance_miles)
            amenity_scores.append(score)

    # Average amenity scores
    avg_amenity_score = np.mean(amenity_scores) if amenity_scores else 0

    # 4. Calculate weighted final score
    final_score = (
        sentiment_score * sentiment_weight +
        distance_score * distance_weight +
        avg_amenity_score * amenity_weight
    )

    return {
        'final_score': final_score,
        'sentiment_score': sentiment_score,
        'distance_score': distance_score,
        'amenity_score': avg_amenity_score,
        'individual_amenity_scores': amenity_scores
    }


# Example usage: rank the nearby apartments for one set of preferences.

# Amenities this user cares about
user_amenities = ['College', 'Thrift Shop', 'Nightclub']

# One record per apartment: its address plus the full score breakdown
scored_apartments = [
    {
        'address': apt['address'],
        **calculate_final_apartment_score(
            apt,
            data,  # Google Places data from the API call
            user_amenities,
            sentiment_weight=0.4,
            distance_weight=0.3,
            amenity_weight=0.3,
        ),
    }
    for _, apt in nearby_apartments.iterrows()
]

# Tabulate, best overall score first
results_df = pd.DataFrame(scored_apartments).sort_values('final_score', ascending=False)

print(results_df)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

                                      address  final_score  sentiment_score  \
0            8600 Brodie Ln, Austin, TX 78745    74.721649        76.978086   
1        5701 S MO PAC Expy, Austin, TX 78749    63.039032        76.978086   
3   739 William Cannon Dr W, Austin, TX 78745    60.142731        76.978086   
9           4009 Victory Dr, Austin, TX 78704    57.032841        76.978086   
6       8200 S Congress Ave, Austin, TX 78745    56.829159        76.978086   
5            10001 S 1st St, Austin, TX 78748    54.712120        63.858224   
8       5900 S Congress Ave, Austin, TX 78745    54.248863        76.978086   
4     211 Ralph Ablanedo Dr, Austin, TX 78748    54.116806        63.858224   
15      4411 S Congress Ave, Austin, TX 78745    51.350174        76.978086   
2         7631 US Hwy 290 W, Austin, TX 78736    48.862595        61.679786   
10    7330 Bluff Springs Rd, Austin, TX 78744    44.618675        63.858224   
11    8340 Bluff Springs Rd, Austin, TX 78744    44.

In [None]:
# Example usage: rank the nearby apartments for a second set of preferences.

# Amenities this user cares about
user_amenities = ['Elementary School', 'pediatrician', 'Grocery Store']

# One record per apartment: its address plus the full score breakdown
scored_apartments = [
    {
        'address': apt['address'],
        **calculate_final_apartment_score(
            apt,
            data,  # Google Places data from the API call
            user_amenities,
            sentiment_weight=0.4,
            distance_weight=0.3,
            amenity_weight=0.3,
        ),
    }
    for _, apt in nearby_apartments.iterrows()
]

# Tabulate, best overall score first
results_df = pd.DataFrame(scored_apartments).sort_values('final_score', ascending=False)

print(results_df)

                                      address  final_score  sentiment_score  \
0            8600 Brodie Ln, Austin, TX 78745    79.485855        76.978086   
1        5701 S MO PAC Expy, Austin, TX 78749    66.038200        76.978086   
3   739 William Cannon Dr W, Austin, TX 78745    57.338161        76.978086   
9           4009 Victory Dr, Austin, TX 78704    55.372742        76.978086   
6       8200 S Congress Ave, Austin, TX 78745    54.768147        76.978086   
5            10001 S 1st St, Austin, TX 78748    54.329552        63.858224   
4     211 Ralph Ablanedo Dr, Austin, TX 78748    52.430550        63.858224   
8       5900 S Congress Ave, Austin, TX 78745    50.302139        76.978086   
2         7631 US Hwy 290 W, Austin, TX 78736    49.320190        61.679786   
15      4411 S Congress Ave, Austin, TX 78745    47.500946        76.978086   
12     2000 Onion Creek Pky, Austin, TX 78748    44.734573        63.858224   
14     1900 Onion Creek Pky, Austin, TX 78748    44.

In [None]:

# Example usage: score the nearby apartments for the "college" preference set
# and keep the ranking in its own frame, college_results_df.

# User inputs what they care about
user_amenities = ['College', 'Thrift Shop', 'Nightclub']

# Score all apartments
scored_apartments = []
for idx, apt in nearby_apartments.iterrows():
    scores = calculate_final_apartment_score(
        apt,
        data,  # Google Places data from your API call
        user_amenities,
        sentiment_weight=0.4,
        distance_weight=0.3,
        amenity_weight=0.3
    )

    scored_apartments.append({
        'address': apt['address'],
        **scores
    })

# Convert to DataFrame and sort
college_results_df = pd.DataFrame(scored_apartments)
college_results_df = college_results_df.sort_values('final_score', ascending=False)

# Show the frame built in THIS cell; printing results_df here would display
# the ranking left over from a different preference set.
print(college_results_df)

                                      address  final_score  sentiment_score  \
0            8600 Brodie Ln, Austin, TX 78745    79.485855        76.978086   
1        5701 S MO PAC Expy, Austin, TX 78749    66.038200        76.978086   
3   739 William Cannon Dr W, Austin, TX 78745    57.338161        76.978086   
9           4009 Victory Dr, Austin, TX 78704    55.372742        76.978086   
6       8200 S Congress Ave, Austin, TX 78745    54.768147        76.978086   
5            10001 S 1st St, Austin, TX 78748    54.329552        63.858224   
4     211 Ralph Ablanedo Dr, Austin, TX 78748    52.430550        63.858224   
8       5900 S Congress Ave, Austin, TX 78745    50.302139        76.978086   
2         7631 US Hwy 290 W, Austin, TX 78736    49.320190        61.679786   
15      4411 S Congress Ave, Austin, TX 78745    47.500946        76.978086   
12     2000 Onion Creek Pky, Austin, TX 78748    44.734573        63.858224   
14     1900 Onion Creek Pky, Austin, TX 78748    44.

In [None]:
# Write each ranking to its own CSV file (row index omitted)
for csv_name, ranking in (
    ('college_results.csv', college_results_df),
    ('results.csv', results_df),
):
    ranking.to_csv(csv_name, index=False)
print('done')

done


In [None]:
# Per-neighborhood mean of the Average_Sentiment column attached to each post
sentiment_by_neighborhood = reddit_df.groupby('neighborhood_assigned')['Average_Sentiment']
sentiment_by_neighborhood.mean()

Unnamed: 0_level_0,Average_Sentiment
neighborhood_assigned,Unnamed: 1_level_1
Bastrop / Del Valle / Manor,0.016875
Bee Cave / Lakeway / Dripping Springs,0.233596
Buda / Kyle / Creedmoor,-0.0885
Cedar Park / Leander / Liberty Hill,-0.225887
Downtown / UT / West Campus,0.214517
East Austin,0.256409
Georgetown,0.495827
Marble Falls,0.018036
North Austin / Domain,0.095188
Northwest Austin / Lake Travis,0.417893
