In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors

df = pd.read_csv('filtered_suburbs_amenity_scores.csv')
df.head(10)

Unnamed: 0,Suburb_name,Coordinates,Eating,Nightlife,Culture,Community,Retail Shopping Centres,Clinic,Dentist,Doctors,Hospital,Pharmacy,Veterinary,Leisure spots,Sports Centres,Leisure Areas,Economic,Education,Public Transportation,Active Transportation
0,BRACKENFELL COMMON,"(-33.90799167799997,18.676640457000076),(-33.9...",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0
1,PAROW GOLF COURSE,"(-33.89798150799993,18.568331451000063),(-33.8...",2,2,1,1,1,1,1,1,1,1,1,5,1,2,3,2,0,0
2,BO-KAAP,"(-33.91609222899996,18.41288776300007),(-33.91...",5,5,1,1,1,1,1,1,1,1,1,1,1,3,1,1,1,1
3,KUILSRIVIER INDUSTRIA,"(-33.93807133399997,18.686884202000044),(-33.9...",2,1,1,1,1,1,1,1,1,1,1,5,1,3,1,1,0,0
4,KRAAIFONTEIN EAST,"(-33.848411306999935,18.73566532600006),(-33.8...",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0
5,KUILS RIVER SPORTS GROUNDS,"(-33.94360575999997,18.698297586000024),(-33.9...",1,1,1,1,1,1,1,1,1,1,1,5,1,3,1,1,0,0
6,BELGRAVIA -BELLVILLE,"(-33.90131823499996,18.64735268800007),(-33.90...",1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,0,0
7,MOUILLE POINT,"(-33.899010791999956,18.41249779900005),(-33.8...",5,1,1,1,1,2,1,1,1,1,1,1,1,4,1,2,1,0
8,TAMBOERSKLOOF,"(-33.92409573499998,18.405299497000044),(-33.9...",5,5,1,1,3,2,1,2,1,3,2,5,2,2,2,2,1,0
9,VREDELUST KUILS RIVER,"(-33.94741781199997,18.69624518300003),(-33.94...",1,1,1,1,1,1,1,1,1,1,1,5,1,1,1,1,0,0


In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101 entries, 0 to 100
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Suburb_name              101 non-null    object
 1   Coordinates              101 non-null    object
 2   Eating                   101 non-null    int64 
 3   Nightlife                101 non-null    int64 
 4   Culture                  101 non-null    int64 
 5   Community                101 non-null    int64 
 6   Retail Shopping Centres  101 non-null    int64 
 7   Clinic                   101 non-null    int64 
 8   Dentist                  101 non-null    int64 
 9   Doctors                  101 non-null    int64 
 10  Hospital                 101 non-null    int64 
 11  Pharmacy                 101 non-null    int64 
 12  Veterinary               101 non-null    int64 
 13  Leisure spots            101 non-null    int64 
 14  Sports Centres           101 non-null    i

In [3]:
# Mapping of user-selected categories to their respective amenities
category_mapping = {
    'Social & Cultural Activities': ['Eating', 'Nightlife', 'Culture'],
    'Health & Wellness': ['Clinic', 'Dentist', 'Doctors', 'Hospital', 'Pharmacy', 'Veterinary'],
    'Leisure & Recreation': ['Leisure spots', 'Sports Centres', 'Leisure Areas'],
    'Community & Services': ['Retail Shopping Centres', 'Community', 'Economic', 'Education']
}

In [4]:
# User input (example values)
user_input = {
    'Social & Cultural Activities': 4,
    'Health & Wellness': 2,
    'Leisure & Recreation': 3,
    'Community & Services': 1,
    'Public Transportation': False,
    'Active Transportation': False
}
        

In [5]:
# Create a new DataFrame to store the averaged scores
averaged_df = pd.DataFrame()
averaged_df['Suburb_name'] = df['Suburb_name']
averaged_df['Coordinates'] = df['Coordinates']

In [6]:
# Calculate the average scores for each category
for category, amenities in category_mapping.items():
    user_score = user_input.get(category, 1)  # Get the score or set to 1 if not present
    if user_score > 1:  # Only consider categories with a score greater than 1
        weights = np.array([user_score for amenity in amenities])
        averaged_df[category] = df[amenities].mean(axis=1).apply(lambda x: round(x))

In [7]:
# Include 'Public_Transportation' and 'Active_Transportation' only if user selected 1 for at least one of them
if user_input['Public Transportation']:
    averaged_df['Public Transportation'] = df['Public Transportation']
elif user_input['Active Transportation']:
    averaged_df['Active Transportation'] = df['Active Transportation']

In [8]:
# Order the categories by similarity to the user's importance level
ordered_categories = sorted(category_mapping.keys(), key=lambda category: abs(user_input.get(category, 0) - averaged_df[category].mean()), reverse=True)

# Select features based on ordered categories
selected_features = averaged_df[ordered_categories]

KeyError: 'Community & Services'

In [None]:
print("Averaged DataFrame columns:", averaged_df.columns)
print("Category mapping keys:", category_mapping.keys())

In [None]:
averaged_df.head()

In [None]:
# Fit Nearest Neighbors model
nn_model = NearestNeighbors(n_neighbors=6, algorithm='ball_tree')
nn_model.fit(selected_features)

In [None]:
# Find nearest neighbors
neighbor_distances, neighbor_indices = nn_model.kneighbors(selected_features.iloc[0].values.reshape(1, -1))

In [None]:
# Fit Nearest Neighbors model
nn_model = NearestNeighbors(n_neighbors=6, algorithm='ball_tree')
nn_model.fit(selected_features)

In [None]:
# Find the indices of the nearest neighbors
user_preferences = [user_input[category] for category in ordered_categories]
distances, indices = nn_model.kneighbors([user_preferences])

In [None]:
# Get the suburb names and coordinates of the nearest neighbors
nearest_suburbs = averaged_df.iloc[indices[0]][['Suburb_name', 'Coordinates']]

print("Top 5 best suburbs for the user:")
print(nearest_suburbs.head())

In [None]:
nearest_suburbs.columns

In [None]:
import folium


# Select the top 5 suburbs
top_suburbs = nearest_suburbs.head()

# Create a map centered at Cape Town's coordinates
map_center = [-33.9249, 18.4241]
m = folium.Map(location=map_center, zoom_start=11)

# Add markers for the top suburbs
for index, suburb in top_suburbs.iterrows():
    # Parse the coordinates string into a list of polygons
    coordinates_list = suburb['Coordinates'].split('),(')
    coordinates_list[0] = coordinates_list[0].strip('([')
    coordinates_list[-1] = coordinates_list[-1].strip('])')
    
    # Convert coordinates to a list of (lat, lon) pairs
    polygon_coords = []
    for coords in coordinates_list:
        lon, lat = coords.split(',')
        polygon_coords.append([float(lat), float(lon)])

    # Create a GeoJSON feature for the suburb's boundary
    suburb_boundary = {
        "type": "Feature",
        "geometry": {
            "type": "Polygon",
            "coordinates": [polygon_coords]
        },
        "properties": {
            "name": suburb['Suburb_name']
        }
    }
    
    # Add the GeoJSON feature as a GeoJSON layer to the map
    folium.GeoJson(suburb_boundary,
                   name=suburb['Suburb_name'],
                   style_function=lambda x: {
                       'fillColor': '#00FF01',
                       'color': 'black',
                       'weight': 2,
                       'fillOpacity': 0.2,
                   }).add_to(m)

    # Add a marker for the centroid of the suburb's boundary
    centroid_lat = sum(lat for lat, lon in polygon_coords) / len(polygon_coords)
    centroid_lon = sum(lon for lat, lon in polygon_coords) / len(polygon_coords)
    folium.Marker(location=[centroid_lat, centroid_lon], popup=suburb['Suburb_name']).add_to(m)

# Display the map
folium.LayerControl().add_to(m)  # Add layer control for turning on/off layers
m


In [None]:
import folium

# Select the top 5 suburbs
top_suburbs = nearest_suburbs.head()

# Create a map centered at Cape Town's coordinates
map_center = [-33.9249, 18.4241]
m = folium.Map(location=map_center, zoom_start=11)

# Add markers for the top suburbs
for index, suburb in top_suburbs.iterrows():
    # ... (rest of your code)

    # Add the GeoJSON feature as a GeoJSON layer to the map
    folium.GeoJson(suburb_boundary,
                   name=suburb['Suburb_name'],
                   style_function=lambda x: {
                       'fillColor': '#00FF01',
                       'color': 'black',
                       'weight': 2,
                       'fillOpacity': 0.2,
                   },
                   tooltip=suburb['Suburb_name']).add_to(m)  # <-- Add tooltip for name

    # Add a marker for the centroid of the suburb's boundary
    centroid_lat = sum(lat for lat, lon in polygon_coords) / len(polygon_coords)
    centroid_lon = sum(lon for lat, lon in polygon_coords) / len(polygon_coords)
    folium.Marker(location=[centroid_lat, centroid_lon], popup=suburb['Suburb_name']).add_to(m)

# Display the map
folium.LayerControl().add_to(m)  # Add layer control for turning on/off layers
m.save("map_with_names.html")  # Save the map to an HTML file
