In [12]:
import pandas as pd
import numpy as np

In [13]:
suburbs = pd.read_csv('Sydney-Suburbs-Reviews.csv')

In [14]:
suburbs.head()

Unnamed: 0,Name,Region,Population (rounded)*,Postcode,Ethnic Breakdown 2016,Median House Price (2020),Median House Price (2021),% Change,Median House Rent (per week),Median Apartment Price (2020),...,Affordability (Rental),Affordability (Buying),Nature,Noise,Things to See/Do,Family-Friendliness,Pet Friendliness,Safety,Overall Rating,Review Link
0,Hornsby,Upper North Shore,23000,2077,"Chinese 17.1%, English 16.8%, Australian 14.0%...","$1,150,000.00","$1,400,000.00",21.74%,$600.00,"$645,000.00",...,7.0,6.0,8.0,7.0,7.0,9.0,8.0,9.0,7.1,https://sydneysuburbreviews.com/hornsby
1,Oatley,Southern Suburbs,10500,2223,"English 23.0%, Australian 21.1%, Chinese 9.8%,...","$1,550,000.00","$1,800,000.00",16.13%,$670.00,"$780,000.00",...,6.0,4.0,8.0,9.0,5.0,9.0,8.0,9.0,7.0,https://sydneysuburbreviews.com/oatley-suburb-...
2,Dulwich Hill,Inner West,14500,2203,"English 19.4%, Australian 16.4%, Irish 9.5%, S...","$1,500,000.00","$1,900,000.00",26.67%,$725.00,"$780,000.00",...,6.0,4.0,8.0,7.0,6.0,8.0,9.0,8.0,6.9,https://sydneysuburbreviews.com/dulwich-hill
3,Jannali,Sutherland Shire,6250,2226,"English 28.2%, Australian 26.3%, Irish 9.8%, S...","$1,050,000.00","$1,300,000.00",23.81%,$610.00,"$620,000.00",...,7.0,7.0,7.0,8.0,2.0,8.0,8.0,9.0,6.8,https://sydneysuburbreviews.com/jannali
4,Waverton,Lower North Shore,3250,2060,"English 24.9%, Australian 15.5%, Irish 11.0%, ...","$2,650,000.00","$3,400,000.00",28.30%,"$1,000.00","$1,200,000.00",...,5.0,2.0,8.0,9.0,4.0,9.0,8.0,9.0,6.8,https://sydneysuburbreviews.com/waverton


In [15]:
# check missing values in ethnic breakdown
suburbs['Ethnic Breakdown 2016'].isnull().sum()

np.int64(0)

In [16]:
# check missing values in median house rent
suburbs['Median House Rent (per week)'].isnull().sum()

np.int64(19)

In [17]:
# drop rows where median house rent is null
suburbs.dropna(subset=['Median House Rent (per week)'], inplace=True)
suburbs.isnull().sum()

Name                                               0
Region                                             0
Population (rounded)*                              0
Postcode                                           0
Ethnic Breakdown 2016                              0
Median House Price (2020)                          0
Median House Price (2021)                         10
% Change                                           0
Median House Rent (per week)                       0
Median Apartment Price (2020)                    116
Median Apartment Rent (per week)                  66
Public Housing %                                   2
Avg. Years Held                                   40
Time to CBD (Public Transport) [Town Hall St]      4
Time to CBD (Driving) [Town Hall St]               2
Nearest Train Station                             69
Highlights/Attractions                           292
Ideal for                                        290
Traffic                                       

In [18]:
suburbs.info()

<class 'pandas.core.frame.DataFrame'>
Index: 402 entries, 0 to 420
Data columns (total 30 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   Name                                           402 non-null    object 
 1   Region                                         402 non-null    object 
 2   Population (rounded)*                          402 non-null    object 
 3   Postcode                                       402 non-null    int64  
 4   Ethnic Breakdown 2016                          402 non-null    object 
 5   Median House Price (2020)                      402 non-null    object 
 6   Median House Price (2021)                      392 non-null    object 
 7   % Change                                       402 non-null    object 
 8   Median House Rent (per week)                   402 non-null    object 
 9   Median Apartment Price (2020)                  286 non-null

In [22]:
# Sydney Suburb Recommender Systems (Simplified)
# Focus on 3 clean columns: Name, Ethnic Breakdown, Median House Rent
# Two recommender systems:
# 1. Ethnic Background + Budget-based Recommender
# 2. Similarity-based Recommender using semantic vectorization of ethnic breakdown

import pandas as pd
import numpy as np
import re
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# For semantic similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# For visualization
import matplotlib.pyplot as plt
import seaborn as sns

print("📊 Sydney Suburb Recommender Systems (Simplified)")
print("=" * 60)

# =============================================================================
# DATA LOADING AND PREPROCESSING
# =============================================================================

def load_and_preprocess_data():
    """Load and preprocess the suburb data - focusing on 3 key columns"""
    
    # Load the data
    df = pd.read_csv('Sydney-Suburbs-Reviews.csv')
    print(f"✅ Loaded {len(df)} suburbs")
    
    # Focus only on the 3 clean columns
    key_columns = ['Name', 'Ethnic Breakdown 2016', 'Median House Rent (per week)']
    df_clean = df[key_columns].copy()
    df_clean.dropna(subset=['Median House Rent (per week)'], inplace=True)
    
    # Clean suburb names
    df_clean['Name'] = df_clean['Name'].str.strip()
    
    # Clean and parse ethnic breakdown
    df_clean = df_clean.dropna(subset=['Ethnic Breakdown 2016'])
    df_clean['Ethnic_Dict'] = df_clean['Ethnic Breakdown 2016'].apply(parse_ethnic_breakdown)
    
    # Clean rent data - extract numeric values
    df_clean['Weekly_Rent'] = df_clean['Median House Rent (per week)'].str.replace('$', '').str.replace(',', '')
    df_clean['Weekly_Rent'] = pd.to_numeric(df_clean['Weekly_Rent'], errors='coerce')
    
    # Remove rows with missing rent data
    df_clean = df_clean.dropna(subset=['Weekly_Rent'])
    
    # Enhanced ethnic breakdown text for better semantic understanding
    df_clean['Enhanced_Ethnic_Text'] = df_clean['Ethnic Breakdown 2016'].apply(enhance_ethnic_text)
    
    print(f"✅ Cleaned data: {len(df_clean)} suburbs with complete information")
    print(f"📊 Rent range: ${df_clean['Weekly_Rent'].min():.0f} - ${df_clean['Weekly_Rent'].max():.0f} per week")
    
    return df_clean

def parse_ethnic_breakdown(ethnic_str):
    """Parse ethnic breakdown string into dictionary with percentages"""
    if pd.isna(ethnic_str):
        return {}
    
    ethnic_dict = {}
    # Use regex to find ethnicity and percentage pairs
    pattern = r'([A-Za-z\s]+?)\s+([\d.]+)%'
    matches = re.findall(pattern, ethnic_str)
    
    for ethnicity, percentage in matches:
        ethnicity = ethnicity.strip()
        ethnic_dict[ethnicity] = float(percentage)
    
    return ethnic_dict

def enhance_ethnic_text(ethnic_str):
    """
    Enhance ethnic breakdown text for better semantic similarity
    Emphasizes percentages by repeating ethnic terms based on their percentage
    """
    if pd.isna(ethnic_str):
        return ""
    
    enhanced_text = []
    ethnic_dict = parse_ethnic_breakdown(ethnic_str)
    
    for ethnicity, percentage in ethnic_dict.items():
        # Clean ethnicity name
        clean_ethnicity = ethnicity.lower().replace(' ', '_')
        
        # Repeat ethnicity name based on percentage (scaled)
        # Higher percentages get more repetitions for TF-IDF weighting
        repetitions = max(1, int(percentage / 2))  # Divide by 2 to avoid too many repetitions
        
        # Add percentage categories for better semantic understanding
        if percentage >= 20:
            enhanced_text.extend([f"{clean_ethnicity}_dominant"] * repetitions)
        elif percentage >= 10:
            enhanced_text.extend([f"{clean_ethnicity}_significant"] * repetitions)
        elif percentage >= 5:
            enhanced_text.extend([f"{clean_ethnicity}_moderate"] * repetitions)
        else:
            enhanced_text.extend([f"{clean_ethnicity}_minor"] * repetitions)
        
        # Also add the base ethnicity multiple times based on percentage
        enhanced_text.extend([clean_ethnicity] * repetitions)
    
    return ' '.join(enhanced_text)

def get_unique_ethnicities(df):
    """Extract all unique ethnicities from the dataset"""
    all_ethnicities = set()
    for ethnic_dict in df['Ethnic_Dict']:
        all_ethnicities.update(ethnic_dict.keys())
    return sorted(list(all_ethnicities))

# =============================================================================
# RECOMMENDER SYSTEM 1: ETHNIC BACKGROUND + BUDGET BASED
# =============================================================================

class EthnicBudgetRecommender:
    """Recommender based on ethnic background preference and budget"""
    
    def __init__(self, df):
        self.df = df.copy()
        self.ethnicities = get_unique_ethnicities(df)
        print(f"🌍 Found {len(self.ethnicities)} unique ethnicities")
        
    def recommend_suburbs(self, target_ethnicity, max_weekly_rent, top_n=10):
        """
        Recommend suburbs based on ethnicity preference and budget
        
        Parameters:
        - target_ethnicity: str, preferred ethnicity
        - max_weekly_rent: float, maximum weekly rent budget
        - top_n: int, number of recommendations
        
        Returns:
        - DataFrame with recommended suburbs
        """
        
        # Filter by budget
        budget_filtered = self.df[self.df['Weekly_Rent'] <= max_weekly_rent].copy()
        
        if len(budget_filtered) == 0:
            print(f"❌ No suburbs found within budget of ${max_weekly_rent}/week")
            return pd.DataFrame()
        
        # Calculate ethnicity percentage for target ethnicity
        budget_filtered['Ethnicity_Percentage'] = budget_filtered['Ethnic_Dict'].apply(
            lambda x: x.get(target_ethnicity, 0)
        )
        
        # Sort by ethnicity percentage (primary) and then by rent (secondary, lower is better)
        budget_filtered['Rent_Score'] = 1 / (budget_filtered['Weekly_Rent'] / budget_filtered['Weekly_Rent'].min())
        budget_filtered['Combined_Score'] = (
            budget_filtered['Ethnicity_Percentage'] * 0.8 +  # 80% weight on ethnicity match
            budget_filtered['Rent_Score'] * 0.2               # 20% weight on rent affordability
        )
        
        # Sort by combined score
        recommendations = budget_filtered.nlargest(top_n, 'Combined_Score')
        
        # Format output
        result_cols = ['Name', 'Weekly_Rent', 'Ethnicity_Percentage', 'Combined_Score', 'Ethnic Breakdown 2016']
        
        return recommendations[result_cols].round(2)
    
    def show_available_ethnicities(self):
        """Display all available ethnicities"""
        print("🌍 Available Ethnicities:")
        for i, ethnicity in enumerate(self.ethnicities, 1):
            print(f"{i:2d}. {ethnicity}")
    
    def get_ethnicity_stats(self, ethnicity):
        """Get statistics for a specific ethnicity across all suburbs"""
        percentages = []
        suburbs_with_ethnicity = []
        
        for _, row in self.df.iterrows():
            if ethnicity in row['Ethnic_Dict']:
                percentages.append(row['Ethnic_Dict'][ethnicity])
                suburbs_with_ethnicity.append(row['Name'])
        
        if percentages:
            print(f"\n📊 {ethnicity} Statistics:")
            print(f"• Found in {len(percentages)} suburbs")
            print(f"• Average percentage: {np.mean(percentages):.1f}%")
            print(f"• Highest percentage: {max(percentages):.1f}%")
            print(f"• Suburbs with highest {ethnicity} population:")
            
            # Show top 5 suburbs for this ethnicity
            suburb_percentages = list(zip(suburbs_with_ethnicity, percentages))
            suburb_percentages.sort(key=lambda x: x[1], reverse=True)
            for suburb, pct in suburb_percentages[:5]:
                print(f"  • {suburb}: {pct}%")

# =============================================================================
# RECOMMENDER SYSTEM 2: SIMILARITY-BASED RECOMMENDER
# =============================================================================

class SimilarityRecommender:
    """Similarity-based recommender using semantic vectorization of ethnic breakdown"""
    
    def __init__(self, df):
        self.df = df.copy()
        self.prepare_semantic_features()
        
    def prepare_semantic_features(self):
        """Prepare semantic features from ethnic breakdown text"""
        
        # Use TF-IDF on enhanced ethnic text
        self.tfidf_vectorizer = TfidfVectorizer(
            max_features=200,           # Limit features for efficiency
            ngram_range=(1, 2),         # Use both unigrams and bigrams
            lowercase=True,
            token_pattern=r'\b\w+\b'    # Simple word tokenization
        )
        
        # Fit TF-IDF on enhanced ethnic text
        ethnic_texts = self.df['Enhanced_Ethnic_Text'].fillna('')
        self.ethnic_tfidf_matrix = self.tfidf_vectorizer.fit_transform(ethnic_texts)
        
        # Normalize rent data for similarity calculation
        rent_scaler = MinMaxScaler()
        rent_normalized = rent_scaler.fit_transform(self.df[['Weekly_Rent']])
        
        # Store normalized rent for later use
        self.rent_normalized = rent_normalized.flatten()
        
        print("✅ Semantic features prepared using TF-IDF vectorization")
        print(f"📊 Ethnic features: {self.ethnic_tfidf_matrix.shape[1]} TF-IDF features")
    
    def find_similar_suburbs(self, input_suburbs, top_n=10, exclude_input=True, rent_weight=0.3):
        """
        Find suburbs similar to input suburbs based on ethnic breakdown
        
        Parameters:
        - input_suburbs: list of suburb names
        - top_n: number of recommendations
        - exclude_input: whether to exclude input suburbs from results
        - rent_weight: weight for rent similarity (0-1)
        
        Returns:
        - DataFrame with similar suburbs and similarity scores
        """
        
        # Find indices of input suburbs
        input_indices = []
        found_suburbs = []
        
        for suburb in input_suburbs:
            matches = self.df[self.df['Name'].str.lower() == suburb.lower()]
            if len(matches) > 0:
                idx = matches.index[0]
                input_indices.append(idx)
                found_suburbs.append(matches.iloc[0]['Name'])
            else:
                print(f"⚠️  Suburb '{suburb}' not found in dataset")
        
        if len(input_indices) == 0:
            print("❌ No valid input suburbs found")
            return pd.DataFrame()
        
        print(f"✅ Found suburbs: {', '.join(found_suburbs)}")
        
        # Calculate average ethnic TF-IDF features of input suburbs
        input_ethnic_features = self.ethnic_tfidf_matrix[input_indices]
        avg_ethnic_features = np.mean(input_ethnic_features.toarray(), axis=0)
        
        # Calculate average rent of input suburbs
        avg_rent = np.mean(self.rent_normalized[input_indices])
        
        # Calculate ethnic similarity with all suburbs
        ethnic_similarities = cosine_similarity([avg_ethnic_features], self.ethnic_tfidf_matrix)[0]
        
        # Calculate rent similarity (inverse of absolute difference)
        rent_differences = np.abs(self.rent_normalized - avg_rent)
        rent_similarities = 1 - rent_differences  # Convert to similarity (higher is better)
        
        # Combine ethnic and rent similarities
        combined_similarities = (
            ethnic_similarities * (1 - rent_weight) +
            rent_similarities * rent_weight
        )
        
        # Create results dataframe
        results = self.df.copy()
        results['Ethnic_Similarity'] = ethnic_similarities
        results['Rent_Similarity'] = rent_similarities
        results['Combined_Similarity'] = combined_similarities
        
        # Exclude input suburbs if requested
        if exclude_input:
            results = results.drop(input_indices)
        
        # Sort by combined similarity and get top N
        recommendations = results.nlargest(top_n, 'Combined_Similarity')
        
        # Format output
        result_cols = ['Name', 'Weekly_Rent', 'Ethnic_Similarity', 'Rent_Similarity', 
                      'Combined_Similarity', 'Ethnic Breakdown 2016']
        
        return recommendations[result_cols].round(3)
    
    def analyze_suburb_profile(self, suburb_name):
        """Analyze the ethnic profile of a suburb"""
        
        matches = self.df[self.df['Name'].str.lower() == suburb_name.lower()]
        if len(matches) == 0:
            print(f"❌ Suburb '{suburb_name}' not found")
            return
        
        suburb_data = matches.iloc[0]
        
        print(f"\n🏘️  Profile Analysis: {suburb_data['Name']}")
        print("=" * 50)
        print(f"Weekly Rent: ${suburb_data['Weekly_Rent']}")
        
        print(f"\n🌍 Ethnic Breakdown:")
        ethnic_items = list(suburb_data['Ethnic_Dict'].items())
        ethnic_items.sort(key=lambda x: x[1], reverse=True)  # Sort by percentage
        
        for ethnicity, percentage in ethnic_items:
            print(f"  • {ethnicity}: {percentage}%")
    
    def compare_suburbs(self, suburb1, suburb2):
        """Compare two suburbs ethnically"""
        
        # Find both suburbs
        matches1 = self.df[self.df['Name'].str.lower() == suburb1.lower()]
        matches2 = self.df[self.df['Name'].str.lower() == suburb2.lower()]
        
        if len(matches1) == 0:
            print(f"❌ Suburb '{suburb1}' not found")
            return
        if len(matches2) == 0:
            print(f"❌ Suburb '{suburb2}' not found")
            return
        
        idx1, idx2 = matches1.index[0], matches2.index[0]
        data1, data2 = matches1.iloc[0], matches2.iloc[0]
        
        # Calculate similarity
        similarity = cosine_similarity(
            self.ethnic_tfidf_matrix[idx1:idx1+1], 
            self.ethnic_tfidf_matrix[idx2:idx2+1]
        )[0][0]
        
        print(f"\n🔍 Comparison: {data1['Name']} vs {data2['Name']}")
        print("=" * 60)
        print(f"Ethnic Similarity Score: {similarity:.3f}")
        print(f"Rent Difference: ${abs(data1['Weekly_Rent'] - data2['Weekly_Rent'])}")
        
        print(f"\n{data1['Name']} - Rent: ${data1['Weekly_Rent']}")
        for eth, pct in sorted(data1['Ethnic_Dict'].items(), key=lambda x: x[1], reverse=True):
            print(f"  • {eth}: {pct}%")
        
        print(f"\n{data2['Name']} - Rent: ${data2['Weekly_Rent']}")
        for eth, pct in sorted(data2['Ethnic_Dict'].items(), key=lambda x: x[1], reverse=True):
            print(f"  • {eth}: {pct}%")

# =============================================================================
# MAIN EXECUTION AND EXAMPLES
# =============================================================================

def main():
    """Main execution function with examples"""
    
    # Load data
    print("📚 Loading and preprocessing data...")
    df = load_and_preprocess_data()
    
    # Initialize recommenders
    print("\n🚀 Initializing recommender systems...")
    ethnic_recommender = EthnicBudgetRecommender(df)
    similarity_recommender = SimilarityRecommender(df)
    
    print("\n" + "="*70)
    print("🎯 EXAMPLE 1: ETHNIC BACKGROUND + BUDGET RECOMMENDER")
    print("="*70)
    
    # Example 1: Find suburbs for Chinese background with $800/week budget
    print("\n🔍 Finding suburbs for Chinese background with $800/week budget:")
    chinese_recs = ethnic_recommender.recommend_suburbs('Chinese', 800, top_n=5)
    if len(chinese_recs) > 0:
        print(chinese_recs[['Name', 'Weekly_Rent', 'Ethnicity_Percentage', 'Combined_Score']])
    
    # Example 2: Find suburbs for Indian background with $600/week budget
    print("\n🔍 Finding suburbs for Indian background with $600/week budget:")
    indian_recs = ethnic_recommender.recommend_suburbs('Indian', 600, top_n=5)
    if len(indian_recs) > 0:
        print(indian_recs[['Name', 'Weekly_Rent', 'Ethnicity_Percentage', 'Combined_Score']])
    
    print("\n" + "="*70)
    print("🎯 EXAMPLE 2: SIMILARITY-BASED RECOMMENDER")
    print("="*70)
    
    # Example 3: Find suburbs similar to Hornsby and Oatley
    print("\n🔍 Finding suburbs similar to Hornsby and Oatley:")
    similar_recs = similarity_recommender.find_similar_suburbs(['Hornsby', 'Oatley'], top_n=5)
    if len(similar_recs) > 0:
        print(similar_recs[['Name', 'Weekly_Rent', 'Ethnic_Similarity', 'Combined_Similarity']])
    
    # Example 4: Analyze suburb profile
    print("\n🔍 Analyzing Hornsby suburb profile:")
    similarity_recommender.analyze_suburb_profile('Hornsby')
    
    # Example 5: Compare two suburbs
    print("\n🔍 Comparing Hornsby vs Oatley:")
    similarity_recommender.compare_suburbs('Hornsby', 'Oatley')
    
    print("\n" + "="*70)
    print("📋 AVAILABLE ETHNICITIES")
    print("="*70)
    ethnic_recommender.show_available_ethnicities()
    
    return ethnic_recommender, similarity_recommender, df

# =============================================================================
# INTERACTIVE FUNCTIONS
# =============================================================================

def interactive_ethnic_search(ethnic_recommender):
    """Interactive function for ethnic background search"""
    print("\n🔍 Interactive Ethnic Background Search")
    print("-" * 40)
    
    ethnicity = input("Enter preferred ethnicity: ").strip()
    budget = float(input("Enter maximum weekly rent budget: $"))
    n_results = int(input("Number of recommendations (default 10): ") or 10)
    
    results = ethnic_recommender.recommend_suburbs(ethnicity, budget, n_results)
    
    if len(results) > 0:
        print(f"\n✅ Top {len(results)} recommendations:")
        for i, (_, row) in enumerate(results.iterrows(), 1):
            print(f"\n{i}. {row['Name']}")
            print(f"   • Weekly Rent: ${row['Weekly_Rent']}")
            print(f"   • {ethnicity} Population: {row['Ethnicity_Percentage']}%")
            print(f"   • Score: {row['Combined_Score']:.2f}")
    else:
        print("❌ No recommendations found with your criteria")

def interactive_similarity_search(similarity_recommender):
    """Interactive function for similarity search"""
    print("\n🔍 Interactive Similarity Search")
    print("-" * 40)
    
    suburbs_input = input("Enter 2-5 suburb names (comma-separated): ").strip()
    input_suburbs = [s.strip() for s in suburbs_input.split(',')]
    n_results = int(input("Number of recommendations (default 10): ") or 10)
    rent_weight = float(input("Rent similarity weight 0-1 (default 0.3): ") or 0.3)
    
    results = similarity_recommender.find_similar_suburbs(input_suburbs, n_results, rent_weight=rent_weight)
    
    if len(results) > 0:
        print(f"\n✅ Top {len(results)} similar suburbs:")
        for i, (_, row) in enumerate(results.iterrows(), 1):
            print(f"\n{i}. {row['Name']}")
            print(f"   • Weekly Rent: ${row['Weekly_Rent']}")
            print(f"   • Ethnic Similarity: {row['Ethnic_Similarity']:.3f}")
            print(f"   • Combined Similarity: {row['Combined_Similarity']:.3f}")
    else:
        print("❌ No recommendations found")

# Run the main function
if __name__ == "__main__":
    ethnic_recommender, similarity_recommender, df = main()
    
    print("\n" + "="*70)
    print("🎮 INTERACTIVE MODE")
    print("="*70)
    print("You can now use the following functions:")
    print("• interactive_ethnic_search(ethnic_recommender)")
    print("• interactive_similarity_search(similarity_recommender)")
    print("• similarity_recommender.analyze_suburb_profile('suburb_name')")
    print("• similarity_recommender.compare_suburbs('suburb1', 'suburb2')")
    print("• ethnic_recommender.get_ethnicity_stats('ethnicity_name')")

📊 Sydney Suburb Recommender Systems (Simplified)
📚 Loading and preprocessing data...
✅ Loaded 421 suburbs
✅ Cleaned data: 402 suburbs with complete information
📊 Rent range: $310 - $2250 per week

🚀 Initializing recommender systems...
🌍 Found 30 unique ethnicities
✅ Semantic features prepared using TF-IDF vectorization
📊 Ethnic features: 200 TF-IDF features

🎯 EXAMPLE 1: ETHNIC BACKGROUND + BUDGET RECOMMENDER

🔍 Finding suburbs for Chinese background with $800/week budget:
           Name  Weekly_Rent  Ethnicity_Percentage  Combined_Score
66   Hurstville        590.0                  49.4           39.63
105     Burwood        700.0                  45.1           36.17
93       Rhodes        645.0                  44.5           35.70
39     Eastwood        690.0                  38.4           30.81
114      Ultimo        750.0                  36.6           29.36

🔍 Finding suburbs for Indian background with $600/week budget:
               Name  Weekly_Rent  Ethnicity_Percentage  

In [33]:
interactive_ethnic_search(ethnic_recommender)



🔍 Interactive Ethnic Background Search
----------------------------------------

✅ Top 5 recommendations:

1. Willmot
   • Weekly Rent: $310.0
   • Nepali Population: 0%
   • Score: 0.20

2. Lethbridge Park
   • Weekly Rent: $325.0
   • Nepali Population: 0%
   • Score: 0.19

3. Bidwill
   • Weekly Rent: $330.0
   • Nepali Population: 0%
   • Score: 0.19

4. Tregear
   • Weekly Rent: $330.0
   • Nepali Population: 0%
   • Score: 0.19

5. Blackett
   • Weekly Rent: $330.0
   • Nepali Population: 0%
   • Score: 0.19
