In [3]:
import sys
import os
import time
import requests
import re
import numpy as np
import pandas as pd
import glob
from bs4 import BeautifulSoup

# --- VISUALIZATION ---
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns  

# --- MAPPING ---
import folium
from folium.plugins import HeatMap

# --- SELENIUM ---
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.firefox import GeckoDriverManager

# --- GUI (TKINTER) ---
from tkinter import *
from tkinter import ttk, messagebox  # <--- TH√äM messagebox V√ÄO ƒê√ÇY
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import webbrowser

# --- CONFIG ---
# Default HTTP headers for the `requests` calls in this notebook: a desktop-browser
# User-Agent string, passed as `headers=HEADERS`.
HEADERS = {
    'User-Agent': (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/128.0.0.0 Safari/537.36"
    ),
}

In [4]:
# This cell fetches the list of cities.
# Destination CSV for the city catalogue; a bare file name, so it is resolved
# against the current working directory.
OUTPUT_FILE = 'cities_mapping.csv'
def get_cities_list():
    """Scrape the Inside Airbnb "explore" page and save the city catalogue to OUTPUT_FILE.

    Every ``div.continentContainer`` is walked in document order. Inside a
    container the ``<p>`` tags are siblings: a ``countryLabel`` paragraph sets the
    country that applies to the ``cityLabel`` paragraphs following it. One row is
    produced per city link with the columns ``display_name``, ``slug``, ``city``,
    ``region`` and ``country``. When at least one city is found the rows are
    written to OUTPUT_FILE and the first five are displayed; otherwise a message
    is printed and nothing is written.

    Returns:
        None

    Raises:
        requests.HTTPError: If the explore page responds with an error status.
    """
    response = requests.get('https://insideairbnb.com/explore/', headers=HEADERS, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    data_list = []

    for continent in soup.find_all('div', class_='continentContainer'):
        current_country = "Unknown"

        for p in continent.find_all('p'):
            css_classes = p.get('class', [])

            # Case 1: the paragraph names a country.
            if 'countryLabel' in css_classes:
                current_country = p.get_text(strip=True)
                continue

            # Case 2: the paragraph names a city; anything else is ignored.
            if 'cityLabel' not in css_classes:
                continue

            link_tag = p.find('a')
            href = link_tag.get('href') if link_tag else None
            if not href:
                # No link (or a link without href) means there is no slug to record.
                continue

            # City name is the link text (e.g. "Montreal").
            city_name = link_tag.get_text(strip=True)

            # Slug comes from the href (e.g. "/montreal/" -> "montreal").
            slug = href.strip('/')

            # The region is the paragraph text outside the <a> (e.g. "Montreal Quebec"
            # -> "Quebec"). Only the FIRST occurrence of the city name is removed so a
            # region that repeats the city name ("Quebec Quebec") is not wiped out.
            # Assumes the link text precedes the region text, as in the example above.
            full_text = p.get_text(" ", strip=True)
            region = full_text.replace(city_name, "", 1).strip()

            # Human-friendly label for the search bar,
            # e.g. "Montreal, Quebec (Canada)".
            if region:
                display_name = f"{city_name}, {region} ({current_country})"
            else:
                display_name = f"{city_name} ({current_country})"

            data_list.append({
                'display_name': display_name,  # shown in the search bar
                'slug': slug,                  # used to build scraping URLs
                'city': city_name,
                'region': region,
                'country': current_country
            })

    # Persist to CSV.
    if data_list:
        df = pd.DataFrame(data_list)
        df.to_csv(OUTPUT_FILE, index=False, encoding='utf-8')

        # Preview the first five rows.
        display(df.head())
    else:
        print("Kh√¥ng t√¨m th·∫•y th√†nh ph·ªë n√†o. H√£y ki·ªÉm tra l·∫°i c·∫•u tr√∫c web.")


In [5]:
# This cell downloads the city data files into the raw-data folder.
# Root folder under which per-city / per-snapshot folders are created.
OUTPUT_ROOT = "../../raw"
# File names to download; compared against the basename of each link's URL.
TARGET_FILES_ENDINGS = [
    "listings.csv.gz",
    "calendar.csv.gz",
    "reviews.csv.gz",
    "neighbourhoods.csv",
]

def download_file(url, folder_path):
    """Download ``url`` into ``folder_path``, named after the URL's last path segment.

    The body is streamed to a temporary ``<name>.part`` file and renamed into
    place only after the transfer completes, so an interrupted download never
    leaves a truncated file under the final name.

    Args:
        url (str): Address of the file to fetch.
        folder_path (str): Existing directory that receives the file.

    Returns:
        bool: True when the file was written, False when the download failed.
        Failures are printed rather than raised.
    """
    filename = url.split("/")[-1]
    save_path = os.path.join(folder_path, filename)
    partial_path = save_path + ".part"

    try:
        # (connect, read) timeouts keep a stalled server from hanging the notebook;
        # stream=True avoids holding a whole archive in memory.
        with requests.get(url, headers=HEADERS, stream=True, timeout=(10, 120)) as response:
            response.raise_for_status()

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

        os.replace(partial_path, save_path)
        return True
    except Exception as e:
        print(f" Error: {e}")
        # Best-effort cleanup of the incomplete temporary file.
        try:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        except OSError:
            pass
        return False

def scrape_robust(city_name):
    """Download the Inside Airbnb snapshot files listed for one city.

    Opens the "get the data" page in Firefox through Selenium, tries to click the
    "show" link that follows the city's ``<h3>`` heading, then parses the rendered
    HTML. Starting at the city's heading and stopping at the next ``<h3>``, every
    ``<table>`` that follows an ``<h4>`` containing a date is scanned for links
    whose basename is in TARGET_FILES_ENDINGS; those files are saved under
    ``OUTPUT_ROOT/<city_name lower-cased>/<snapshot date>/``.

    Args:
        city_name (str): City to look for; matched case-insensitively as a
            substring of the heading text.

    Returns:
        int: Number of files whose download was attempted (0 when the heading
        was not found or an error interrupted the run before any download).
    """
    processed_count = 0
    needle = city_name.lower()

    options = Options()  # headless mode is not enabled; a browser window opens
    driver = webdriver.Firefox(service=Service(GeckoDriverManager().install()), options=options)
    try:
        # https for consistency with the explore-page scraper in this notebook.
        driver.get("https://insideairbnb.com/get-the-data/")

        # Case-insensitive heading match. XPath 1.0 string literals have no escape
        # character, so use double quotes when the city name contains an apostrophe.
        quoted_needle = f'"{needle}"' if "'" in needle else f"'{needle}'"
        city_xpath = (
            "//h3[contains(translate(text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
            f"'abcdefghijklmnopqrstuvwxyz'), {quoted_needle})]"
        )

        try:
            # 1. Locate the city's heading.
            city_header = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.XPATH, city_xpath))
            )

            # Scroll the heading into view.
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", city_header)
            time.sleep(1)

            # 2. Click the first <a> after the heading whose text contains "show".
            show_btn_xpath = f"{city_xpath}/following::a[contains(text(), 'show')][1]"
            show_button = driver.find_element(By.XPATH, show_btn_xpath)
            driver.execute_script("arguments[0].click();", show_button)
            time.sleep(5)

        except Exception as e:
            # Best effort: parsing continues with whatever the page currently shows.
            print(f"L·ªói kh√¥ng li√™n quan ƒë·∫øn x·ª≠ l√Ω n√∫t show: {e}")

        # 3. Parse the rendered page and download the files.
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        target_h3 = None
        for h3 in soup.find_all('h3'):
            if needle in " ".join(h3.get_text().split()).lower():
                target_h3 = h3
                break

        if not target_h3:
            print("V·∫´n kh√¥ng th·∫•y H3 trong HTML sau khi t·∫£i.")
            return processed_count

        current_date_str = None
        found_snapshots = set()

        for element in target_h3.next_elements:
            # The next <h3> starts another city: stop.
            if element.name == 'h3':
                break

            # An <h4> carries the date of the snapshot being processed.
            if element.name == 'h4':
                raw_date = element.get_text(strip=True)
                match = re.search(r"(\d{1,2}\s+[A-Za-z]+\,?\s+\d{4})", raw_date)
                if match:
                    current_date_str = match.group(1)

            if element.name == 'table':
                if not current_date_str:
                    continue

                folder_path = os.path.join(OUTPUT_ROOT, needle, current_date_str)
                folder_created = False

                for link in element.find_all('a', href=True):
                    f_url = link['href']
                    if os.path.basename(f_url) not in TARGET_FILES_ENDINGS:
                        continue

                    if not folder_created:
                        if current_date_str not in found_snapshots:
                            print(f"  Snapshot: {current_date_str}")
                            found_snapshots.add(current_date_str)
                        # exist_ok avoids the check-then-create race.
                        os.makedirs(folder_path, exist_ok=True)
                        folder_created = True

                    download_file(f_url, folder_path)
                    processed_count += 1

    except Exception as e:
        print(f"L·ªói t·ªïng: {e}")
    finally:
        driver.quit()

    return processed_count

def crawl_longtitude_and_latitude(city_name):
    """Look up a bounding box for ``city_name`` with the Nominatim search API.

    Only the first search result is used. Its ``boundingbox`` entries are read
    positionally as minimum latitude, maximum latitude, minimum longitude and
    maximum longitude.

    The bounds are rounded to two decimals *outward* (minimums down, maximums
    up). Rounding to nearest could shrink the box by up to 0.005 degrees and
    wrongly place points near the edge outside of it.

    Args:
        city_name (str): Free-text query sent as the ``q`` parameter.

    Returns:
        dict: ``min_latitude``, ``max_latitude``, ``min_longitude`` and
        ``max_longitude`` as floats.

    Raises:
        requests.HTTPError: If the service responds with an error status.
        ValueError: If there is no result or the result has no usable bounding box.
    """
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        params={"q": city_name, "format": "json", "limit": 1},
        headers=HEADERS,
        timeout=10,  # never block the notebook on an unresponsive service
    )
    response.raise_for_status()

    data = response.json()
    if not data:
        raise ValueError(f"Kh√¥ng t√¨m th·∫•y t·ªça ƒë·ªô cho th√†nh ph·ªë: {city_name}")

    bbox = data[0].get("boundingbox")
    if not bbox:
        raise ValueError("Kh√¥ng t√¨m th·∫•y boundingbox trong d·ªØ li·ªáu tr·∫£ v·ªÅ")

    lat_lo, lat_hi, lon_lo, lon_hi = (float(v) for v in bbox[:4])

    def _down(value):
        # Largest 2-decimal number that is <= value.
        return round(float(np.floor(value * 100)) / 100, 2)

    def _up(value):
        # Smallest 2-decimal number that is >= value.
        return round(float(np.ceil(value * 100)) / 100, 2)

    return {
        "min_latitude": _down(lat_lo),
        "max_latitude": _up(lat_hi),
        "min_longitude": _down(lon_lo),
        "max_longitude": _up(lon_hi)
    }


In [6]:
def clean_price(price_value):
    """Convert a raw price such as "$1,234.00" to a float.

    Args:
        price_value: A number, a string containing a number (optionally with a
            dollar sign and thousands separators), or a missing value.

    Returns:
        float: The parsed price, or NaN when the value is missing or no number
        can be extracted.
    """
    if pd.isna(price_value):
        return np.nan
    if isinstance(price_value, (int, float)):
        return float(price_value)

    text = str(price_value).strip().replace('$', '')
    # The lookahead requires a digit in the matched run, so punctuation that
    # appears before the number (e.g. "approx., 100") no longer becomes the match.
    match = re.search(r"[-+]?(?=[,.]*\d)[0-9,.]+", text)
    if not match:
        return np.nan

    try:
        return float(match.group(0).replace(',', ''))
    except ValueError:
        # e.g. "1.2.3": digits were found but they do not form a valid number.
        return np.nan


def process_city_data(city_name: str) -> None:
    """
    Clean every raw snapshot of one city and write the processed CSVs plus a QA report.

    For each sub-folder of ``../../raw/<city_name>`` the four raw files
    (listings, calendar, reviews, neighbourhoods) are loaded, quality flags are
    computed, and the results are written to
    ``../../processed/<city_name>/<snapshot folder name>/``. A summary of all QA
    rules across snapshots is written to ``../../reports/qa_summary_<city_name>.csv``.
    A snapshot folder missing any of the four files is skipped with a message.

    The city's bounding box is not a parameter: it is fetched once by calling
    ``crawl_longtitude_and_latitude(city_name)``, so any exception raised by that
    call propagates before processing starts.

    Args:
        city_name (str): Name of the city folder (e.g. 'brussels', 'berlin').
    """
    
    # Build the input/output paths from city_name.
    RAW_DIR = os.path.join("../..", 'raw', city_name)
    PROCESSED_DIR = os.path.join("../..", 'processed', city_name)
    REPORTS_DIR = os.path.join("../..", 'reports')
    # Bounding box used by the QA002 coordinate check below.
    coors = crawl_longtitude_and_latitude(city_name)
    os.makedirs(PROCESSED_DIR, exist_ok=True)
    os.makedirs(REPORTS_DIR, exist_ok=True)


    # Every directory directly under RAW_DIR is treated as one snapshot.
    snapshot_folders = [f for f in glob.glob(os.path.join(RAW_DIR, '*')) if os.path.isdir(f)]
    qa_summary_list = []

    print(f"--- B·∫ÆT ƒê·∫¶U X·ª¨ L√ù {len(snapshot_folders)} SNAPSHOTS CHO: {city_name.upper()} ---")

    for folder_path in snapshot_folders:
        snapshot_name = os.path.basename(folder_path)
        print(f"\n>> ƒêang x·ª≠ l√Ω: {snapshot_name}")

        try:

            listings_df = pd.read_csv(os.path.join(folder_path, 'listings.csv.gz'), low_memory=True)
            calendar_df = pd.read_csv(os.path.join(folder_path, 'calendar.csv.gz'), low_memory=True)
            reviews_df = pd.read_csv(os.path.join(folder_path, 'reviews.csv.gz'), low_memory=False)
            neigh_df = pd.read_csv(os.path.join(folder_path, 'neighbourhoods.csv'))
        except FileNotFoundError:
            # Only a missing file is tolerated; other read errors propagate.
            print(f"   [!] Thi·∫øu file trong folder {snapshot_name}, b·ªè qua.")
            continue

        # ---------------------------------------------------------
        # 1. LISTINGS (basics & coordinates)
        # ---------------------------------------------------------
        listings_df['price_numeric'] = listings_df['price'].apply(clean_price)
        
        # [QA1] Flag price <= 0 (a missing price counts as 0 and is flagged too).
        listings_df['qa_flag_price_zero'] = listings_df['price_numeric'].fillna(0) <= 0
        qa_summary_list.append({
            'snapshot_date': snapshot_name, 'rule_id': 'QA001_price_zero',
            'records_affected': int(listings_df['qa_flag_price_zero'].sum()),
            'handling_decision': 'G·∫Øn c·ªù'
        })

        # Parse dates and coordinates; unparseable values become NaT/NaN.
        listings_df['host_since'] = pd.to_datetime(listings_df['host_since'], errors='coerce')
        listings_df['latitude'] = pd.to_numeric(listings_df['latitude'], errors='coerce')
        listings_df['longitude'] = pd.to_numeric(listings_df['longitude'], errors='coerce')

        # [QA2] Flag coordinates outside the bounding box held in `coors`.
        # Rows with NaN coordinates are not flagged: comparisons with NaN are False.
        listings_df['qa_flag_out_of_city'] = (
            (listings_df['latitude'] < coors["min_latitude"]) | (listings_df['latitude'] > coors['max_latitude']) |
            (listings_df['longitude'] < coors['min_longitude']) | (listings_df['longitude'] > coors['max_longitude'])
        )
        qa_summary_list.append({
            'snapshot_date': snapshot_name, 'rule_id': 'QA002_coords_out_of_bounds',
            'records_affected': int(listings_df['qa_flag_out_of_city'].sum()),
            'handling_decision': 'G·∫Øn c·ªù'
        })

        # [QA3] Duplicate IDs: keep the first row per id.
        # Note: the QA001/QA002 counts above were taken before this de-duplication.
        dups = listings_df.duplicated(subset=['id']).sum()
        if dups > 0:
            listings_df = listings_df.drop_duplicates(subset=['id'], keep='first')
        qa_summary_list.append({
            'snapshot_date': snapshot_name, 'rule_id': 'QA003_duplicate_ids',
            'records_affected': int(dups),
            'handling_decision': 'Xo√° d√≤ng tr√πng'
        })

        # ---------------------------------------------------------
        # 2. NEIGHBOURHOODS
        # ---------------------------------------------------------
        if 'neighbourhood_group' in neigh_df.columns: 
            neigh_df.drop(columns=['neighbourhood_group'], inplace=True)
        
        valid_neighbourhoods = set(neigh_df['neighbourhood'])
        
        # [QA4] Flag listings whose neighbourhood is not in neighbourhoods.csv;
        # prefers the 'neighbourhood_cleansed' column when it exists.
        col_neigh = 'neighbourhood_cleansed' if 'neighbourhood_cleansed' in listings_df.columns else 'neighbourhood'
        listings_df['qa_flag_invalid_neigh'] = ~listings_df[col_neigh].isin(valid_neighbourhoods)
        
        qa_summary_list.append({
            'snapshot_date': snapshot_name, 'rule_id': 'QA004_invalid_neighbourhood',
            'records_affected': int(listings_df['qa_flag_invalid_neigh'].sum()),
            'handling_decision': 'G·∫Øn c·ªù'
        })

        # ---------------------------------------------------------
        # 3. REVIEWS
        # ---------------------------------------------------------
        reviews_df['date'] = pd.to_datetime(reviews_df['date'], errors='coerce')
        
        # Drop reviews with no comment text.
        initial_reviews = len(reviews_df)
        reviews_df = reviews_df.dropna(subset=['comments'])
        

        # Drop reviews that point at a listing id absent from the listings table.
        valid_ids = set(listings_df['id'])
        reviews_df = reviews_df[reviews_df['listing_id'].isin(valid_ids)]
        
        # QA005 counts both kinds of removal together.
        removed_reviews = initial_reviews - len(reviews_df)
        qa_summary_list.append({
            'snapshot_date': snapshot_name, 'rule_id': 'QA005_orphaned_or_empty_reviews',
            'records_affected': int(removed_reviews),
            'handling_decision': 'Xo√° b·ªè'
        })

        # ---------------------------------------------------------
        # 4. CALENDAR
        # ---------------------------------------------------------
        calendar_df['date'] = pd.to_datetime(calendar_df['date'], errors='coerce')
        calendar_df['price_numeric'] = calendar_df['price'].apply(clean_price)
        if 'adjusted_price' in calendar_df.columns: calendar_df.drop(columns=['adjusted_price'], inplace=True)
        
        # Keep only calendar rows for listings that survived de-duplication.
        calendar_df = calendar_df[calendar_df['listing_id'].isin(valid_ids)]

        # ---------------------------------------------------------
        # 5. SAVE FILES
        # ---------------------------------------------------------
        out_dir = os.path.join(PROCESSED_DIR, snapshot_name)
        os.makedirs(out_dir, exist_ok=True)
        
        listings_df.to_csv(os.path.join(out_dir, 'listings_processed.csv'), index=False)
        calendar_df.to_csv(os.path.join(out_dir, 'calendar_processed.csv'), index=False)
        reviews_df.to_csv(os.path.join(out_dir, 'reviews_processed.csv'), index=False)
        neigh_df.to_csv(os.path.join(out_dir, 'neighbourhoods_processed.csv'), index=False)
        
        print(f"   -> ƒê√£ l∆∞u xong: {out_dir}")

    # Save the combined QA report for this city (all snapshots in one file).
    report_file = os.path.join(REPORTS_DIR, f'qa_summary_{city_name}.csv')
    pd.DataFrame(qa_summary_list).to_csv(report_file, index=False)
    print(f"\n--- HO√ÄN T·∫§T X·ª¨ L√ù {city_name.upper()}. Report saved to {report_file} ---")


In [7]:
def get_latest_processed_data(city_name):
    """Load ``listings_processed.csv`` from the most recent processed snapshot of a city.

    Snapshot folders live under ``../../processed/<city_name>/``. The newest one
    is chosen by the date in the folder name (for example "20 June, 2025").
    Folder creation time says when the data was processed, not which snapshot it
    holds, so it is only a tie-breaker and the fallback for folders whose name is
    not a recognisable date.

    Args:
        city_name (str): Name of the city folder.

    Returns:
        pandas.DataFrame | None: The listings of the latest snapshot, or None when
        the city folder is missing, has no snapshot folders, or the chosen
        snapshot has no listings file.
    """
    from datetime import datetime

    # Relative to the notebook's working directory.
    processed_root = os.path.join("../..", 'processed', city_name)

    if not os.path.exists(processed_root):
        print(f"Kh√¥ng t√¨m th·∫•y d·ªØ li·ªáu cho {city_name} t·∫°i {processed_root}")
        return None

    subfolders = [f.path for f in os.scandir(processed_root) if f.is_dir()]
    if not subfolders:
        print(f" Folder th√†nh ph·ªë r·ªóng: {city_name}")
        return None

    def _snapshot_key(path):
        # Sort key: dated folders rank above undated ones, then by date, then ctime.
        folder_name = os.path.basename(path)
        for fmt in ("%d %B, %Y", "%d %B %Y", "%d %b, %Y", "%d %b %Y"):
            try:
                parsed = datetime.strptime(folder_name, fmt)
            except ValueError:
                continue
            return (1, parsed.toordinal(), os.path.getctime(path))
        return (0, 0, os.path.getctime(path))

    latest_folder = max(subfolders, key=_snapshot_key)
    file_path = os.path.join(latest_folder, 'listings_processed.csv')

    if os.path.exists(file_path):
        return pd.read_csv(file_path)
    return None

def visualize_map(city_name):
    """Build an interactive Folium map for a city: listing-density heatmap plus price markers.

    Uses the latest processed listings. Rows without latitude, longitude or
    ``price_numeric`` are dropped, and listings at or above the 99th price
    percentile are excluded as outliers. If that filter would remove every row
    (e.g. all prices are identical) the unfiltered rows are used instead, so the
    map centre is always defined. At most 1000 listings (fixed random seed) get a
    circle marker coloured by price: below 80 green, below 150 yellow, otherwise
    red. The map is also saved as ``spatial_analysis_<city_name>.html`` in the
    working directory.

    Args:
        city_name (str): Name of the city folder.

    Returns:
        folium.Map | None: The map, or None when there is no usable data.
    """
    from html import escape

    print(f"üó∫Ô∏è ƒêANG T·∫†O B·∫¢N ƒê·ªí KH√îNG GIAN CHO: {city_name.upper()}")

    df = get_latest_processed_data(city_name)
    if df is None:
        print(" Kh√¥ng c√≥ d·ªØ li·ªáu.")
        return

    # --- CLEANING ---
    # The numeric price column must exist.
    if 'price_numeric' not in df.columns:
        print(" L·ªói: Kh√¥ng t√¨m th·∫•y c·ªôt 'price_numeric'. H√£y ki·ªÉm tra l·∫°i b∆∞·ªõc x·ª≠ l√Ω d·ªØ li·ªáu.")
        return

    df = df.dropna(subset=["latitude", "longitude", "price_numeric"])
    if df.empty:
        print(" Kh√¥ng c√≥ d·ªØ li·ªáu.")
        return

    q_high = df["price_numeric"].quantile(0.99)  # cut off the top 1% of prices
    df_clean = df[df["price_numeric"] < q_high].copy()
    if df_clean.empty:
        # The strict "<" removes everything when all prices equal the quantile.
        df_clean = df.copy()

    # --- MAP CENTRE ---
    center_lat = df_clean["latitude"].mean()
    center_lon = df_clean["longitude"].mean()

    # --- BASE MAP ---
    m = folium.Map(
        location=[center_lat, center_lon],
        zoom_start=12,
        tiles="CartoDB positron"
    )

    # ======================================================
    # 1. HEATMAP - listing density
    # ======================================================
    # HeatMap expects a list of [lat, lon] pairs.
    heat_data = df_clean[["latitude", "longitude"]].values.tolist()

    HeatMap(
        data=heat_data,
        radius=10,
        blur=15,
        min_opacity=0.4,
        gradient={0.4: 'blue', 0.65: 'lime', 1: 'red'}  # blue -> lime -> red
    ).add_to(m)

    # ======================================================
    # 2. CIRCLE MARKERS - price
    # ======================================================
    # Sample at most 1000 points so the browser stays responsive.
    df_sample = df_clean.sample(min(1000, len(df_clean)), random_state=42)

    for _, row in df_sample.iterrows():
        price = row["price_numeric"]

        if price < 80:
            color = "green"   # cheap
        elif price < 150:
            color = "yellow"  # mid-range
        else:
            color = "red"     # expensive

        # Listing names are free text from the dataset; escape them before they
        # are embedded in the tooltip's HTML.
        listing_name = escape(str(row.get('name', 'Ph√≤ng')))

        folium.CircleMarker(
            location=[row["latitude"], row["longitude"]],
            radius=5,
            fill=True,
            fill_opacity=0.7,
            fill_color=color,
            color="white",  # white outline for contrast
            weight=1,
            tooltip=f"<b>{listing_name}</b><br>Gi√°: ${int(price)}"
        ).add_to(m)

    # --- SAVE AS HTML ---
    output_file = f"spatial_analysis_{city_name}.html"
    m.save(output_file)

    print(f" ƒê√£ t·∫°o b·∫£n ƒë·ªì th√†nh c√¥ng: {output_file}")

    # Returned so the notebook can render it inline.
    return m



In [8]:
def plot_room_type(city_name):
    """Draw a horizontal bar chart of listings per room type for a city.

    Percentages are relative to all rows of the latest processed listings. The
    most frequent room type is highlighted with a large percentage label. The
    figure is closed before it is returned, so it is not displayed automatically.

    Args:
        city_name (str): Name of the city folder.

    Returns:
        matplotlib.figure.Figure | None: The figure, or None when there is no
        data or no room type to plot.
    """
    # 1. Load data.
    df = get_latest_processed_data(city_name)
    if df is None:
        return

    # 2. Count listings per room type (most frequent first).
    room_counts = df['room_type'].value_counts()
    if room_counts.empty:
        # Nothing to draw; max() and the percentage division below would fail.
        return

    total = len(df)
    categories = room_counts.index.tolist()
    values = room_counts.values.tolist()
    percentages = [v / total * 100 for v in values]

    # 3. One colour per category.
    colors = []
    for cat in categories:
        if 'Entire home' in cat:
            colors.append('red')
        elif 'Private room' in cat:
            colors.append('green')
        else:
            colors.append('#767676')

    fig, ax = plt.subplots(figsize=(11, 7))
    fig.patch.set_facecolor('white')
    ax.set_facecolor('white')

    # Horizontal bars, largest category at the top.
    y_pos = np.arange(len(categories))
    ax.barh(y_pos, values, color=colors, height=0.7)
    ax.invert_yaxis()

    ax.set_yticks(y_pos)
    ax.set_yticklabels(categories, fontsize=11, color='black', fontweight='bold')
    ax.tick_params(axis='x', colors='gray')
    ax.tick_params(axis='y', length=0)
    max_val = max(values)

    # Headline for the most common room type (index 0 after value_counts).
    ax.text(max_val * 0.97, -0.9, "PH·ªî BI·∫æN NH·∫§T",
            fontsize=10, fontweight='bold', color='black', ha='left')
    ax.text(max_val * 0.87, -0.65, f"{percentages[0]:.1f}%",
            fontsize=24, fontweight='bold', color='black', ha='left')
    ax.text(max_val * 1.04, -0.7, f"({categories[0]})",
            fontsize=12, fontweight='bold', color='black', ha='left')

    # Count, share and name to the right of every bar.
    for i, (val, pct) in enumerate(zip(values, percentages)):
        ax.text(max_val * 1.05, i, f"{val:,} ({pct:.1f}%)",
                fontsize=12, fontweight='bold', color=colors[i], ha='left', va='center')
        ax.text(max_val * 1.05, i + 0.2, categories[i].lower(),
                fontsize=10, color="#060505", ha='left')

    ax.set_title(f"Room Type - {city_name.upper()}",
                 fontsize=16, fontweight='bold', color='gray', loc='center', pad=20)
    ax.set_xlabel('Listings', fontsize=12)

    fig.tight_layout()
    plt.close(fig)
    return fig


In [9]:
def plot_activity(city_name):
    """Draw the estimated-occupancy histogram and headline activity numbers for a city.

    Estimated nights booked per listing =
    ``number_of_reviews_ltm / REVIEW_RATE * minimum_nights``, where minimum_nights
    is limited to the range 3..30 and the result is capped at 255 nights.
    Estimated income is nights booked times price, computed only for listings
    priced at 500 or less. The figure is closed before it is returned.

    Args:
        city_name (str): Name of the city folder.

    Returns:
        matplotlib.figure.Figure | None: The figure, or None when there is no data.
    """
    df = get_latest_processed_data(city_name)
    if df is None or df.empty:
        # An empty table would make every average NaN and int(NaN) would raise.
        return

    # Share of stays that leave a review; the original author's note points to
    # https://insideairbnb.com/data-assumptions/ for this value.
    REVIEW_RATE = 0.305

    df['number_of_reviews_ltm'] = df['number_of_reviews_ltm'].fillna(0)

    # Stay length is taken as minimum_nights limited to 3..30 nights.
    adj_min_nights = pd.to_numeric(df['minimum_nights'], errors='coerce').clip(lower=3, upper=30)

    # Formula: (reviews in the last 12 months / REVIEW_RATE) * stay length,
    # capped at 255 nights per year (about 70% of the year).
    df['est_nights_booked'] = (
        (df['number_of_reviews_ltm'] / REVIEW_RATE) * adj_min_nights
    ).clip(upper=255)

    # Income uses listings priced <= 500 only. The explicit copy keeps the
    # column assignment from writing into a slice of `df`.
    df_price_filtered = df[df['price_numeric'] <= 500].copy()
    df_price_filtered['est_income'] = (
        df_price_filtered['est_nights_booked'] * df_price_filtered['price_numeric']
    )

    # Occupancy buckets: 0, 1-30, ... up to 241-255+.
    bins = [-1, 0, 30, 60, 90, 120, 150, 180, 210, 240, 999]
    labels = ['0', '1-30', '31-60', '61-90', '91-120', '121-150', '151-180', '181-210', '211-240', '241-255+']

    df['occupancy_group'] = pd.cut(df['est_nights_booked'], bins=bins, labels=labels)
    group_counts = df['occupancy_group'].value_counts().reindex(labels)

    def _whole(value):
        # Averages are NaN when no row qualifies; show 0 instead of crashing.
        return 0 if pd.isna(value) else int(value)

    avg_nights = _whole(df['est_nights_booked'].mean())
    avg_price = _whole(df_price_filtered['price_numeric'].mean())
    avg_income = _whole(df_price_filtered['est_income'].mean())

    # --- CHART ---
    fig, ax = plt.subplots(figsize=(11, 7))
    fig.patch.set_facecolor('white')
    ax.set_facecolor('white')

    x_pos = np.arange(len(labels))
    ax.bar(x_pos, group_counts, color='#4682B4', width=0.85)

    ax.set_xticks(x_pos)
    ax.set_xticklabels(labels, fontsize=9, color='#333333')

    # Hide the top and right borders.
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    ax.set_xlabel('Occupancy (last 12 months)', fontsize=10, fontweight='bold', color='#333333')
    ax.set_ylabel('Listings', fontsize=12, fontweight='bold', color='#333333')

    # Row 1: average nights booked.
    fig.text(0.9, 0.78, f"{avg_nights}", fontsize=32, fontweight='bold', color='#333333', ha='right')
    fig.text(0.9, 0.74, "average nights booked", fontsize=11, color='#666666', ha='right')

    # Row 2: price per night.
    fig.text(0.9, 0.60, f"${avg_price}", fontsize=22, fontweight='bold', color='#333333', ha='right')
    fig.text(0.9, 0.57, "price/night", fontsize=11, color='#666666', ha='right')

    # Row 3: average income.
    # NOTE(review): the price label is prefixed with "$" while this label uses a
    # different currency prefix - confirm which one is intended.
    fig.text(0.9, 0.42, f"‚Ç¨{avg_income:,}", fontsize=22, fontweight='bold', color='#333333', ha='right')
    fig.text(0.9, 0.39, "average income", fontsize=11, color='#666666', ha='right')

    ax.set_title(f"Activity - {city_name.upper()}",
                 fontsize=16, fontweight='bold', color='#004e66', loc='left', pad=20)

    # Leave room on the right for the headline numbers.
    fig.subplots_adjust(right=0.8)
    plt.close(fig)
    return fig



In [10]:
def plot_license_analysis(city_name):
    """Draw a donut chart of licence status (licensed / unlicensed / exempt / pending).

    The ``license`` column is lower-cased and stripped. Missing or empty values,
    'none', and anything containing 'unlicensed' count as Unlicensed; values
    containing 'exempt' or 'pending' get those statuses; everything else is
    Licensed. All four statuses are always listed, including those with a zero
    count. The figure is closed before it is returned.

    Args:
        city_name (str): Name of the city folder.

    Returns:
        matplotlib.figure.Figure | None: The figure, or None when there is no
        data or no listing to classify.
    """
    # 1. Load data.
    df = get_latest_processed_data(city_name)
    if df is None:
        return

    # 2. Classify licences.
    df['license'] = df['license'].astype(str).str.lower().str.strip()

    def categorize_license(val):
        if val == 'nan' or val == '' or val == 'none' or 'unlicensed' in val:
            return 'Unlicensed'
        elif 'exempt' in val:
            return 'Exempt'
        elif 'pending' in val:
            return 'Pending'
        else:
            return 'Licensed'

    df['license_status'] = df['license'].apply(categorize_license)

    # reindex forces all four statuses to be present, filling missing ones with 0.
    order = ['Licensed', 'Unlicensed', 'Exempt', 'Pending']
    status_counts = df['license_status'].value_counts().reindex(order, fill_value=0)

    labels = status_counts.index.tolist()
    counts = status_counts.values.tolist()
    total = sum(counts)
    if total == 0:
        # No listings: every share would be a division by zero and the pie
        # would have nothing to draw.
        return

    def _share(count):
        return count / total * 100

    # The most frequent status gets a bold label in the list below.
    winner_label = labels[int(np.argmax(counts))]

    # 3. Donut chart.
    fig, ax = plt.subplots(figsize=(11, 7))
    fig.patch.set_facecolor('white')

    color_map = {
        'Licensed': '#1f77b4',
        'Unlicensed': '#aec7e8',
        'Exempt': '#ff7f0e',
        'Pending': '#ffbb78'
    }
    colors = [color_map[label] for label in labels]

    ax.pie(
        counts,
        startangle=90,
        colors=colors,
        wedgeprops=dict(width=0.35, edgecolor='white')
    )

    # Headline: share of unlicensed listings.
    unlicensed_pct = _share(counts[labels.index('Unlicensed')])
    ax.text(1.3, 0.8, f"{unlicensed_pct:.1f}%", fontsize=36, fontweight='bold', color='#333333', ha='center')
    ax.text(1.3, 0.65, "unlicensed", fontsize=12, color='#666666', ha='center')

    # Detail list: one entry per status, zeros included.
    y_start = 0.45
    for label, count in zip(labels, counts):
        weight = 'bold' if label == winner_label else 'normal'

        ax.text(1.3, y_start, f"{count:,} ({_share(count):.1f}%)",
                fontsize=12, fontweight='bold', color='#333333', ha='center')
        ax.text(1.3, y_start - 0.1, label.lower(),
                fontsize=11, color=color_map[label], ha='center', fontweight=weight)

        y_start -= 0.25

    ax.set_title(f"Licenses Compliance - {city_name.upper()}",
                 fontsize=16, fontweight='bold', color='#004e66', loc='center', pad=20)

    # Proxy patches so the legend always shows all four colours.
    patches = [mpatches.Patch(color=color_map[label], label=label) for label in labels]
    ax.legend(handles=patches, title="Status", loc="center", bbox_to_anchor=(0.5, 0.5), frameon=False)

    fig.tight_layout()
    plt.close(fig)
    return fig



In [11]:
def plot_short_term_rentals(city_name):
    """Draw the distribution of minimum nights and the short-term vs longer-term split.

    A listing is short-term when its minimum stay is below 30 nights. Minimum
    stays above 35 nights are grouped into the last bar ("35+"). Rows whose
    ``minimum_nights`` cannot be read as a number are ignored. The figure is
    closed before it is returned.

    Args:
        city_name (str): Name of the city folder.

    Returns:
        matplotlib.figure.Figure | None: The figure, or None when there is no
        data or no listing with a numeric minimum stay.
    """
    df = get_latest_processed_data(city_name)
    if df is None:
        return

    # Coerce minimum_nights to numbers and drop rows that cannot be converted.
    df['minimum_nights'] = pd.to_numeric(df['minimum_nights'], errors='coerce')
    df = df.dropna(subset=['minimum_nights'])

    STR_THRESHOLD = 30     # stays shorter than this count as short-term
    MAX_NIGHTS_SHOWN = 35  # longer minimum stays are folded into the last bar

    count_str = int((df['minimum_nights'] < STR_THRESHOLD).sum())
    count_ltr = int((df['minimum_nights'] >= STR_THRESHOLD).sum())
    total = count_str + count_ltr
    if total == 0:
        # No usable rows: the percentages below would divide by zero.
        return

    pct_str = (count_str / total) * 100
    pct_ltr = (count_ltr / total) * 100

    counts = df['minimum_nights'].clip(upper=MAX_NIGHTS_SHOWN).value_counts().sort_index()

    # --- CHART ---
    fig, ax = plt.subplots(figsize=(11, 7))
    fig.patch.set_facecolor('white')
    ax.set_facecolor('white')

    ax.bar(counts.index, counts.values, color='#1f77b4', width=0.6)

    # Dashed line marking the 30-night boundary.
    ax.axvline(x=STR_THRESHOLD - 0.5, color='#333333', linestyle='--', linewidth=2, alpha=0.7)

    # Vertical caption next to the boundary line.
    ax.text(STR_THRESHOLD - 1.5, ax.get_ylim()[1]*0.5, 'STR Threshold (30 days)',
            rotation=90, color='#333333', fontweight='bold', ha='center')

    # Only the key values are labelled on the x axis; the last tick reads "35+".
    major_ticks = [1, 2, 3, 4, 5, 6, 7, 14, 21, 28, 35]
    ax.set_xticks(major_ticks)
    xtick_labels = [str(t) if t < MAX_NIGHTS_SHOWN else "35+" for t in major_ticks]
    ax.set_xticklabels(xtick_labels, fontsize=10)

    ax.set_xlabel('Minimum Nights', fontsize=11, fontweight='bold', color='#333333')
    ax.set_ylabel('Listings', fontsize=11, fontweight='bold', color='#333333')

    # Hide the top/right borders and soften the others.
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_color('#888888')
    ax.spines['bottom'].set_color('#888888')

    # --- HEADLINE NUMBER ---
    fig.text(0.75, 0.78, f"{pct_str:.1f}%", fontsize=27, fontweight='bold', color='#333333', ha='right')
    fig.text(0.75, 0.75, "short-term rentals", fontsize=10, color="#A61F1F", ha='right')

    # Row 1: short-term.
    fig.text(0.75, 0.60, f"{count_str:,} ({pct_str:.1f}%)", fontsize=10, fontweight='bold', color='#333333', ha='right')
    fig.text(0.75, 0.58, "short-term rentals", fontsize=9, color='#666666', ha='right')

    # Row 2: longer-term.
    fig.text(0.75, 0.45, f"{count_ltr:,} ({pct_ltr:.1f}%)", fontsize=10, fontweight='bold', color='#333333', ha='right')
    fig.text(0.75, 0.43, "longer-term rentals", fontsize=9, color='#666666', ha='right')

    ax.set_title(f"Short-Term Rentals Analysis - {city_name.upper()}",
                 fontsize=16, fontweight='bold', color='#004e66', loc='center', pad=20)

    # Narrow the plot area to leave room for the text on the right.
    fig.subplots_adjust(right=0.75)
    plt.close(fig)
    return fig


In [12]:
def plot_listings_per_host(city_name):
    """Draw a bar chart of listings grouped by how many listings their host has.

    Listings are bucketed by ``calculated_host_listings_count``; every value
    above 10 is folded into a single "10+" bucket. Summary figures (share of
    single vs. multi listings) are written to the right of the chart.

    Rows whose host count is missing or non-numeric are excluded from both the
    bars and the summary figures, so the two always agree.

    Args:
        city_name: City passed to ``get_latest_processed_data``; also used
            (upper-cased) in the chart title.

    Returns:
        The matplotlib ``Figure``, or ``None`` when no dataset is available.
        The figure is closed in pyplot before being returned; the caller is
        expected to embed or render it itself.
    """
    # Load the data
    df = get_latest_processed_data(city_name)
    if df is None:
        return None

    # Coerce the host listing count to numbers (bad values become NaN).
    # Work on a separate Series so the loaded frame is not modified.
    host_listing_counts = pd.to_numeric(df['calculated_host_listings_count'], errors='coerce')

    # Grouping: every host with more than 10 listings goes into bucket 10.
    # X axis is "listings per host", Y axis is the number of listings in that bucket.
    plot_data = host_listing_counts.clip(upper=10)

    # Number of listings per bucket (1, 2, ..., 10+); value_counts() drops NaN.
    counts = plot_data.value_counts().sort_index()

    # Single: bucket 1
    single_listings_count = int(counts.get(1, 0))

    # Total is taken from the counted rows, so listings with an unknown host
    # count are not silently reported as multi-listings.
    total_listings = int(counts.sum())

    # Multi: sum of buckets 2 through 10+
    multi_listings_count = total_listings - single_listings_count

    # Percentages (guard against an empty dataset)
    pct_multi = (multi_listings_count / total_listings) * 100 if total_listings > 0 else 0
    pct_single = (single_listings_count / total_listings) * 100 if total_listings > 0 else 0

    # Draw the chart
    fig, ax = plt.subplots(figsize=(11, 7))
    fig.patch.set_facecolor('white')
    ax.set_facecolor('white')

    # One bar per bucket; the clipped bucket is labelled "10+"
    x_labels = [str(int(i)) if i < 10 else "10+" for i in counts.index]
    ax.bar(x_labels, counts.values, color='#4682B4', width=0.7)  # SteelBlue

    # Axis decoration
    ax.set_xlabel('Listings per Host', fontsize=11, fontweight='bold', color='#333333')
    ax.set_ylabel('Listings', fontsize=11, fontweight='bold', color='#333333')

    # Hide the top/right frame lines and soften the remaining ones
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_color('#888888')
    ax.spines['bottom'].set_color('#888888')

    # --- BIG NUMBER: % MULTI-LISTINGS ---
    fig.text(0.85, 0.78, f"{pct_multi:.1f}%", fontsize=36, fontweight='bold', color='#333333', ha='right')
    fig.text(0.85, 0.74, "multi-listings", fontsize=12, color='#666666', ha='right')

    # Line 1: single listings
    fig.text(0.85, 0.60, f"{single_listings_count:,} ({pct_single:.1f}%)", fontsize=14, fontweight='bold', color='#333333', ha='right')
    fig.text(0.85, 0.57, "single listings", fontsize=11, color='#666666', ha='right')

    # Line 2: multi listings
    fig.text(0.85, 0.45, f"{multi_listings_count:,} ({pct_multi:.1f}%)", fontsize=14, fontweight='bold', color='#333333', ha='right')
    fig.text(0.85, 0.42, "multi-listings", fontsize=11, color='#666666', ha='right')

    # Title
    ax.set_title(f"Listings per Host - {city_name.upper()}",
                 fontsize=16, fontweight='bold', color='#004e66', loc='left', pad=20)

    # Shrink the plot area to leave room for the text on the right
    fig.subplots_adjust(right=0.85)
    plt.close(fig)
    return fig



In [13]:
def plot_list_top_30_hosts(city_name):
    """Render a table figure of the 30 hosts with the most listings.

    Listings are counted per ``(host_id, host_name)`` and per room type. Only
    the four room types listed in ``cols`` are kept, and ``Total`` is the sum
    over those four columns.

    Args:
        city_name: City passed to ``get_latest_processed_data``; also used in
            the table title.

    Returns:
        The matplotlib ``Figure`` holding the table, or ``None`` when there is
        no dataset or no host rows to show. The figure is closed in pyplot
        before being returned; the caller is expected to embed it itself.
    """
    # Load and prepare the data
    df = get_latest_processed_data(city_name)
    if df is None or df.empty:
        return None

    # Fill missing host names on a copy so the loaded frame is left untouched
    listings = df.assign(host_name=df['host_name'].fillna('Unknown'))

    host_view = listings.pivot_table(
        index=['host_id', 'host_name'],
        columns='room_type',
        values='id',
        aggfunc='count',
        fill_value=0
    )

    # Force a fixed column set/order; room types absent from the data become 0
    cols = ['Entire home/apt', 'Private room', 'Shared room', 'Hotel room']
    host_view = host_view.reindex(columns=cols, fill_value=0)

    host_view['Total'] = host_view.sum(axis=1)

    # Take the top 30
    top_30 = host_view.sort_values(by='Total', ascending=False).head(30)
    if top_30.empty:
        # ax.table() cannot be built from zero rows; report "no data" instead
        return None

    display_df = top_30.reset_index().drop(columns=['host_id'])
    # Truncate long host names so the first column stays readable
    display_df['host_name'] = display_df['host_name'].apply(lambda x: str(x)[:25] + '..' if len(str(x)) > 25 else str(x))
    display_df.columns = ['Host Name', 'Entire', 'Private', 'Shared', 'Hotel', 'TOTAL']

    # Figure height grows with the number of rows
    fig_height = len(display_df) * 0.3 + 1
    fig, ax = plt.subplots(figsize=(10, fig_height))

    # Hide the x/y axes; only the table is shown
    ax.axis('tight')
    ax.axis('off')
    table = ax.table(
        cellText=display_df.values,
        colLabels=display_df.columns,
        loc='center',
        cellLoc='center',
        colColours=["#61C416"] * len(display_df.columns)
    )

    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.5)
    # Row 0 is the header row: white bold text on the coloured background
    for (row, _col), cell in table.get_celld().items():
        if row == 0:
            cell.set_text_props(color='white', weight='bold')

    ax.set_title(f"Top 30 Hosts in {city_name}", fontsize=14, fontweight='bold', pad=20)

    fig.tight_layout()
    plt.close(fig)

    return fig


In [14]:
class SearchableComboBox:
    """Text entry with a drop-down list that is filtered while the user types.

    The widget is made of a wrapper ``Frame`` holding an entry and an arrow
    button, plus a ``Listbox`` that is placed directly below the wrapper when
    the drop-down is shown. The drop-down hides itself after
    ``AUTO_HIDE_MS`` milliseconds.

    Args:
        parent: Tk container the wrapper frame and the listbox are created in.
        options: Sequence of option strings offered in the drop-down.
    """

    # Delay before an open drop-down hides itself
    AUTO_HIDE_MS = 5000

    def __init__(self, parent, options):
        self.parent = parent
        self.options = options
        # Identifier of the pending auto-hide timer (None when nothing is scheduled)
        self.dropdown_id = None

        # Outer wrapper frame.
        # A plain Frame (not ttk) is used so the white background is easy to set.
        self.wrapper = Frame(parent, bg="white")

        # Text entry
        self.entry = ttk.Entry(self.wrapper, font=("Segoe UI", 14))
        self.entry.bind("<KeyRelease>", self.on_entry_key)
        self.entry.bind("<FocusIn>", self.show_dropdown)
        self.entry.pack(side=LEFT, fill=BOTH, expand=True)

        # Arrow button (a text glyph is used instead of an image file)
        self.btn = Button(self.wrapper, text="‚ñº", font=("Segoe UI", 10),
                          relief="flat", bg="white", command=self.toggle_dropdown)
        self.btn.pack(side=RIGHT, fill=Y)

        # Suggestion list. tkinter's Listbox is used because ttk has none.
        self.listbox = Listbox(parent, font=("Segoe UI", 12), height=10, bg="white", bd=1, relief="solid")
        self.listbox.bind("<<ListboxSelect>>", self.on_select)
        # Cancel the auto-hide timer when the listbox goes away; otherwise the
        # timer fires on a destroyed widget and Tk reports a background error.
        self.listbox.bind("<Destroy>", self._cancel_auto_hide)

        self.update_listbox(self.options)

    def pack(self, **kwargs):
        """Pack the wrapper frame into the parent layout (same kwargs as ``Frame.pack``)."""
        self.wrapper.pack(**kwargs)

    def get(self):
        """Return the current text of the entry."""
        return self.entry.get()

    def update_listbox(self, items):
        """Replace the listbox content with ``items``."""
        self.listbox.delete(0, END)
        for item in items:
            self.listbox.insert(END, item)

    def on_entry_key(self, event):
        """Filter the options by the typed prefix (case-insensitive) and show them."""
        typed_value = event.widget.get().strip().lower()
        if not typed_value:
            filtered_options = self.options
        else:
            filtered_options = [opt for opt in self.options if opt.lower().startswith(typed_value)]

        self.update_listbox(filtered_options)
        self.show_dropdown()

    def on_select(self, event):
        """Copy the selected listbox item into the entry and hide the drop-down."""
        selection = self.listbox.curselection()
        if selection:
            selected_option = self.listbox.get(selection[0])

            self.entry.delete(0, END)
            self.entry.insert(0, selected_option)
            self.hide_dropdown()

    def toggle_dropdown(self):
        """Show the drop-down if it is hidden, hide it if it is visible."""
        if self.listbox.winfo_ismapped():
            self.hide_dropdown()
        else:
            self.show_dropdown()

    def show_dropdown(self, event=None):
        """Place the listbox right below the wrapper and (re)start the auto-hide timer."""
        # place() is used so the list overlays the other widgets
        self.listbox.place(in_=self.wrapper, x=0, rely=1.0, relwidth=1.0, anchor="nw")
        self.listbox.lift()

        # Hide automatically after AUTO_HIDE_MS; restart the countdown on every call
        self._cancel_auto_hide()
        self.dropdown_id = self.listbox.after(self.AUTO_HIDE_MS, self.hide_dropdown)

    def hide_dropdown(self, event=None):
        """Hide the listbox and drop any pending auto-hide timer."""
        self._cancel_auto_hide()
        if self.listbox.winfo_exists():
            self.listbox.place_forget()

    def _cancel_auto_hide(self, event=None):
        """Cancel the pending auto-hide timer, if any."""
        if self.dropdown_id:
            self.listbox.after_cancel(self.dropdown_id)
        self.dropdown_id = None


# --- MAIN APP (GUI) ---
def Main_app():
    """Launch the Tkinter "Data Explorer" GUI and run until the window is closed.

    The app has two screens that replace each other inside one window:

    * a search screen where a city is picked from ``cities_mapping.csv``
      (a small built-in list is used when that file cannot be read);
    * an analysis screen that runs the pipeline for the chosen city
      (scrape, clean, build the map, draw the charts), writes progress to a
      log box, and stacks the resulting figures in a scrollable panel.

    The pipeline runs on the Tk thread; ``log`` calls ``window.update()`` so
    the UI keeps repainting. Because of that the user can leave the analysis
    screen while the pipeline is running, so the pipeline checks that its
    screen still exists before touching widgets.
    """
    # Local import: only needed to turn the map file into a file:// URL
    from pathlib import Path

    try:
        df = pd.read_csv('cities_mapping.csv')
        all_cities = sorted(df['city'].dropna().astype(str).unique().tolist())
    except Exception:
        # Best effort: any problem with the mapping file falls back to a short
        # default list. (Not a bare except, so Ctrl+C / SystemExit still work.)
        all_cities = ["London", "Paris", "New York"]

    window = Tk()
    window.title("Data Explorer")
    # Maximise the window. 'zoomed' is a valid wm state on Windows/macOS only;
    # X11 uses the '-zoomed' attribute instead.
    try:
        window.state('zoomed')
    except TclError:
        try:
            window.attributes('-zoomed', True)
        except TclError:
            pass  # window manager cannot maximise; keep the default size
    window.config(background="#F7F9FC")

    current_frame = None

    def switch_frame(new_frame_func, *args):
        """Destroy the current screen and show the one built by ``new_frame_func``."""
        nonlocal current_frame
        if current_frame:
            current_frame.destroy()
        current_frame = new_frame_func(*args)
        current_frame.pack(fill="both", expand=True)

    # --- SEARCH SCREEN ---
    def create_search_screen():
        frame = Frame(window, bg="#F7F9FC")
        card = Frame(frame, bg="white", padx=40, pady=40)
        card.place(relx=0.5, rely=0.5, anchor="center")

        Label(card, text="Data Explorer", font=("Segoe UI", 30, "bold"), fg="#FF5A5F", bg="white").pack(pady=(0, 10))

        search_combo = SearchableComboBox(card, all_cities)
        search_combo.pack(ipady=5, pady=20, fill=X)

        def run_analysis():
            city = search_combo.get()
            if city:
                switch_frame(create_analysis_screen, city)
            else:
                messagebox.showwarning("Nh·∫Øc nh·ªü", "Vui l√≤ng ch·ªçn m·ªôt th√†nh ph·ªë!")

        Button(card, text="PH√ÇN T√çCH D·ªÆ LI·ªÜU", font=("Segoe UI", 12, "bold"), bg="#FF5A5F", fg="white",
               relief="flat", padx=30, pady=12, command=run_analysis).pack()
        return frame

    # --- DASHBOARD SCREEN ---
    def create_analysis_screen(city_name):
        frame = Frame(window, bg="white")
        map_file = f"spatial_analysis_{city_name}.html"

        def open_map():
            # Browsers need an absolute file URL; a bare relative name is not
            # reliably resolved against the working directory.
            webbrowser.open(Path(map_file).resolve().as_uri())

        # Header
        header = Frame(frame, bg="#FF5A5F", height=60)
        header.pack(fill=X, side=TOP)
        Button(header, text="‚¨Ö Quay l·∫°i", font=("Segoe UI", 11), bg="white", fg="#FF5A5F", relief="flat",
               command=lambda: switch_frame(create_search_screen)).pack(side=LEFT, padx=20, pady=10)
        Label(header, text=f"B√ÅO C√ÅO PH√ÇN T√çCH: {city_name.upper()}", font=("Segoe UI", 16, "bold"), bg="#FF5A5F", fg="white").pack(side=LEFT, padx=20)

        # Body
        content = Frame(frame, bg="white")
        content.pack(fill=BOTH, expand=True)

        # Left column: progress log and map button
        left_panel = Frame(content, bg="#F7F7F7", width=400, padx=20, pady=20)
        left_panel.pack(side=LEFT, fill=Y)
        left_panel.pack_propagate(False)  # keep the fixed 400px width

        Label(left_panel, text="TI·∫æN TR√åNH", font=("Segoe UI", 14, "bold"), bg="#F7F7F7", fg="#333").pack(anchor="w")
        status_log = Text(left_panel, height=15, width=40, font=("Consolas", 9), bg="white", relief="flat", state=DISABLED)
        status_log.pack(pady=10, fill=X)

        Label(left_panel, text="B·∫¢N ƒê·ªí ƒê·ªäA L√ù", font=("Segoe UI", 14, "bold"), bg="#F7F7F7", fg="#333").pack(anchor="w", pady=(20, 0))
        btn_open_map = Button(left_panel, text="M·ªü B·∫£n ƒê·ªì (ph·∫£i l√™n web)", font=("Segoe UI", 11), bg="#00A699", fg="white",
                              state=DISABLED, command=open_map)
        btn_open_map.pack(pady=10, fill=X)

        # Right column: charts inside a vertically scrollable canvas
        right_panel = Frame(content, bg="white")
        right_panel.pack(side=RIGHT, fill=BOTH, expand=True)

        canvas = Canvas(right_panel, bg="white")
        scrollbar = Scrollbar(right_panel, orient="vertical", command=canvas.yview)
        scrollable_frame = Frame(canvas, bg="white")

        # Grow the scroll region whenever the inner frame changes size
        scrollable_frame.bind("<Configure>", lambda e: canvas.configure(scrollregion=canvas.bbox("all")))
        canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
        canvas.configure(yscrollcommand=scrollbar.set)

        canvas.pack(side=LEFT, fill=BOTH, expand=True)
        scrollbar.pack(side=RIGHT, fill=Y)

        def screen_alive():
            """Return True while this screen (and the window) still exists."""
            try:
                return bool(frame.winfo_exists())
            except TclError:
                return False  # the whole application has been destroyed

        # Append a line to the progress log and let Tk repaint
        def log(msg):
            if not screen_alive():
                return
            try:
                status_log.config(state=NORMAL)
                status_log.insert(END, ">> " + msg + "\n")
                status_log.see(END)
                status_log.config(state=DISABLED)
                # Processes pending events; the user may leave the screen here
                window.update()
            except TclError:
                pass  # screen disappeared while logging

        def run_pipeline():
            if not screen_alive():
                return  # user went back before the pipeline started
            try:
                # Step 1: scrape the data
                log("ƒêang c√†o d·ªØ li·ªáu ")
                if not screen_alive():
                    return
                scrape_robust(city_name)

                # Step 2: clean the data
                log("ƒêang l√†m s·∫°ch d·ªØ li·ªáu")
                if not screen_alive():
                    return
                process_city_data(city_name)

                # Step 3: build the map
                log("ƒêang t·∫°o b·∫£n ƒë·ªì")
                if not screen_alive():
                    return
                visualize_map(city_name)
                if not screen_alive():
                    return

                # Enable the map button only if the map file was produced
                if os.path.exists(map_file):
                    btn_open_map.config(state=NORMAL, bg="#00A699")
                    log("ƒê√£ t·∫°o xong b·∫£n ƒë·ªì!")
                else:
                    log("Kh√¥ng t√¨m th·∫•y file b·∫£n ƒë·ªì (Code visualize_map ch∆∞a ch·∫°y?)")

                # Step 4: draw the charts
                log("ƒêang v·∫Ω c√°c bi·ªÉu ƒë·ªì th·ªëng k√™...")

                charts = [
                    plot_room_type,
                    plot_activity,
                    plot_license_analysis,
                    plot_short_term_rentals,
                    plot_listings_per_host,
                    plot_list_top_30_hosts
                ]

                for chart_func in charts:
                    if not screen_alive():
                        return
                    # One failing chart must not stop the remaining ones
                    try:
                        log(f"ƒêang v·∫Ω: {chart_func.__name__}")

                        fig = chart_func(city_name)
                        if fig is None:
                            log(f"Kh√¥ng c√≥ d·ªØ li·ªáu cho {chart_func.__name__}")
                            continue

                        fig.set_size_inches(11, 7)
                        fig.tight_layout()

                        canvas_plot = FigureCanvasTkAgg(fig, master=scrollable_frame)
                        canvas_plot.draw()
                        # pack() stacks the charts vertically
                        canvas_plot.get_tk_widget().pack(pady=10, padx=20, fill=X)
                    except Exception as e:
                        log(f"L·ªói v·∫Ω {chart_func.__name__}: {e}")

                log("--- HO√ÄN T·∫§T ---")

            except Exception as e:
                log(f"L·ªñI NGHI√äM TR·ªåNG: {e}")

        # Start shortly after the screen has been shown
        window.after(100, run_pipeline)
        return frame

    switch_frame(create_search_screen)
    window.mainloop()

# Start the GUI. Main_app() ends in window.mainloop(), so this cell keeps
# running until the application window is closed.
Main_app()