In [None]:

import time, random, re
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup
import pandas as pd
from IPython.display import HTML

# Site root and the search-results endpoint every request in this cell targets.
BASE   = "https://www.booking.com"
SEARCH = f"{BASE}/searchresults.html"

# Browser-like headers installed on the session before any request is sent.
HEADERS = {
    "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                   "(KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"),
    "Accept-Language": "en-US,en;q=0.9",
}

CITIES     = ["Aix en Provence", "Collioure", "Carcassonne", "Toulouse", "Biarritz"]  # cities scraped by the driver loop
PER_CITY   = 20          # maximum number of hotels kept per city
ROWS       = 25          # value of the `rows` query parameter and the pagination offset step
PAGE_SLEEP = (1.0, 2.0)  # (min, max) seconds of random pause between result pages
CITY_SLEEP = (3.0, 5.0)  # (min, max) seconds of random pause between cities
MAX_EMPTY  = 1           # fetch_city gives up once more than this many empty pages were counted

# Captures the first "D.D" / "DD.D" number in a text; a score written without a
# decimal part (e.g. "10") does not match and ends up as a missing score.
SCORE_RE = re.compile(r"\b(\d{1,2}\.\d)\b")

def clean_url(href: str) -> str:
    """Return ``href`` as an absolute URL without query string or fragment.

    Relative links are resolved against ``BASE``. An empty or ``None`` href
    yields ``""`` (instead of the bare site root) so callers can filter out
    cards without a usable link by truthiness.
    """
    if not href:
        return ""
    absolute = urljoin(BASE, href)
    # Drop tracking parameters and anchors so the same hotel always maps to one URL.
    return absolute.split("?", 1)[0].split("#", 1)[0]

def extract_cards(html: str):
    """Parse one search-results page into a list of hotel records.

    Every property card that has both a title link and a title element yields
    a dict with ``hotel_name``, ``url`` (cleaned by ``clean_url``) and
    ``score``. The score is the first ``SCORE_RE`` match, kept as a string,
    taken from the review block's ``aria-label`` when present and from its
    visible text otherwise; it is ``None`` when nothing matches.
    """
    page = BeautifulSoup(html, "html.parser")
    records = []
    for card in page.select('div[data-testid="property-card"]'):
        link = card.select_one('a[data-testid="title-link"]')
        title = card.select_one('[data-testid="title"]')
        if not (link and title):
            continue

        labelled = card.select_one('[data-testid="review-score"] [aria-label]')
        if labelled and labelled.has_attr("aria-label"):
            score_text = labelled["aria-label"]
        else:
            review_block = card.select_one('[data-testid="review-score"]')
            score_text = review_block.get_text(" ", strip=True) if review_block else ""

        found = SCORE_RE.search(score_text or "")
        records.append({
            "hotel_name": title.get_text(strip=True),
            "url": clean_url(link.get("href", "")),
            "score": found.group(1) if found else None,
        })
    return records

def extract_dest(html: str):
    """Find the destination id and type embedded in a search-results page.

    The raw text is searched first for an inline ``"dest_id": "...",
    "dest_type": "..."`` pair; if that fails, the parsed document is searched
    for an element carrying both ``data-destination-id`` and
    ``data-destination-type``. Returns ``(dest_id, dest_type)``, or
    ``(None, None)`` when neither source is present.
    """
    inline = re.search(r'"dest_id"\s*:\s*"([^"]+)"\s*,\s*"dest_type"\s*:\s*"([^"]+)"', html)
    if inline is not None:
        dest_id, dest_type = inline.groups()
        return dest_id, dest_type

    holder = BeautifulSoup(html, "html.parser").select_one(
        '[data-destination-id][data-destination-type]'
    )
    if holder is None:
        return (None, None)
    return (holder.get("data-destination-id"), holder.get("data-destination-type"))

def fetch_city(sess: requests.Session, city: str):
    """Collect up to ``PER_CITY`` unique hotels for ``city`` from the search pages.

    A first request with the free-text query ``"<city>, France"`` is used to
    discover ``dest_id`` / ``dest_type`` via ``extract_dest``. When both are
    found the paginated requests use them; otherwise they keep the free-text
    query. Pages are requested ordered by ``bayesian_review_score``; an empty
    page is retried once with ``order=popularity``.

    Args:
        sess: Session used for every HTTP request.
        city: City name; ``", France"`` is appended for the free-text search.

    Returns:
        A list of at most ``PER_CITY`` dicts with keys ``city``,
        ``hotel_name``, ``url`` and ``score``, de-duplicated by ``url``.
    """
    query = f"{city}, France"
    # Parameters shared by the discovery request and every paginated request.
    common = {
        "lang": "en-us",
        "rows": ROWS,
        "order": "bayesian_review_score",
        "group_adults": 2, "no_rooms": 1,
    }

    r0 = sess.get(SEARCH, params=dict(common, ss=query), timeout=30)
    dest_id, dest_type = extract_dest(r0.text)

    base = dict(common)
    if dest_id and dest_type:
        base.update({"dest_id": dest_id, "dest_type": dest_type})
    else:
        base.update({"ss": query})

    collected, seen = [], set()
    offset, empty = 0, 0

    while len(collected) < PER_CITY:
        params = dict(base, offset=offset)
        r = sess.get(SEARCH, params=params, timeout=30, allow_redirects=True)
        hotels = extract_cards(r.text)

        if not hotels:
            empty += 1
            if empty > MAX_EMPTY:
                break
            # Same page, different ordering, before counting the page as lost.
            params["order"] = "popularity"
            r = sess.get(SEARCH, params=params, timeout=30, allow_redirects=True)
            hotels = extract_cards(r.text)
            if not hotels:
                time.sleep(random.uniform(1.0, 1.8))
                continue

        # Cards were obtained (directly or through the fallback), so the run of
        # empty pages is over; without this reset a successful fallback would
        # still count against the next empty page.
        empty = 0

        added = 0
        for h in hotels:
            if h["url"] and h["url"] not in seen:
                seen.add(h["url"])
                collected.append({"city": city, **h})
                added += 1
                if len(collected) >= PER_CITY:
                    break

        # A page made only of already-seen hotels means pagination is not advancing.
        if added == 0:
            break

        offset += ROWS
        time.sleep(random.uniform(*PAGE_SLEEP))

    return collected[:PER_CITY]


from html import escape as html_escape

# One session for the whole crawl so the headers below apply to every request.
sess = requests.Session()
sess.headers.update(HEADERS)

# Warm-up request to the home page before searching, then add the headers a
# browser would send when navigating from it.
home = sess.get("https://www.booking.com/", timeout=15)
sess.headers.update({
    "Referer": "https://www.booking.com/",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
})
time.sleep(2)

rows = []
for city in CITIES:
    got = fetch_city(sess, city)
    rows.extend(got)
    print(f"{city}: {len(got)}")
    time.sleep(random.uniform(*CITY_SLEEP))

df = pd.DataFrame(rows, columns=["city","hotel_name","score","url"])
print(df.shape)

# Display copy with clickable links. to_html(escape=False) disables pandas' own
# escaping for every column, and all values here come from scraped pages, so
# each cell is escaped explicitly before the anchor markup is added.
df_show = df.copy()
for text_col in ("city", "hotel_name", "score"):
    df_show[text_col] = df_show[text_col].map(lambda v: html_escape(str(v)))
df_show["url"] = df_show["url"].map(
    lambda u: '<a href="{0}" target="_blank" rel="noopener noreferrer">{0}</a>'.format(
        html_escape(str(u), quote=True)
    )
)
HTML(df_show.to_html(escape=False, index=False))


Aix en Provence: 20
Collioure: 20
Carcassonne: 20
Toulouse: 20
Biarritz: 20
(100, 4)


city,hotel_name,score,url
Aix en Provence,Domaine de Saint Clair,9.7,https://www.booking.com/hotel/fr/domaine-de-saint-clair.html
Aix en Provence,La Villa Rustica,9.8,https://www.booking.com/hotel/fr/la-villa-rustica.html
Aix en Provence,"Les Secrets d'Alcôve, nuits Romantiques avec SPA",9.7,https://www.booking.com/hotel/fr/les-secrets-d-39-alcove-nuitees-romantiques-avec-spa.html
Aix en Provence,"La Bastide de Damien, 5 clefs Demeure d'Exception et Table Remarquable dans le guide Teritoria, JACUZZI Privatif, Cuisine étoilée Dominique Frérard",9.5,https://www.booking.com/hotel/fr/chambres-romantiques-aix-en-provence.html
Aix en Provence,Château de Saint-Girons - Luxury Guest House in Provence,9.8,https://www.booking.com/hotel/fr/chateau-de-saint-girons.html
Aix en Provence,Pavillon de Beauregard,9.5,https://www.booking.com/hotel/fr/pavillon-de-beauregard.html
Aix en Provence,Suite le Quervalat dans magnifique bastide 18eme,9.6,https://www.booking.com/hotel/fr/chambre-d-hote-dans-magnifique-bastide-18eme.html
Aix en Provence,Villa Amara,9.8,https://www.booking.com/hotel/fr/villa-amara.html
Aix en Provence,The View Aix-en-Provence,9.8,https://www.booking.com/hotel/fr/the-view-aix-en-provence.html
Aix en Provence,La Soleillade Aixoise,9.7,https://www.booking.com/hotel/fr/la-soleillade-aix-en-provence.html


In [None]:

import json, time, random, requests
from bs4 import BeautifulSoup
import pandas as pd


# Request headers for the hotel-detail scraper. This rebinds the HEADERS name set
# in the search cell: same User-Agent and Accept-Language, plus "Referer" and "Accept".
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.booking.com/",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
}

def _to_float(value):
    """Return ``value`` as a float, or ``None`` when it is missing or not numeric."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _details_from_jsonld(soup):
    """Return ``(lat, lon, description)`` from the first JSON-LD item that has both coordinates."""
    for tag in soup.select('script[type="application/ld+json"]'):
        try:
            data = json.loads(tag.string or tag.text or "")
        except (ValueError, RecursionError):
            continue
        for item in (data if isinstance(data, list) else [data]):
            if not isinstance(item, dict):
                continue
            geo = item.get("geo")
            if not isinstance(geo, dict):
                continue
            lat = _to_float(geo.get("latitude"))
            lon = _to_float(geo.get("longitude"))
            # Compare with None rather than truthiness so a 0.0 coordinate is kept.
            if lat is not None and lon is not None:
                return lat, lon, (item.get("description") or None)
    return None, None, None


def _coords_from_attributes(soup):
    """Return ``(lat, lon)`` read from the map-related data attributes, ``None`` where unavailable."""
    atlas = soup.select_one('[data-atlas-latlng]')
    if atlas is not None:
        lat_s, _sep, lon_s = atlas["data-atlas-latlng"].partition(",")
        return _to_float(lat_s), _to_float(lon_s)
    glat = soup.select_one('[data-google-lat]')
    glon = soup.select_one('[data-google-lng]')
    return (
        _to_float(glat["data-google-lat"]) if glat is not None else None,
        _to_float(glon["data-google-lng"]) if glon is not None else None,
    )


def _description_from_html(soup):
    """Return the description block's text, else the meta description, else ``None``."""
    desc_el = soup.select_one('#property_description_content, [data-testid="property-description"]')
    if desc_el is not None:
        text = desc_el.get_text(" ", strip=True)
        if text:
            return text
    meta = soup.select_one('meta[name="description"]')
    if meta is not None and meta.has_attr("content"):
        return meta["content"].strip() or None
    return None


def get_hotel_details(sess: requests.Session, url: str):
    """Return ``(latitude, longitude, description)`` for a hotel page.

    Coordinates are taken from the page's JSON-LD when available, otherwise
    from the ``data-atlas-latlng`` / ``data-google-lat`` / ``data-google-lng``
    attributes. The description comes from JSON-LD, then the description
    block, then the ``<meta name="description">`` tag. Each source is parsed
    independently, so one malformed value does not discard the others.

    Args:
        sess: Session used for the HTTP request.
        url: Hotel page URL.

    Returns:
        A 3-tuple; any element is ``None`` when it could not be found, and all
        three are ``None`` when the request itself fails.
    """
    try:
        r = sess.get(url, timeout=30, allow_redirects=True)
    except requests.RequestException:
        # Best effort: a failed request yields an empty record, not a crash.
        return None, None, None

    soup = BeautifulSoup(r.text, "html.parser")

    lat, lon, desc = _details_from_jsonld(soup)
    if lat is None or lon is None:
        lat, lon = _coords_from_attributes(soup)
    if not desc:
        desc = _description_from_html(soup)
    return lat, lon, desc


def enrich_urls(df_hotels: pd.DataFrame, pause=(0.6, 1.2)) -> pd.DataFrame:
    """Fetch coordinates and description for every distinct hotel URL.

    Args:
        df_hotels: Frame with a ``url`` column.
        pause: ``(min, max)`` seconds for the random delay after each request.

    Returns:
        DataFrame with columns ``url``, ``latitude``, ``longitude`` and
        ``description``: one row per distinct non-null URL, in first-seen
        order, so a later merge on ``url`` cannot multiply rows.
    """
    sess = requests.Session()
    sess.headers.update(HEADERS)

    # Best-effort warm-up request; a failure here must not stop the enrichment.
    try:
        sess.get("https://www.booking.com/", timeout=10)
    except requests.RequestException:
        pass

    # Each URL is fetched once: duplicates would only repeat the request and
    # later duplicate rows when joined back on "url".
    urls = df_hotels["url"].dropna().drop_duplicates().tolist()

    rows = []
    for i, url in enumerate(urls, start=1):
        lat, lon, desc = get_hotel_details(sess, url)
        rows.append({"url": url, "latitude": lat, "longitude": lon, "description": desc})

        time.sleep(random.uniform(*pause))
        if i % 20 == 0:
            print(f"…обработано {i} ссылок")

    details_df = pd.DataFrame(rows, columns=["url", "latitude", "longitude", "description"])
    return details_df


# Scrape the detail page of every hotel in df and keep a standalone copy on disk.
details_df = enrich_urls(df)              
details_df.to_csv("hotel_details.csv", index=False)



# Left join on "url": every row of df is kept, with the scraped details attached.
merged = df.merge(details_df, on="url", how="left")
merged.to_csv("hotels_enriched.csv", index=False)

merged.head()


…обработано 20 ссылок
…обработано 40 ссылок
…обработано 60 ссылок
…обработано 80 ссылок
…обработано 100 ссылок
Сохранил hotel_details.csv
Сохранил hotels_enriched.csv


Unnamed: 0,city,hotel_name,score,url,latitude,longitude,description
0,Aix en Provence,Domaine de Saint Clair,9.7,https://www.booking.com/hotel/fr/domaine-de-sa...,43.499156,5.432056,Domaine de Saint Clair is an adults-only Bed&B...
1,Aix en Provence,La Villa Rustica,9.8,https://www.booking.com/hotel/fr/la-villa-rust...,43.558793,5.448861,Modern Comforts: La Villa Rustica in Aix-en-Pr...
2,Aix en Provence,"Les Secrets d'Alcôve, nuits Romantiques avec SPA",9.7,https://www.booking.com/hotel/fr/les-secrets-d...,43.530856,5.449006,Comfortable Accommodations: Les Secrets d'Alcô...
3,Aix en Provence,"La Bastide de Damien, 5 clefs Demeure d'Except...",9.5,https://www.booking.com/hotel/fr/chambres-roma...,43.591905,5.390908,Historic Charm: La Bastide de Damien in Aix-en...
4,Aix en Provence,Château de Saint-Girons - Luxury Guest House i...,9.8,https://www.booking.com/hotel/fr/chateau-de-sa...,43.49892,5.382602,Historic Charm: Château de Saint-Girons in Aix...


In [1]:
import pandas as pd

In [None]:

# Reload the enriched hotels written by the scraping step, plus the cities table.
df_hotels = pd.read_csv("hotels_enriched.csv")
df_cities = pd.read_csv("cities.csv")

# Quick size check of both inputs.
print("hotels_enriched:", df_hotels.shape)
print("cities:", df_cities.shape)

df_hotels.head()


hotels_enriched: (100, 7)
cities: (35, 4)


Unnamed: 0,city,hotel_name,score,url,latitude,longitude,description
0,Aix en Provence,Domaine de Saint Clair,9.7,https://www.booking.com/hotel/fr/domaine-de-sa...,43.499156,5.432056,Domaine de Saint Clair is an adults-only Bed&B...
1,Aix en Provence,La Villa Rustica,9.8,https://www.booking.com/hotel/fr/la-villa-rust...,43.558793,5.448861,Modern Comforts: La Villa Rustica in Aix-en-Pr...
2,Aix en Provence,"Les Secrets d'Alcôve, nuits Romantiques avec SPA",9.7,https://www.booking.com/hotel/fr/les-secrets-d...,43.530856,5.449006,Comfortable Accommodations: Les Secrets d'Alcô...
3,Aix en Provence,"La Bastide de Damien, 5 clefs Demeure d'Except...",9.5,https://www.booking.com/hotel/fr/chambres-roma...,43.591905,5.390908,Historic Charm: La Bastide de Damien in Aix-en...
4,Aix en Provence,Château de Saint-Girons - Luxury Guest House i...,9.8,https://www.booking.com/hotel/fr/chateau-de-sa...,43.49892,5.382602,Historic Charm: Château de Saint-Girons in Aix...


In [None]:

# Normalise both tables so they can be joined on city_name: strip whitespace from
# the column names, rename the hotels' "city" column, and strip the city names.
df_hotels = (
    df_hotels
    .rename(columns=str.strip)
    .rename(columns={"city": "city_name"})
    .assign(city_name=lambda d: d["city_name"].astype(str).str.strip())
)
df_cities = (
    df_cities
    .rename(columns=str.strip)
    .assign(city_name=lambda d: d["city_name"].astype(str).str.strip())
)

df_hotels.head()


Unnamed: 0,city_name,hotel_name,score,url,latitude,longitude,description
0,Aix en Provence,Domaine de Saint Clair,9.7,https://www.booking.com/hotel/fr/domaine-de-sa...,43.499156,5.432056,Domaine de Saint Clair is an adults-only Bed&B...
1,Aix en Provence,La Villa Rustica,9.8,https://www.booking.com/hotel/fr/la-villa-rust...,43.558793,5.448861,Modern Comforts: La Villa Rustica in Aix-en-Pr...
2,Aix en Provence,"Les Secrets d'Alcôve, nuits Romantiques avec SPA",9.7,https://www.booking.com/hotel/fr/les-secrets-d...,43.530856,5.449006,Comfortable Accommodations: Les Secrets d'Alcô...
3,Aix en Provence,"La Bastide de Damien, 5 clefs Demeure d'Except...",9.5,https://www.booking.com/hotel/fr/chambres-roma...,43.591905,5.390908,Historic Charm: La Bastide de Damien in Aix-en...
4,Aix en Provence,Château de Saint-Girons - Luxury Guest House i...,9.8,https://www.booking.com/hotel/fr/chateau-de-sa...,43.49892,5.382602,Historic Charm: Château de Saint-Girons in Aix...


In [None]:

# Expose the scraped score under the name "rating" (unless a rating column
# already exists) and make it numeric; unparsable values become NaN.
if "rating" not in df_hotels.columns and "score" in df_hotels.columns:
    df_hotels = df_hotels.rename(columns={"score": "rating"})

if "rating" in df_hotels.columns:
    df_hotels = df_hotels.assign(
        rating=pd.to_numeric(df_hotels["rating"], errors="coerce")
    )

df_hotels[["city_name", "hotel_name", "rating"]].head()


Unnamed: 0,city_name,hotel_name,rating
0,Aix en Provence,Domaine de Saint Clair,9.7
1,Aix en Provence,La Villa Rustica,9.8
2,Aix en Provence,"Les Secrets d'Alcôve, nuits Romantiques avec SPA",9.7
3,Aix en Provence,"La Bastide de Damien, 5 clefs Demeure d'Except...",9.5
4,Aix en Provence,Château de Saint-Girons - Luxury Guest House i...,9.8


In [None]:

# Attach city_id from the cities table by exact city_name match.
# Any city_id left over from a previous run of this cell is dropped first:
# merging twice would otherwise create city_id_x / city_id_y and break the
# lookup below.
df_hotels = df_hotels.drop(columns=["city_id"], errors="ignore").merge(
    df_cities[["city_id", "city_name"]],
    on="city_name",
    how="left",
    validate="many_to_one",  # fail loudly if a city_name repeats in df_cities instead of duplicating hotels
)

# Hotels whose city_name has no counterpart in df_cities end up with a missing city_id.
unmatched = df_hotels.loc[df_hotels["city_id"].isna(), "city_name"].unique().tolist()
print("Villes non appariées (city_id manquant):", unmatched)

df_hotels.head()


Villes non appariées (city_id manquant): []


Unnamed: 0,city_name,hotel_name,rating,url,latitude,longitude,description,city_id
0,Aix en Provence,Domaine de Saint Clair,9.7,https://www.booking.com/hotel/fr/domaine-de-sa...,43.499156,5.432056,Domaine de Saint Clair is an adults-only Bed&B...,2
1,Aix en Provence,La Villa Rustica,9.8,https://www.booking.com/hotel/fr/la-villa-rust...,43.558793,5.448861,Modern Comforts: La Villa Rustica in Aix-en-Pr...,2
2,Aix en Provence,"Les Secrets d'Alcôve, nuits Romantiques avec SPA",9.7,https://www.booking.com/hotel/fr/les-secrets-d...,43.530856,5.449006,Comfortable Accommodations: Les Secrets d'Alcô...,2
3,Aix en Provence,"La Bastide de Damien, 5 clefs Demeure d'Except...",9.5,https://www.booking.com/hotel/fr/chambres-roma...,43.591905,5.390908,Historic Charm: La Bastide de Damien in Aix-en...,2
4,Aix en Provence,Château de Saint-Girons - Luxury Guest House i...,9.8,https://www.booking.com/hotel/fr/chateau-de-sa...,43.49892,5.382602,Historic Charm: Château de Saint-Girons in Aix...,2


In [None]:

# Assign hotel_id as the 1-based position after sorting, so the ids depend only
# on the sort keys. A hotel_id left by a previous run of this cell is dropped
# first; DataFrame.insert raises when the column already exists.
sort_cols = [c for c in ["city_id", "hotel_name", "url"] if c in df_hotels.columns]
df_hotels = (
    df_hotels
    .drop(columns=["hotel_id"], errors="ignore")
    .sort_values(sort_cols)
    .reset_index(drop=True)
)
df_hotels.insert(0, "hotel_id", range(1, len(df_hotels) + 1))

# Column order of the exported table.
final_cols = [
    "hotel_id",
    "city_id",
    "city_name",
    "hotel_name",
    "url",
    "latitude",
    "longitude",
    "rating",
    "description"
]

# Keep only the columns that are actually present.
final_cols_existing = [c for c in final_cols if c in df_hotels.columns]
df_hotels_final = df_hotels[final_cols_existing].copy()

df_hotels_final.head()


Unnamed: 0,hotel_id,city_id,city_name,hotel_name,url,latitude,longitude,rating,description
0,1,2,Aix en Provence,CABADOL - Appartement Aix centre historique - ...,https://www.booking.com/hotel/fr/appartement-a...,43.525963,5.451841,9.4,Modern Comforts: CABADOL - Appartement Aix cen...
1,2,2,Aix en Provence,Chambre style balinais,https://www.booking.com/hotel/fr/chambre-style...,43.528428,5.452377,9.4,Spacious Accommodations: Chambre style balinai...
2,3,2,Aix en Provence,Château de Saint-Girons - Luxury Guest House i...,https://www.booking.com/hotel/fr/chateau-de-sa...,43.49892,5.382602,9.8,Historic Charm: Château de Saint-Girons in Aix...
3,4,2,Aix en Provence,Domaine de Saint Clair,https://www.booking.com/hotel/fr/domaine-de-sa...,43.499156,5.432056,9.7,Domaine de Saint Clair is an adults-only Bed&B...
4,5,2,Aix en Provence,Duplex indépendant avec clim et parking St Donat,https://www.booking.com/hotel/fr/independent-a...,43.560699,5.451552,9.8,Essential Facilities: Duplex indépendant avec ...


In [None]:

# Persist the final hotels table, then print a short summary of what was written.
df_hotels_final.to_csv("hotels.csv", index=False)

print("✅ hotels.csv sauvegardé:", df_hotels_final.shape)
# The city and rating counts fall back to "N/A" when the column is absent.
print("Nombre de villes dans hotels.csv:", df_hotels_final["city_id"].nunique() if "city_id" in df_hotels_final.columns else "N/A")
print("Nombre d'hôtels:", df_hotels_final["hotel_id"].nunique())
print("Nombre de ratings manquants:", df_hotels_final["rating"].isna().sum() if "rating" in df_hotels_final.columns else "N/A")


✅ hotels.csv sauvegardé: (100, 9)
Nombre de villes dans hotels.csv: 5
Nombre d'hôtels: 100
Nombre de ratings manquants: 1


In [None]:
import pandas as pd
import plotly.express as px

# Load the cities table and strip whitespace from its column names.
cities = pd.read_csv("cities.csv")
cities.columns = [c.strip() for c in cities.columns]


# The daily weather file is optional here: weather_daily stays None when it is missing.
weather_daily = None
try:
    weather_daily = pd.read_csv("weather_daily.csv")
except FileNotFoundError:
    pass

# Load the hotels table written earlier in the notebook.
hotels = pd.read_csv("hotels.csv")
hotels.columns = [c.strip() for c in hotels.columns]

cities.head(), hotels.head()


(   city_id        city_name   latitude  longitude
 0        1    Aigues Mortes  43.566152   4.191540
 1        2  Aix en Provence  43.529842   5.447474
 2        3           Amiens  49.894171   2.295695
 3        4           Annecy  45.899235   6.128885
 4        5           Ariege  42.945537   1.406554,
    hotel_id  city_id        city_name  \
 0         1        2  Aix en Provence   
 1         2        2  Aix en Provence   
 2         3        2  Aix en Provence   
 3         4        2  Aix en Provence   
 4         5        2  Aix en Provence   
 
                                           hotel_name  \
 0  CABADOL - Appartement Aix centre historique - ...   
 1                             Chambre style balinais   
 2  Château de Saint-Girons - Luxury Guest House i...   
 3                             Domaine de Saint Clair   
 4   Duplex indépendant avec clim et parking St Donat   
 
                                                  url   latitude  longitude  \
 0  https://www.

In [12]:
import pandas as pd
import plotly.express as px

# Load cities and the daily weather table; unlike the optional load in the
# previous cell, a missing weather_daily.csv raises here.
cities = pd.read_csv("cities.csv")
weather = pd.read_csv("weather_daily.csv")

# Strip whitespace from the column names of both tables.
cities.columns = [c.strip() for c in cities.columns]
weather.columns = [c.strip() for c in weather.columns]

cities.head(), weather.head()


(   city_id        city_name   latitude  longitude
 0        1    Aigues Mortes  43.566152   4.191540
 1        2  Aix en Provence  43.529842   5.447474
 2        3           Amiens  49.894171   2.295695
 3        4           Annecy  45.899235   6.128885
 4        5           Ariege  42.945537   1.406554,
    weather_id  city_id      city_name forecast_date  temp_mean_c  temp_min_c  \
 0           1        1  Aigues Mortes    2025-10-14    18.676667       14.96   
 1           2        1  Aigues Mortes    2025-10-15    16.600000       11.48   
 2           3        1  Aigues Mortes    2025-10-16    16.935000       13.22   
 3           4        1  Aigues Mortes    2025-10-17    16.573750       12.95   
 4           5        1  Aigues Mortes    2025-10-18    16.010000       12.37   
 
    temp_max_c  pop_mean  pop_max  rain_mm_sum  
 0       21.60       0.0      0.0          0.0  
 1       21.08       0.0      0.0          0.0  
 2       21.16       0.0      0.0          0.0  
 3       

In [None]:

# Collapse the daily rows to one row per city_id:
#   day_temperature = mean of temp_mean_c
#   rain_total      = sum of rain_mm_sum
#   pop_mean        = mean of pop_mean
weather_city = (
    weather
    .groupby("city_id", as_index=False)
    .agg(
        day_temperature=("temp_mean_c", "mean"),
        rain_total=("rain_mm_sum", "sum"),
        pop_mean=("pop_mean", "mean")
    )
)

weather_city.head()


Unnamed: 0,city_id,day_temperature,rain_total,pop_mean
0,1,16.658236,0.0,0.0
1,2,17.334917,0.75,0.029375
2,3,12.060292,0.0,0.0
3,4,10.527569,0.0,0.0
4,5,13.122486,4.62,0.179444


In [14]:
# Add city_name and coordinates to the per-city weather summary (left join on city_id).
map_df = weather_city.merge(
    cities,
    on="city_id",
    how="left"
)

map_df.head()


Unnamed: 0,city_id,day_temperature,rain_total,pop_mean,city_name,latitude,longitude
0,1,16.658236,0.0,0.0,Aigues Mortes,43.566152,4.19154
1,2,17.334917,0.75,0.029375,Aix en Provence,43.529842,5.447474
2,3,12.060292,0.0,0.0,Amiens,49.894171,2.295695
3,4,10.527569,0.0,0.0,Annecy,45.899235,6.128885
4,5,13.122486,4.62,0.179444,Ariege,42.945537,1.406554


In [15]:
# One marker per city; both marker size and colour encode day_temperature.
# NOTE(review): marker size is driven by a temperature, so this presumably
# relies on every city's mean being positive; confirm for colder periods.
fig = px.scatter_mapbox(
    map_df,
    lat="latitude",
    lon="longitude",
    size="day_temperature",
    color="day_temperature",
    color_continuous_scale="Bluered",
    size_max=30,
    zoom=4.2,
    hover_name="city_name",
    # Show the weather metrics in the tooltip and hide the raw coordinates.
    hover_data={
        "day_temperature": True,
        "rain_total": True,
        "pop_mean": True,
        "latitude": False,
        "longitude": False
    },
    title="Conditions météorologiques moyennes par ville"
)

fig.update_layout(
    mapbox_style="carto-positron",
    height=700
)

fig.show()


In [16]:
# City names used to filter the cities table in the following cells.
# These are the same five names as the CITIES list of the scraping cell.
TOP5_CITIES = [
    "Aix en Provence",
    "Biarritz",
    "Carcassonne",
    "Collioure",
    "Toulouse"
]


In [None]:
import pandas as pd
import plotly.express as px

# Reload cities and daily weather and strip whitespace from their column names.
cities = pd.read_csv("cities.csv")
weather = pd.read_csv("weather_daily.csv")

cities.columns = [c.strip() for c in cities.columns]
weather.columns = [c.strip() for c in weather.columns]


# Keep only the rows whose city_name is listed in TOP5_CITIES.
cities_top5 = cities[cities["city_name"].isin(TOP5_CITIES)].copy()

cities_top5


Unnamed: 0,city_id,city_name,latitude,longitude
1,2,Aix en Provence,43.529842,5.447474
9,10,Biarritz,43.483252,-1.559278
11,12,Carcassonne,43.213036,2.349107
14,15,Collioure,42.52505,3.083155
33,34,Toulouse,43.604464,1.444243


In [18]:
# Restrict the daily weather to the selected cities.
weather_top5 = weather[weather["city_id"].isin(cities_top5["city_id"])]

# Same per-city summary as for all cities (mean temperature, total rain, mean
# pop_mean); this rebinds weather_city to the five-city version.
weather_city = (
    weather_top5
    .groupby("city_id", as_index=False)
    .agg(
        day_temperature=("temp_mean_c", "mean"),
        rain_total=("rain_mm_sum", "sum"),
        pop_mean=("pop_mean", "mean")
    )
)

weather_city


Unnamed: 0,city_id,day_temperature,rain_total,pop_mean
0,2,17.334917,0.75,0.029375
1,10,17.002458,0.0,0.0
2,12,17.150194,0.0,0.003333
3,15,17.329778,1.22,0.049306
4,34,17.544708,0.0,0.0


In [19]:
# Add city_name and coordinates of the selected cities to their weather summary;
# this rebinds map_df to the five-city version.
map_df = weather_city.merge(
    cities_top5,
    on="city_id",
    how="left"
)

map_df


Unnamed: 0,city_id,day_temperature,rain_total,pop_mean,city_name,latitude,longitude
0,2,17.334917,0.75,0.029375,Aix en Provence,43.529842,5.447474
1,10,17.002458,0.0,0.0,Biarritz,43.483252,-1.559278
2,12,17.150194,0.0,0.003333,Carcassonne,43.213036,2.349107
3,15,17.329778,1.22,0.049306,Collioure,42.52505,3.083155
4,34,17.544708,0.0,0.0,Toulouse,43.604464,1.444243


In [20]:
# One marker per selected city; both marker size and colour encode day_temperature.
fig = px.scatter_mapbox(
    map_df,
    lat="latitude",
    lon="longitude",
    size="day_temperature",
    color="day_temperature",
    color_continuous_scale="Bluered",
    size_max=35,
    zoom=4.5,
    hover_name="city_name",
    # Show the weather metrics and hide the raw coordinates, matching the
    # tooltips of the other maps in this notebook.
    hover_data={
        "day_temperature": True,
        "rain_total": True,
        "pop_mean": True,
        "latitude": False,
        "longitude": False
    },
    title="Top-5 destinations selon les conditions météorologiques"
)

fig.update_layout(
    mapbox_style="carto-positron",
    height=700
)

fig.show()


In [None]:

# City names used to select the hotels shown on the map below.
# This repeats the TOP5_CITIES definition of the weather section with the same values.
TOP5_CITIES = [
    "Aix en Provence",
    "Biarritz",
    "Carcassonne",
    "Collioure",
    "Toulouse"
]


In [None]:
import pandas as pd
import plotly.express as px


# Reload the cities and hotels tables from disk.
cities = pd.read_csv("cities.csv")
hotels = pd.read_csv("hotels.csv")


# Strip whitespace from the column names of both tables.
cities.columns = [c.strip() for c in cities.columns]
hotels.columns = [c.strip() for c in hotels.columns]

cities.head(), hotels.head()


(   city_id        city_name   latitude  longitude
 0        1    Aigues Mortes  43.566152   4.191540
 1        2  Aix en Provence  43.529842   5.447474
 2        3           Amiens  49.894171   2.295695
 3        4           Annecy  45.899235   6.128885
 4        5           Ariege  42.945537   1.406554,
    hotel_id  city_id        city_name  \
 0         1        2  Aix en Provence   
 1         2        2  Aix en Provence   
 2         3        2  Aix en Provence   
 3         4        2  Aix en Provence   
 4         5        2  Aix en Provence   
 
                                           hotel_name  \
 0  CABADOL - Appartement Aix centre historique - ...   
 1                             Chambre style balinais   
 2  Château de Saint-Girons - Luxury Guest House i...   
 3                             Domaine de Saint Clair   
 4   Duplex indépendant avec clim et parking St Donat   
 
                                                  url   latitude  longitude  \
 0  https://www.

In [None]:

# Ids and names of the cities listed in TOP5_CITIES.
cities_top5 = cities[cities["city_name"].isin(TOP5_CITIES)][["city_id", "city_name"]]


# Hotels located in those cities; .copy() so the frame can be modified independently of hotels.
hotels_top = hotels[hotels["city_id"].isin(cities_top5["city_id"])].copy()

hotels_top.shape


(100, 9)

In [None]:

# Make sure rating is numeric (unparsable values become NaN) before ranking.
if "rating" in hotels_top.columns:
    hotels_top = hotels_top.assign(
        rating=pd.to_numeric(hotels_top["rating"], errors="coerce")
    )

# Within each city, order hotels by rating from highest to lowest and keep the first 20.
hotels_top20 = (
    hotels_top.sort_values(by=["city_id", "rating"], ascending=[True, False])
    .groupby("city_id").head(20)
    .reset_index(drop=True)
)

hotels_top20.shape


(100, 9)

In [None]:
# One marker per hotel, coloured by city; the tooltip shows city, rating and URL
# and hides the raw coordinates.
fig_hotels = px.scatter_mapbox(
    hotels_top20,
    lat="latitude",
    lon="longitude",
    color="city_name",                 
    hover_name="hotel_name",
    hover_data={
        "city_name": True,
        "rating": True,
        "url": True,
        "latitude": False,
        "longitude": False
    },
    zoom=4.5,
    height=750,
    title="Top-20 hôtels dans les 5 meilleures destinations"
)

fig_hotels.update_layout(
    mapbox_style="carto-positron"
)

fig_hotels.show()
