In [34]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

import boto3
from dotenv import load_dotenv
import os
from io import BytesIO

# Chargement du dataset depuis S3

In [35]:
# load_dotenv() is expected to copy the variables of a .env file into the
# process environment before they are read below.
load_dotenv()

# Both credentials are mandatory (a missing variable raises KeyError);
# the region is optional and defaults to eu-west-3.
AWS_ACCESS_KEY_ID = os.environ["API_KEY_S3"]
AWS_SECRET_ACCESS_KEY = os.environ["API_SECRET_KEY_S3"]
AWS_REGION = os.environ.get("AWS_REGION", "eu-west-3")

# S3 client built from an explicitly configured boto3 session.
session = boto3.Session(
    region_name=AWS_REGION,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)
s3 = session.client("s3")

# Location of the CSV inside the bucket.
bucket_name = "dsfs36-bucket-01"
object_key = "outputs/osm_weather_daily.csv"

# Download the whole object, then parse it from an in-memory buffer.
obj = s3.get_object(Bucket=bucket_name, Key=object_key)
csv_buffer = BytesIO(obj["Body"].read())
df_weather_update = pd.read_csv(csv_buffer)

df_weather_update.head()

Unnamed: 0,city,date,temp_day,temp_night,humidity,pop,rain,lat,lon
0,Mont Saint Michel,2026-01-26,7.44,8.33,85,1.0,1.45,48.635954,-1.51146
1,Mont Saint Michel,2026-01-27,8.4,4.47,77,1.0,12.68,48.635954,-1.51146
2,Mont Saint Michel,2026-01-28,8.37,4.23,74,0.0,0.0,48.635954,-1.51146
3,Mont Saint Michel,2026-01-29,7.13,6.25,84,1.0,0.62,48.635954,-1.51146
4,Mont Saint Michel,2026-01-30,9.13,7.76,87,1.0,14.44,48.635954,-1.51146


In [36]:
# Preview the first five rows of the dataset (5 is the default of head()).
df_weather_update.head()

Unnamed: 0,city,date,temp_day,temp_night,humidity,pop,rain,lat,lon
0,Mont Saint Michel,2026-01-26,7.44,8.33,85,1.0,1.45,48.635954,-1.51146
1,Mont Saint Michel,2026-01-27,8.4,4.47,77,1.0,12.68,48.635954,-1.51146
2,Mont Saint Michel,2026-01-28,8.37,4.23,74,0.0,0.0,48.635954,-1.51146
3,Mont Saint Michel,2026-01-29,7.13,6.25,84,1.0,0.62,48.635954,-1.51146
4,Mont Saint Michel,2026-01-30,9.13,7.76,87,1.0,14.44,48.635954,-1.51146


In [37]:
# Dimensions of the loaded dataset as a (rows, columns) tuple.
df_weather_update.shape

(280, 9)

# Transformations

In [38]:
# Parse the "date" column into pandas datetimes and store it back in the
# same column of the dataset.
parsed_dates = pd.to_datetime(df_weather_update["date"])
df_weather_update["date"] = parsed_dates

In [39]:
# Print the column names, non-null counts and dtypes, which shows whether
# the "date" conversion above produced a datetime column.
df_weather_update.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   city        280 non-null    object        
 1   date        280 non-null    datetime64[ns]
 2   temp_day    280 non-null    float64       
 3   temp_night  280 non-null    float64       
 4   humidity    280 non-null    int64         
 5   pop         280 non-null    float64       
 6   rain        280 non-null    float64       
 7   lat         280 non-null    float64       
 8   lon         280 non-null    float64       
dtypes: datetime64[ns](1), float64(6), int64(1), object(1)
memory usage: 19.8+ KB


# EDA

In [40]:
# Per-city aggregation (cities are keyed by name and coordinates):
# mean daytime temperature and total rainfall, coldest city first.
df_city_temp = (
    df_weather_update
    .groupby(["city", "lat", "lon"], as_index=False)
    .agg(temp_day=("temp_day", "mean"), rain=("rain", "sum"))
    .sort_values("temp_day")
)
print(df_city_temp)

                            city        lat       lon  temp_day    rain
13  Chateau du Haut Koenigsbourg  48.249382  7.343941   2.62750   17.96
18              Gorges du Verdon  43.749656  6.328562   3.77750   46.78
3                         Annecy  45.899235  6.128885   4.47875   10.21
4                         Ariege  42.945537  1.406554   4.81750   40.80
32                    Strasbourg  48.584614  7.750713   4.81750   21.18
17                     Eguisheim  48.044797  7.307962   5.02000   18.75
22                         Lille  50.636565  3.063528   5.13625   16.57
16                         Dijon  47.321581  5.041470   5.14000   44.40
15                        Colmar  48.077752  7.357964   5.27250   18.07
8                       Besancon  47.238022  6.024362   5.39125   53.20
2                         Amiens  49.894171  2.295695   6.09875   22.74
19                      Grenoble  45.187560  5.735782   6.26375   19.74
23                          Lyon  45.757814  4.832011   7.14875 

In [41]:
# Horizontal bar chart of each city's mean daytime temperature; the bar
# colour encodes the same value on the Plasma scale.
fig_bar_temp = px.bar(
    data_frame=df_city_temp,
    x="temp_day",
    y="city",
    orientation="h",
    color="temp_day",
    color_continuous_scale="Plasma",
    title="Température moyenne par ville",
    labels={"temp_day": "Température moyenne (°C)"},
)

# Fixed 800x800 canvas, thin gaps between bars, and no y-axis title
# (the city names on the axis speak for themselves).
fig_bar_temp.update_layout(
    height=800,
    width=800,
    bargap=0.05,
    yaxis={"title": ""},
)

fig_bar_temp.show()

In [42]:
# Map of the cities: marker colour follows the mean daytime temperature and
# marker size follows the total rainfall computed in df_city_temp.
fig_map_temp = px.scatter_mapbox(
    data_frame=df_city_temp,
    lat="lat",
    lon="lon",
    color="temp_day",
    size="rain",
    size_max=30,
    hover_name="city",
    hover_data={"temp_day": ":.1f", "rain": ":.1f"},
    zoom=4,
    title="Carte des villes : couleur = température moyenne (°C), taille = pluie totale (mm)",
)

# Light base map; keep only a top margin so the title stays visible.
fig_map_temp.update_layout(
    mapbox_style="carto-positron",
    margin={"l": 0, "r": 0, "t": 50, "b": 0},
)
fig_map_temp.show()

In [43]:
# Per-city aggregation (cities are keyed by name and coordinates):
# mean humidity and the sum of the daily "pop" values, ordered from the
# highest to the lowest "pop" total.
df_city_pop = (
    df_weather_update
    .groupby(["city", "lat", "lon"], as_index=False)
    .agg(humidity=("humidity", "mean"), pop=("pop", "sum"))
    .sort_values("pop", ascending=False)
)
print(df_city_pop)

                            city        lat       lon  humidity   pop
4                         Ariege  42.945537  1.406554    81.000  8.00
20                   La Rochelle  46.159732 -1.151595    79.750  8.00
8                       Besancon  47.238022  6.024362    86.125  7.98
33                      Toulouse  43.604464  1.444243    68.875  7.80
3                         Annecy  45.899235  6.128885    90.625  7.80
7                        Bayonne  43.494514 -1.473666    77.625  7.80
19                      Grenoble  45.187560  5.735782    89.750  7.80
9                       Biarritz  43.483252 -1.559278    75.500  7.79
13  Chateau du Haut Koenigsbourg  48.249382  7.343941    85.125  7.63
26                     Montauban  44.017584  1.354999    71.625  7.26
32                    Strasbourg  48.584614  7.750713    82.125  7.01
25             Mont Saint Michel  48.635954 -1.511460    80.750  6.96
31                       St Malo  48.649518 -2.026041    79.625  6.76
23                  

In [44]:
# Horizontal bar chart of the per-city "pop" total held in df_city_pop
# (the daily precipitation probabilities summed over the period); the bar
# colour encodes the same value on the Plasma scale.
# The title and axis label describe precipitation probability: the previous
# title was a leftover from the temperature chart and the label had a typo.
fig_bar_pop = px.bar(
    df_city_pop,
    x="pop",
    y="city",
    color="pop",
    color_continuous_scale="Plasma",
    orientation="h",
    labels={"pop": "Probabilité de précipitations (somme sur la période)"},
    title="Probabilité de précipitations cumulée par ville"
)

# Fixed 800x800 canvas, thin gaps between bars, and no y-axis title.
fig_bar_pop.update_layout(
    height=800,
    width=800,
    bargap=0.05,
    yaxis=dict(title="")
)

fig_bar_pop.show()

In [45]:
# Map of the cities: marker colour follows the summed "pop" value and marker
# size follows the mean humidity, both taken from df_city_pop.
# The title states the encodings in that order; it previously had colour and
# size swapped relative to the `color=` / `size=` arguments.
fig_map_pop = px.scatter_mapbox(
    df_city_pop,
    lat="lat",
    lon="lon",
    size="humidity",
    color="pop",
    hover_name="city",
    hover_data={"humidity": ":.1f", "pop": ":.1f"},
    color_continuous_scale="RdYlBu_r",
    size_max=20,
    zoom=4,
    title="Carte des villes : couleur = probabilité de pluie cumulée, taille = humidité moyenne"
)

# Light base map; keep only a top margin so the title stays visible.
fig_map_pop.update_layout(mapbox_style="carto-positron", margin=dict(l=0, r=0, t=50, b=0))
fig_map_pop.show()

In [46]:
# Total rainfall for every (date, city) pair, then ordered by date.
rain_by_day_city = df_weather_update.groupby(["date", "city"], as_index=False)["rain"].sum()
df_rain_daily = rain_by_day_city.sort_values(by="date")
df_rain_daily

Unnamed: 0,date,city,rain
0,2026-01-26,Aigues Mortes,0.70
20,2026-01-26,La Rochelle,4.39
21,2026-01-26,Le Havre,0.10
22,2026-01-26,Lille,0.00
23,2026-01-26,Lyon,0.00
...,...,...,...
257,2026-02-02,Cassis,2.79
258,2026-02-02,Chateau du Haut Koenigsbourg,3.39
259,2026-02-02,Collioure,0.00
261,2026-02-02,Dijon,0.18


In [47]:
# Grouped bar chart of daily rainfall: one group of bars per date,
# one coloured bar per city inside each group.
fig_rain_bar = px.bar(
    data_frame=df_rain_daily,
    x="date",
    y="rain",
    color="city",
    barmode="group",
    title="Pluie quotidienne par ville",
    labels={"rain": "Pluie (mm)", "date": "Date", "city": "Ville"},
)
fig_rain_bar.show()

# Choix top 5 des destinations

L’objectif est d’identifier les villes offrant la météo la plus agréable sur les 7 prochains jours.
 
Les indicateurs suivants sont calculés :
- **Température moyenne en journée** : plus la température est élevée, plus la destination est considérée comme agréable
- **Pluie attendue** : calculée comme la somme de `pop × rain` pour chaque jour, afin de tenir compte à la fois  
  de la quantité de pluie et de sa probabilité
- **Humidité moyenne** : une humidité élevée est pénalisante pour le "confort" ressenti

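Ces indicateurs sont combinés en un score :
`score = température moyenne − 0,30 × pluie attendue − 0,03 × humidité moyenne`.

Les villes sont enfin classées par score décroissant, et les 5 meilleures destinations
sont retenues comme celles offrant la météo la plus favorable sur la période étudiée.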

In [48]:
# Penalty weights applied to the expected rain and to the mean humidity.
W_RAIN, W_HUM = 0.30, 0.03

# Keep the first 7 rows of every city once the data is ordered by date, and
# weight each day's rain amount by its precipitation probability.
first_week = (
    df_weather_update
    .sort_values(["city", "date"])
    .groupby("city")
    .head(7)
)
first_week = first_week.assign(exp_rain=first_week["pop"] * first_week["rain"])

# One row per city: mean daytime temperature, mean humidity, and the sum of
# the probability-weighted rain.
city_scores = first_week.groupby("city", as_index=False).agg(
    mean_temp_day=("temp_day", "mean"),
    mean_humidity=("humidity", "mean"),
    expected_rain_7d_mm=("exp_rain", "sum"),
)

# Score = temperature minus the weighted rain and humidity penalties.
city_scores["scoring"] = (
    city_scores["mean_temp_day"]
    - W_RAIN * city_scores["expected_rain_7d_mm"]
    - W_HUM * city_scores["mean_humidity"]
)

# Five best-scoring cities, best first.
top5 = city_scores.sort_values("scoring", ascending=False).head(5)

print(top5)

             city  mean_temp_day  mean_humidity  expected_rain_7d_mm   scoring
14      Collioure      11.514286      63.142857              20.1840  3.564800
0   Aigues Mortes      10.245714      70.857143              17.7900  2.783000
6          Bayeux       8.832857      75.857143              13.5300  2.498143
24      Marseille      10.024286      71.714286              18.5409  2.310587
12         Cassis       9.441429      70.857143              19.5600  1.447714


## Visualisation

In [49]:
# Marker-size value given to the lowest-scoring city whenever the scores
# have to be shifted to stay positive (see below).
MIN_MARKER_SIZE = 1.0

# One (lat, lon) pair per city, taken from the weather dataset.
coords = (
    df_weather_update
    .loc[:, ["city", "lat", "lon"]]
    .dropna(subset=["lat", "lon"])
    .drop_duplicates(subset=["city"])
)

# Attach the coordinates to the top-5 ranking.
top5_map = top5.merge(coords, on="city", how="left")

# Plotly rejects negative marker sizes, and the score (temperature minus
# rain/humidity penalties) can drop below zero. Size the markers from a
# shifted copy of the score instead: the offset is 0 when every score is
# already >= MIN_MARKER_SIZE, so the rendering is unchanged in that case.
size_offset = max(0.0, MIN_MARKER_SIZE - top5_map["scoring"].min())
top5_map["marker_size"] = top5_map["scoring"] + size_offset

# Text shown when hovering a marker.
top5_map["hover"] = (
    "City: " + top5_map["city"]
    + "<br>Score: " + top5_map["scoring"].round(2).astype(str)
    + "<br>Temp (day): " + top5_map["mean_temp_day"].round(1).astype(str) + "°C"
    + "<br>Humidity: " + top5_map["mean_humidity"].round(0).astype(int).astype(str) + "%"
    + "<br>Exp rain 7d: " + top5_map["expected_rain_7d_mm"].round(1).astype(str) + " mm"
)

# Map: colour shows the real score, size shows the non-negative size column.
fig = px.scatter_mapbox(
    top5_map,
    lat="lat",
    lon="lon",
    size="marker_size",
    color="scoring",
    hover_name="city",
    hover_data={"lat": False, "lon": False},
    custom_data=["hover"],
    zoom=4,
    height=520,
    title="Top 5 destinations (météo la plus agréable sur 7 jours)"
)

# Replace the default hover box with the pre-built text and hide the
# secondary "extra" box.
fig.update_traces(hovertemplate="%{customdata[0]}<extra></extra>")
fig.update_layout(mapbox_style="carto-positron", margin=dict(l=0, r=0, t=50, b=0))
fig.show()