In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

def simulate_booking_profiles(n_hotels=10, n_dates=100, n_days=30, seed=42,
                              first_stay_date="2023-01-01"):
    """Simulate one noisy ramp-up profile per (hotel, stay date) pair.

    Each profile is a linear ramp from a start level drawn uniformly in
    [0.1, 0.4] to an end level drawn uniformly in [0.7, 1.0], plus Gaussian
    noise (standard deviation 0.05), clipped to the [0, 1] interval.

    Parameters
    ----------
    n_hotels : int
        Number of hotels; codes are formatted as "H00", "H01", ...
    n_dates : int
        Number of consecutive stay dates generated per hotel.
    n_days : int
        Number of points in each profile.
    seed : int
        Seed passed to ``np.random.seed``. This deliberately reseeds NumPy's
        *global* legacy generator so the draws are reproducible and identical
        to the original flat script.
    first_stay_date : str or datetime-like
        First stay date; following dates are spaced one day apart.

    Returns
    -------
    list of dict
        One record per (hotel, stay date) with keys ``"hotel_code"``,
        ``"date_de_sejour"`` and ``"profile"`` (1-D array of length n_days).
    """
    np.random.seed(seed)
    # Parsed once here instead of once per generated record.
    first_stay = pd.to_datetime(first_stay_date)

    records = []
    for hotel_id in range(n_hotels):
        hotel_code = f"H{hotel_id:02d}"
        for date_id in range(n_dates):
            # Draw order (start level, end level, noise) must not change:
            # it determines the values produced for a given seed.
            start_level = np.random.uniform(0.1, 0.4)
            end_level = np.random.uniform(0.7, 1.0)
            base = np.linspace(start=start_level, stop=end_level, num=n_days)
            noise = np.random.normal(scale=0.05, size=n_days)
            records.append({
                "hotel_code": hotel_code,
                "date_de_sejour": first_stay + pd.Timedelta(days=date_id),
                "profile": np.clip(base + noise, 0, 1),
            })
    return records


# 1. Simulate the data: 1000 profiles (hotel_code + date_de_sejour)
n_hotels = 10
n_dates = 100
n_days = 30

data = simulate_booking_profiles(n_hotels, n_dates, n_days, seed=42)

# Explicit column list keeps the schema even if the simulation returns no rows.
df = pd.DataFrame(data, columns=["hotel_code", "date_de_sejour", "profile"])

In [3]:

# Stack the per-row profile vectors into a single 2-D array (one row per profile).
X = np.vstack(df['profile'].tolist())
X

array([[0.24474646, 0.31516361, 0.25395452, ..., 0.91732945, 0.92847888,
        1.        ],
       [0.17773872, 0.100119  , 0.19615407, ..., 0.7901707 , 0.76868661,
        0.78874274],
       [0.31528135, 0.38879593, 0.37643851, ..., 0.83540928, 0.85192441,
        0.81809149],
       ...,
       [0.27808852, 0.25340983, 0.41079188, ..., 0.67917048, 0.73570224,
        0.75936463],
       [0.19375145, 0.15941113, 0.21139023, ..., 0.92189316, 0.94310611,
        0.9997496 ],
       [0.27942232, 0.30805727, 0.2462786 , ..., 0.82889292, 0.99660773,
        0.85720034]])

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# 1. Simulated data set: 10 hotels x 100 stay dates x 30 points per profile
np.random.seed(42)
n_hotels, n_dates, n_days = 10, 100, 30

data = []

# A single flat loop over all (hotel, stay date) pairs, visited hotel by hotel.
for row_id in range(n_hotels * n_dates):
    hotel_id, date_id = divmod(row_id, n_dates)

    # Noisy ramp-up profile: the two uniform draws come first (start level,
    # then end level), followed by the Gaussian noise vector.
    low = np.random.uniform(0.1, 0.4)
    high = np.random.uniform(0.7, 1.0)
    ramp = np.linspace(low, high, n_days)
    jitter = np.random.normal(scale=0.05, size=n_days)
    profile = (ramp + jitter).clip(0, 1)

    stay_date = pd.to_datetime("2023-01-01") + pd.Timedelta(days=date_id)
    data.append({
        "hotel_code": f"H{hotel_id:02d}",
        "date_de_sejour": stay_date,
        "profile": profile,
    })

df = pd.DataFrame(data)

In [7]:
# 2. Expand each profile vector into one column per point: J-29, ..., J-1, J0
profile_matrix = np.vstack(df["profile"].tolist())

# Labels count down from J-(n_days - 1) to J-1; the last column is "J0".
profile_columns = [*(f"J-{lag}" for lag in reversed(range(1, n_days))), "J0"]
profile_df = pd.DataFrame(profile_matrix, columns=profile_columns)

# Put the identifying columns (everything except the raw vector) next to the
# expanded profile columns; rows are aligned on the index.
id_columns_df = df.drop(columns=["profile"])
df_expanded = pd.concat([id_columns_df, profile_df], axis=1)

print("✅ Exemple de données :")
df_expanded

✅ Exemple de données :


Unnamed: 0,hotel_code,date_de_sejour,J-29,J-28,J-27,J-26,J-25,J-24,J-23,J-22,...,J-9,J-8,J-7,J-6,J-5,J-4,J-3,J-2,J-1,J0
0,H00,2023-01-01,0.244746,0.315164,0.253955,0.280605,0.397923,0.383984,0.348789,0.426041,...,0.748740,0.700776,0.771445,0.830860,0.794414,0.897399,0.875232,0.917329,0.928479,1.000000
1,H00,2023-01-02,0.177739,0.100119,0.196154,0.112277,0.168401,0.269203,0.320833,0.317028,...,0.679156,0.698719,0.634744,0.685794,0.742368,0.799131,0.750945,0.790171,0.768687,0.788743
2,H00,2023-01-03,0.315281,0.388796,0.376439,0.345838,0.415901,0.494471,0.435515,0.535276,...,0.759401,0.749805,0.726617,0.798506,0.797434,0.860750,0.796952,0.835409,0.851924,0.818091
3,H00,2023-01-04,0.345660,0.354807,0.316899,0.387767,0.412795,0.410948,0.464129,0.513528,...,0.825176,0.826763,0.849850,0.785961,0.922701,0.803600,0.924167,1.000000,0.887561,0.929904
4,H00,2023-01-05,0.034699,0.142041,0.111878,0.215054,0.171783,0.321632,0.231353,0.280793,...,0.652368,0.683546,0.658603,0.730597,0.760019,0.736028,0.891415,0.848198,0.791322,0.910096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,H09,2023-04-06,0.322368,0.391776,0.296869,0.306755,0.381458,0.426335,0.333097,0.408036,...,0.648438,0.703528,0.696152,0.694890,0.815960,0.773112,0.772718,0.765016,0.745558,0.848077
996,H09,2023-04-07,0.289199,0.382513,0.332620,0.302004,0.261208,0.412406,0.324002,0.385413,...,0.592448,0.713969,0.671095,0.711525,0.686599,0.615142,0.719936,0.795324,0.700826,0.814333
997,H09,2023-04-08,0.278089,0.253410,0.410792,0.280662,0.294553,0.351851,0.322031,0.305866,...,0.534423,0.613550,0.616879,0.695947,0.670098,0.770833,0.703617,0.679170,0.735702,0.759365
998,H09,2023-04-09,0.193751,0.159411,0.211390,0.214061,0.238169,0.371432,0.299122,0.344687,...,0.796857,0.741618,0.779073,0.900994,0.790112,0.884239,0.839038,0.921893,0.943106,0.999750


In [11]:
# 3. Clustering input: the profile columns as a plain NumPy matrix
X = df_expanded.loc[:, profile_columns].to_numpy()


In [13]:
# Standardise each column of X before K-means.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

n_clusters = 5
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases; pin it explicitly if labels must match across versions.
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
df_expanded["cluster"] = kmeans.fit_predict(X_scaled)

# 4. Visualisation: mean profile per cluster
# Means are computed on the original (unscaled) columns so the y-axis stays in
# the same units as the data shown in the table.
cluster_means = df_expanded.groupby("cluster")[profile_columns].mean()
cluster_sizes = df_expanded["cluster"].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
for cluster_id, mean_profile in cluster_means.iterrows():
    ax.plot(
        profile_columns,
        mean_profile.to_numpy(),
        marker="o",
        markersize=3,
        label=f"Cluster {cluster_id} (n={int(cluster_sizes.loc[cluster_id])})",
    )
ax.set_title("Profils moyens par cluster")
ax.set_xlabel("Jours avant le séjour")
ax.set_ylabel("Valeur moyenne du profil")
ax.tick_params(axis="x", labelrotation=90)
ax.grid(True, alpha=0.3)
ax.legend(title="Cluster")
fig.tight_layout()
plt.show()

df_expanded



Unnamed: 0,hotel_code,date_de_sejour,J-29,J-28,J-27,J-26,J-25,J-24,J-23,J-22,...,J-8,J-7,J-6,J-5,J-4,J-3,J-2,J-1,J0,cluster
0,H00,2023-01-01,0.244746,0.315164,0.253955,0.280605,0.397923,0.383984,0.348789,0.426041,...,0.700776,0.771445,0.830860,0.794414,0.897399,0.875232,0.917329,0.928479,1.000000,2
1,H00,2023-01-02,0.177739,0.100119,0.196154,0.112277,0.168401,0.269203,0.320833,0.317028,...,0.698719,0.634744,0.685794,0.742368,0.799131,0.750945,0.790171,0.768687,0.788743,3
2,H00,2023-01-03,0.315281,0.388796,0.376439,0.345838,0.415901,0.494471,0.435515,0.535276,...,0.749805,0.726617,0.798506,0.797434,0.860750,0.796952,0.835409,0.851924,0.818091,2
3,H00,2023-01-04,0.345660,0.354807,0.316899,0.387767,0.412795,0.410948,0.464129,0.513528,...,0.826763,0.849850,0.785961,0.922701,0.803600,0.924167,1.000000,0.887561,0.929904,4
4,H00,2023-01-05,0.034699,0.142041,0.111878,0.215054,0.171783,0.321632,0.231353,0.280793,...,0.683546,0.658603,0.730597,0.760019,0.736028,0.891415,0.848198,0.791322,0.910096,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,H09,2023-04-06,0.322368,0.391776,0.296869,0.306755,0.381458,0.426335,0.333097,0.408036,...,0.703528,0.696152,0.694890,0.815960,0.773112,0.772718,0.765016,0.745558,0.848077,1
996,H09,2023-04-07,0.289199,0.382513,0.332620,0.302004,0.261208,0.412406,0.324002,0.385413,...,0.713969,0.671095,0.711525,0.686599,0.615142,0.719936,0.795324,0.700826,0.814333,1
997,H09,2023-04-08,0.278089,0.253410,0.410792,0.280662,0.294553,0.351851,0.322031,0.305866,...,0.613550,0.616879,0.695947,0.670098,0.770833,0.703617,0.679170,0.735702,0.759365,1
998,H09,2023-04-09,0.193751,0.159411,0.211390,0.214061,0.238169,0.371432,0.299122,0.344687,...,0.741618,0.779073,0.900994,0.790112,0.884239,0.839038,0.921893,0.943106,0.999750,2


<Figure size 1000x600 with 0 Axes>