In [1]:
import pandas as pd
import numpy as np
from tslearn.clustering import TimeSeriesKMeans
from tslearn.preprocessing import TimeSeriesScalerMeanVariance

# Pipeline: cluster products by the shape of their monthly 'tn' series using
# DTW k-means, then write the product -> cluster mapping to a CSV file.

# 0. Configuration: everything a reader might want to tune lives here.
INPUT_PATH = "sell-in.txt"        # tab-separated input file
OUTPUT_PATH = "dtw_clusters.csv"  # one row per product_id with its cluster
SEED = 42                         # fixes the k-means initialisation
n_clusters = 5

# 1. Load the dataset and aggregate to one row per (product, month).
df = pd.read_csv(INPUT_PATH, sep="\t")
if df.empty:
    # Fail early with a clear message instead of an obscure error downstream.
    raise ValueError(f"'{INPUT_PATH}' contains no rows; nothing to cluster.")
df['periodo'] = pd.to_datetime(df['periodo'], format='%Y%m')
df = df.groupby(['product_id', 'periodo'])['tn'].sum().reset_index()

# 2. Pivot: each row is a product, each column a month.
# The columns are reindexed onto a complete month-start grid so that a month
# missing from the file shows up as a zero column instead of silently
# collapsing the time axis (which would distort every series fed to DTW).
# '%Y%m' parses to the first day of the month, which is what freq='MS' yields.
all_months = pd.date_range(df['periodo'].min(), df['periodo'].max(), freq='MS')
pivot = (
    df.pivot(index='product_id', columns='periodo', values='tn')
    .reindex(columns=all_months)
    .rename_axis(columns='periodo')
    .fillna(0)  # months without a record for a product count as 0
)

# 3. tslearn input format: 3D array (n_samples, n_timestamps, n_features).
data_array = pivot.to_numpy()
data_array = data_array[:, :, np.newaxis]  # add the trailing "features" axis

# 4. Per-product normalisation (scaler defaults), so that clustering compares
# the shape of each series rather than its volume.
scaler = TimeSeriesScalerMeanVariance()
data_scaled = scaler.fit_transform(data_array)

# 5. DTW k-means clustering.
model = TimeSeriesKMeans(n_clusters=n_clusters, metric="dtw", random_state=SEED)
labels = model.fit_predict(data_scaled)

# Row i of data_scaled corresponds to pivot.index[i]; make sure the labels
# can be matched back to product ids one-to-one.
assert len(labels) == len(pivot), "one cluster label is expected per product"

# 6. Save the product -> cluster mapping.
cluster_df = pd.DataFrame({'product_id': pivot.index, 'cluster': labels})
cluster_df.to_csv(OUTPUT_PATH, index=False)
print(f"✅ Archivo '{OUTPUT_PATH}' generado correctamente.")


Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


✅ Archivo 'dtw_clusters.csv' generado correctamente.
