In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import TimeSeriesSplit
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Chargement des données
data = pd.read_csv('turnips2.csv')

# Calcul des profits pour chaque demi-journée
columns = ['Mon-AM', 'Mon-PM', 'Tues-AM', 'Tues-PM', 'Wed-AM', 'Wed-PM', 'Thurs-AM', 'Thurs-PM', 'Fri-AM', 'Fri-PM', 'Sat-AM', 'Sat-PM']
for col in columns:
    data[col] = data[col] - data['Purchase']

# Normalisation des données
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data[columns])

# Clustering des tendances des prix
kmeans = KMeans(n_clusters=4, random_state=0)
data['cluster'] = kmeans.fit_predict(data_scaled)

# Création de modèles de séries temporelles pour chaque cluster
models = {}
for cluster in range(kmeans.n_clusters):
    cluster_data = data[data['cluster'] == cluster]
    cluster_data_scaled = scaler.transform(cluster_data[columns])

# Initialize X_cluster and y_cluster
X_cluster = []
y_cluster = []

# Define the number of timesteps in the RNN input
num_timesteps = 3

# Loop over each cluster to create the sequences
for cluster in range(kmeans.n_clusters):
    # Get the subset of the data for the current cluster
    cluster_data = data[data['cluster'] == cluster]
    cluster_data_scaled = scaler.transform(cluster_data[columns])

    # Create sequences for the RNN
    for i in range(num_timesteps, len(cluster_data_scaled)):
        # Append the sequence of profits for the past num_timesteps half-days
        X_cluster.append(cluster_data_scaled[i-num_timesteps:i, 0]) # Assuming profit is the first column after scaling
        # Append the profit for the current half-day as the target
        y_cluster.append(cluster_data_scaled[i, 0]) # Assuming profit is the first column after scaling

# Convert the lists to numpy arrays
X_cluster = np.array(X_cluster)
y_cluster = np.array(y_cluster)

# Reshape X_cluster to be of shape [samples, timesteps, features]
X_cluster = np.reshape(X_cluster, (X_cluster.shape[0], num_timesteps, 1))



    # Définition du modèle RNN
def create_rnn_model():
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(3, 1)))  # 3 timesteps pour les 3 demi-journées
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

rnn_model = KerasRegressor(build_fn=create_rnn_model, epochs=100, batch_size=32, verbose=0)

    # Validation croisée pour évaluer la performance du modèle
tscv = TimeSeriesSplit(n_splits=5)
for train_index, test_index in tscv.split(X_cluster):
    X_train, X_test = X_cluster[train_index], X_cluster[test_index]
    y_train, y_test = y_cluster[train_index], y_cluster[test_index]
        
    # Entraînement du modèle RNN
    rnn_model.fit(X_train, y_train)
        
    # Évaluation du modèle
    y_pred = rnn_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f'Cluster {cluster}, Split {train_index}, MSE: {mse}')

# Sauvegarde du modèle pour le cluster
models[cluster] = rnn_model

# Après cela, vous pouvez utiliser 'models' pour prédire les profits futurs en fonction du cluster prédit.


  rnn_model = KerasRegressor(build_fn=create_rnn_model, epochs=100, batch_size=32, verbose=0)


Cluster 3, Split [   0    1    2 ... 1664 1665 1666], MSE: 1.7887168121395118
Cluster 3, Split [   0    1    2 ... 3328 3329 3330], MSE: 0.8977027468584372
Cluster 3, Split [   0    1    2 ... 4992 4993 4994], MSE: 1.1247695294789566
Cluster 3, Split [   0    1    2 ... 6656 6657 6658], MSE: 0.7728430406188711
Cluster 3, Split [   0    1    2 ... 8320 8321 8322], MSE: 0.6445626536253554
