In [3]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load the data
file_path = r'C:\Users\syounas\OneDrive - Enova Facilities Management\Tasks\GitHub\HubgradeDataCleaning\Sana\Data\Train.csv'
data = pd.read_csv(file_path)

# Rename columns for convenience
data.columns = ['ts', 'temp', 'new_point']

# Convert 'ts' column to datetime and handle timezone offset
data['ts'] = pd.to_datetime(data['ts'].str.replace(' Dubai', ''), errors='coerce')
# Drop rows where datetime parsing failed
data = data.dropna(subset=['ts'])

# Clean temperature column and convert to numeric
data['temp'] = data['temp'].str.replace('°C', '').astype(float)

# Clean percentage column and convert to numeric
data['new_point'] = data['new_point'].str.replace('%', '').astype(float)

# Separate data for temperature and new_point
df_temp = data[['ts', 'temp']].rename(columns={'ts': 'ds', 'temp': 'y'})
df_new_point = data[['ts', 'new_point']].rename(columns={'ts': 'ds', 'new_point': 'y'})

# Ensure 'ds' column is timezone-naive
df_temp['ds'] = df_temp['ds'].dt.tz_localize(None)
df_new_point['ds'] = df_new_point['ds'].dt.tz_localize(None)

# Initialize Prophet models with tuned hyperparameters
model_temp = Prophet(seasonality_mode='additive', interval_width=0.95, changepoint_prior_scale=0.01)
model_new_point = Prophet(seasonality_mode='additive', interval_width=0.95, changepoint_prior_scale=0.01)

# Fit the models
model_temp.fit(df_temp)
model_new_point.fit(df_new_point)

# Create future DataFrames for both temp and new_point (next 400 samples, assuming 5-minute intervals)
future_temp = model_temp.make_future_dataframe(periods=400, freq='5T')
future_new_point = model_new_point.make_future_dataframe(periods=400, freq='5T')

# Predict the future values
forecast_temp = model_temp.predict(future_temp)
forecast_new_point = model_new_point.predict(future_new_point)

# Calculate residuals
residuals_temp = df_temp['y'] - forecast_temp['yhat'][:len(df_temp)]
residuals_new_point = df_new_point['y'] - forecast_new_point['yhat'][:len(df_new_point)]

# Prepare data for LSTM
def prepare_lstm_data(residuals, n_steps):
    X, y = [], []
    for i in range(len(residuals) - n_steps):
        X.append(residuals[i:i + n_steps])
        y.append(residuals[i + n_steps])
    return np.array(X), np.array(y)

n_steps = 10
X_temp, y_temp = prepare_lstm_data(residuals_temp.values, n_steps)
X_new_point, y_new_point = prepare_lstm_data(residuals_new_point.values, n_steps)

# Reshape input to be [samples, time steps, features]
X_temp = X_temp.reshape((X_temp.shape[0], X_temp.shape[1], 1))
X_new_point = X_new_point.reshape((X_new_point.shape[0], X_new_point.shape[1], 1))

# Define and train LSTM model for temperature residuals
lstm_model_temp = Sequential()
lstm_model_temp.add(LSTM(units=64, input_shape=(n_steps, 1)))
lstm_model_temp.add(Dense(units=1))
lstm_model_temp.compile(optimizer='adam', loss='mse')
lstm_model_temp.fit(X_temp, y_temp, epochs=10, batch_size=32, verbose=1)

# Define and train LSTM model for new_point residuals
lstm_model_new_point = Sequential()
lstm_model_new_point.add(LSTM(units=64, input_shape=(n_steps, 1)))
lstm_model_new_point.add(Dense(units=1))
lstm_model_new_point.compile(optimizer='adam', loss='mse')
lstm_model_new_point.fit(X_new_point, y_new_point, epochs=10, batch_size=32, verbose=1)

# Generate future LSTM inputs for temperature and new_point
X_temp_future = residuals_temp.values[-n_steps:].reshape((1, n_steps, 1))
X_new_point_future = residuals_new_point.values[-n_steps:].reshape((1, n_steps, 1))

# Predict future residuals using LSTM
lstm_predictions_temp = []
lstm_predictions_new_point = []

for _ in range(400):
    lstm_pred_temp = lstm_model_temp.predict(X_temp_future)
    lstm_predictions_temp.append(lstm_pred_temp[0, 0])
    X_temp_future = np.append(X_temp_future[:, 1:, :], lstm_pred_temp.reshape(1, 1, 1), axis=1)

    lstm_pred_new_point = lstm_model_new_point.predict(X_new_point_future)
    lstm_predictions_new_point.append(lstm_pred_new_point[0, 0])
    X_new_point_future = np.append(X_new_point_future[:, 1:, :], lstm_pred_new_point.reshape(1, 1, 1), axis=1)

# Combine Prophet predictions and LSTM residuals for final predictions
final_predictions_temp = forecast_temp['yhat'].values[-400:] + np.array(lstm_predictions_temp)
final_predictions_new_point = forecast_new_point['yhat'].values[-400:] + np.array(lstm_predictions_new_point)

# Save the predictions to a CSV file
predictions_df = pd.DataFrame({
    'ts': future_temp['ds'].values[-400:], 
    'temp': final_predictions_temp, 
    'new_point': final_predictions_new_point
})

save_path = r'C:\Users\syounas\OneDrive - Enova Facilities Management\Tasks\GitHub\HubgradeDataCleaning\Sana\Data\hybrid.csv'
predictions_df.to_csv(save_path, index=False)


12:10:25 - cmdstanpy - INFO - Chain [1] start processing
12:10:25 - cmdstanpy - INFO - Chain [1] done processing
12:10:25 - cmdstanpy - INFO - Chain [1] start processing
12:10:26 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/10


  super().__init__(**kwargs)


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0056
Epoch 2/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0029
Epoch 3/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0023
Epoch 4/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0016    
Epoch 5/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0016
Epoch 6/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0015
Epoch 7/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0015
Epoch 8/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0014
Epoch 9/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0014
Epoch 10/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0015
Epoch 1/10
[1m3

In [5]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load the data
file_path = r'C:\Users\syounas\OneDrive - Enova Facilities Management\Tasks\GitHub\HubgradeDataCleaning\Sana\Data\Train.csv'
data = pd.read_csv(file_path)

# Rename columns for convenience
data.columns = ['ts', 'temp', 'new_point']

# Convert 'ts' column to datetime and handle timezone offset
data['ts'] = pd.to_datetime(data['ts'].str.replace(' Dubai', ''), errors='coerce')
# Drop rows where datetime parsing failed
data = data.dropna(subset=['ts'])

# Clean temperature column and convert to numeric
data['temp'] = data['temp'].str.replace('°C', '').astype(float)

# Clean percentage column and convert to numeric
data['new_point'] = data['new_point'].str.replace('%', '').astype(float)

# Separate data for temperature and new_point
df_temp = data[['ts', 'temp']].rename(columns={'ts': 'ds', 'temp': 'y'})
df_new_point = data[['ts', 'new_point']].rename(columns={'ts': 'ds', 'new_point': 'y'})

# Ensure 'ds' column is timezone-naive
df_temp['ds'] = df_temp['ds'].dt.tz_localize(None)
df_new_point['ds'] = df_new_point['ds'].dt.tz_localize(None)

# Detect and handle zeros after a certain point (e.g., after 187 samples)
zero_threshold_index = 187
df_temp.loc[zero_threshold_index:, 'y'] = np.nan
df_new_point.loc[zero_threshold_index:, 'y'] = np.nan

# Initialize Prophet models with multiplicative seasonality
model_temp = Prophet(seasonality_mode='multiplicative', interval_width=0.95, changepoint_prior_scale=0.01)
model_new_point = Prophet(seasonality_mode='multiplicative', interval_width=0.95, changepoint_prior_scale=0.01)

# Fit the models
model_temp.fit(df_temp)
model_new_point.fit(df_new_point)

# Create future DataFrames for both temp and new_point (next 400 samples, assuming 5-minute intervals)
future_temp = model_temp.make_future_dataframe(periods=400, freq='5T')
future_new_point = model_new_point.make_future_dataframe(periods=400, freq='5T')

# Predict the future values
forecast_temp = model_temp.predict(future_temp)
forecast_new_point = model_new_point.predict(future_new_point)

# Combine Prophet predictions for final forecasts
final_predictions_temp = forecast_temp['yhat'].values[-400:]
final_predictions_new_point = forecast_new_point['yhat'].values[-400:]

# Save the predictions to a CSV file
predictions_df = pd.DataFrame({
    'ts': future_temp['ds'].values[-400:], 
    'temp': final_predictions_temp, 
    'new_point': final_predictions_new_point
})

save_path = r'C:\Users\syounas\OneDrive - Enova Facilities Management\Tasks\GitHub\HubgradeDataCleaning\Sana\Data\hybrid_0.csv'
predictions_df.to_csv(save_path, index=False)


12:17:18 - cmdstanpy - INFO - Chain [1] start processing
12:17:18 - cmdstanpy - INFO - Chain [1] done processing
12:17:18 - cmdstanpy - INFO - Chain [1] start processing
12:17:18 - cmdstanpy - INFO - Chain [1] done processing


In [23]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from datetime import timedelta

# Load the data (assuming you've already loaded and preprocessed it as in the previous code snippet)
# Replace this with your actual data loading and preprocessing steps

file_path = r'C:\Users\syounas\OneDrive - Enova Facilities Management\Tasks\GitHub\HubgradeDataCleaning\Sana\Data\Train.csv'
data = pd.read_csv(file_path)
data.columns = ['ts', 'temp', 'new_point']
data['ts'] = pd.to_datetime(data['ts'].str.replace(' Dubai', ''), errors='coerce')
data = data.dropna(subset=['ts'])
data['temp'] = data['temp'].str.replace('°C', '').astype(float)
data['new_point'] = data['new_point'].str.replace('%', '').astype(float)

# Separate data for temperature and new_point
df_temp = data[['ts', 'temp']].rename(columns={'ts': 'ds', 'temp': 'y'})
df_new_point = data[['ts', 'new_point']].rename(columns={'ts': 'ds', 'new_point': 'y'})

# Ensure 'ds' column is timezone-naive
df_temp['ds'] = df_temp['ds'].dt.tz_localize(None)
df_new_point['ds'] = df_new_point['ds'].dt.tz_localize(None)

# Initialize Prophet models with multiplicative seasonality and tuned hyperparameters
model_temp = Prophet(seasonality_mode='multiplicative',
                     interval_width=0.95,
                     changepoint_prior_scale=0.05)  # Adjust changepoint_prior_scale as needed
model_new_point = Prophet(seasonality_mode='multiplicative',
                          interval_width=0.95,
                          changepoint_prior_scale=0.05)  # Adjust changepoint_prior_scale as needed

# Fit the Prophet models
model_temp.fit(df_temp)
model_new_point.fit(df_new_point)

# Create future DataFrames for both temp and new_point (next 400 samples, assuming 5-minute intervals)
future_temp = model_temp.make_future_dataframe(periods=400, freq='5T')
future_new_point = model_new_point.make_future_dataframe(periods=400, freq='5T')

# Predict the future values with Prophet
forecast_temp = model_temp.predict(future_temp)
forecast_new_point = model_new_point.predict(future_new_point)

# LSTM Model for temperature prediction
n_steps = 10  # Number of time steps to consider for LSTM
train_size = int(len(df_temp) * 0.8)  # 80% for training, 20% for validation
train_data_temp = df_temp.iloc[:train_size]['y'].values
train_data_temp = train_data_temp.reshape((len(train_data_temp), 1))

# Prepare data for LSTM
def prepare_data_for_lstm(data, n_steps):
    X, y = [], []
    for i in range(len(data)):
        end_ix = i + n_steps
        if end_ix > len(data)-1:
            break
        seq_x, seq_y = data[i:end_ix], data[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

X_temp, y_temp = prepare_data_for_lstm(train_data_temp, n_steps)
X_temp = X_temp.reshape((X_temp.shape[0], X_temp.shape[1], 1))

# Define LSTM model architecture
lstm_model_temp = Sequential()
lstm_model_temp.add(LSTM(units=50, activation='relu', input_shape=(n_steps, 1)))
lstm_model_temp.add(Dense(units=1))
lstm_model_temp.compile(optimizer='adam', loss='mse')

# Fit LSTM model
lstm_model_temp.fit(X_temp, y_temp, epochs=10, batch_size=32, verbose=1)

# Forecast with LSTM
lstm_predictions_temp = []
initial_seq_temp = train_data_temp[-n_steps:]
current_seq_temp = initial_seq_temp.reshape((1, n_steps, 1))

for i in range(len(forecast_temp)):
    lstm_pred_temp = lstm_model_temp.predict(current_seq_temp)[0]
    lstm_predictions_temp.append(lstm_pred_temp)
    current_seq_temp = np.append(current_seq_temp[:, 1:, :], [[lstm_pred_temp]], axis=1)

# Combine Prophet and LSTM predictions for temperature
combined_predictions_temp = forecast_temp[['ds', 'yhat']].copy()
combined_predictions_temp['lstm_yhat'] = lstm_predictions_temp
combined_predictions_temp['final_yhat'] = combined_predictions_temp['yhat'] + combined_predictions_temp['lstm_yhat']

# Calculate RMSE for temp
actual_temp = df_temp['y'].values[-len(combined_predictions_temp):]
predicted_temp = combined_predictions_temp['final_yhat'].values
rmse_temp = np.sqrt(mean_squared_error(actual_temp, predicted_temp))
print(f"RMSE for temperature: {rmse_temp}")

# LSTM Model for new_point prediction
train_data_new_point = df_new_point.iloc[:train_size]['y'].values
train_data_new_point = train_data_new_point.reshape((len(train_data_new_point), 1))

X_new_point, y_new_point = prepare_data_for_lstm(train_data_new_point, n_steps)
X_new_point = X_new_point.reshape((X_new_point.shape[0], X_new_point.shape[1], 1))

# Define LSTM model architecture
lstm_model_new_point = Sequential()
lstm_model_new_point.add(LSTM(units=50, activation='relu', input_shape=(n_steps, 1)))
lstm_model_new_point.add(Dense(units=1))
lstm_model_new_point.compile(optimizer='adam', loss='mse')

# Fit LSTM model
lstm_model_new_point.fit(X_new_point, y_new_point, epochs=10, batch_size=32, verbose=1)

# Forecast with LSTM
lstm_predictions_new_point = []
initial_seq_new_point = train_data_new_point[-n_steps:]
current_seq_new_point = initial_seq_new_point.reshape((1, n_steps, 1))

for i in range(len(forecast_new_point)):
    lstm_pred_new_point = lstm_model_new_point.predict(current_seq_new_point)[0]
    lstm_predictions_new_point.append(lstm_pred_new_point)
    current_seq_new_point = np.append(current_seq_new_point[:, 1:, :], [[lstm_pred_new_point]], axis=1)

# Combine Prophet and LSTM predictions for new_point
combined_predictions_new_point = forecast_new_point[['ds', 'yhat']].copy()
combined_predictions_new_point['lstm_yhat'] = lstm_predictions_new_point
combined_predictions_new_point['final_yhat'] = combined_predictions_new_point['yhat'] + combined_predictions_new_point['lstm_yhat']

# Calculate RMSE for new_point
actual_new_point = df_new_point['y'].values[-len(combined_predictions_new_point):]
predicted_new_point = combined_predictions_new_point['final_yhat'].values
rmse_new_point = np.sqrt(mean_squared_error(actual_new_point, predicted_new_point))
print(f"RMSE for new_point: {rmse_new_point}")

# Save combined predictions to CSV in specified directory
save_path_temp = r'C:\Users\syounas\OneDrive - Enova Facilities Management\Tasks\GitHub\HubgradeDataCleaning\Sana\Data\hybrid_predictions_temp.csv'
save_path_new_point = r'C:\Users\syounas\OneDrive - Enova Facilities Management\Tasks\GitHub\HubgradeDataCleaning\Sana\Data\hybrid_predictions_new_point.csv'

combined_predictions_temp.to_csv(save_path_temp, index=False)
combined_predictions_new_point.to_csv(save_path_new_point, index=False)


12:29:03 - cmdstanpy - INFO - Chain [1] start processing
12:29:03 - cmdstanpy - INFO - Chain [1] done processing
12:29:03 - cmdstanpy - INFO - Chain [1] start processing
12:29:03 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/10


  super().__init__(**kwargs)


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 513.1949
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.0330
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1180
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0117
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0030
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0028
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0029
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0024
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0031
Epoch 10/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0025
[1m1/1[0m [32m━

ValueError: Found input variables with inconsistent numbers of samples: [1000, 1400]