In [None]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import numpy as np

# Timestamps in the source CSV look like "2023-03-11T08:20:00+04:00 Dubai".
TZ_SUFFIX = ' Dubai'
LOCAL_TZ = 'Asia/Dubai'


def clean_readings(raw):
    """Turn the raw two-column CSV frame into Prophet's ``ds``/``y`` layout.

    Args:
        raw: DataFrame whose first column holds timestamps such as
            "2023-03-11T08:20:00+04:00 Dubai" and whose second column holds
            temperatures such as "23.5°C" (plain numbers are accepted too).

    Returns:
        A new DataFrame with a timezone-naive ``ds`` column (Dubai wall-clock
        time) and a float ``y`` column, sorted chronologically. Rows whose
        timestamp cannot be parsed are dropped. ``raw`` is not modified.

    Raises:
        ValueError: if ``raw`` does not have exactly two columns or a
            temperature cannot be converted to float.
    """
    frame = raw.copy()
    frame.columns = ['ts', 'temp']

    # Strip the trailing zone label so pandas can parse the ISO timestamp.
    stripped = frame['ts'].astype(str).str.replace(TZ_SUFFIX, '', regex=False)
    frame['ts'] = pd.to_datetime(stripped, errors='coerce')
    # Work on an explicit copy so the assignment below does not write to a slice.
    frame = frame.dropna(subset=['ts']).copy()

    # astype(str) keeps this working when pandas already parsed the column
    # as numeric (the .str accessor only exists on string data).
    frame['temp'] = (
        frame['temp'].astype(str).str.replace('°C', '', regex=False).astype(float)
    )

    # Prophet rejects timezone-aware 'ds' values, so keep the Dubai
    # wall-clock time and drop the tz information.
    ts = frame['ts']
    if ts.dt.tz is not None:
        ts = ts.dt.tz_convert(LOCAL_TZ)
    tidy = pd.DataFrame({'ds': ts.dt.tz_localize(None), 'y': frame['temp']})

    # A positional train/test split is only chronological on sorted data.
    return tidy.sort_values('ds', kind='mergesort').reset_index(drop=True)


def format_dubai_timestamps(naive_local):
    """Format naive Dubai wall-clock timestamps like the source file.

    Args:
        naive_local: Series of timezone-naive datetimes expressed in Dubai
            local time (what ``clean_readings`` produces).

    Returns:
        Series of strings such as "2023-03-11T08:20:00+04:00 Dubai".
    """
    # The values are already Dubai local time: attach the zone, do not shift.
    localized = naive_local.dt.tz_localize(LOCAL_TZ)
    stamped = localized.dt.strftime('%Y-%m-%dT%H:%M:%S%z')
    # strftime's %z renders "+0400"; the source format uses "+04:00".
    with_colon = stamped.str.replace(r'([+-]\d{2})(\d{2})$', r'\1:\2', regex=True)
    return with_colon + TZ_SUFFIX


if __name__ == "__main__":
    # Load the data
    file_path = r'C:\Users\syounas\OneDrive - Enova Facilities Management\Tasks\GitHub\HubgradeDataCleaning\Sana\Data\Train_1.csv'
    data = pd.read_csv(file_path)

    # Parse timestamps, clean temperatures and rename to Prophet's ds/y
    df_temp = clean_readings(data)

    # Split data into training and testing sets (chronological 80/20)
    train_size = int(len(df_temp) * 0.8)
    train_df = df_temp.iloc[:train_size]
    test_df = df_temp.iloc[train_size:]

    # Initialize Prophet model with tuned hyperparameters
    model_temp = Prophet(seasonality_mode='additive',     # Adjust based on data exploration
                         interval_width=0.95,              # Adjust prediction interval if needed
                         changepoint_prior_scale=0.01)    # Tune based on data patterns

    # Fit the model
    model_temp.fit(train_df)

    # Predict at the timestamps that are actually in the data (history
    # followed by the held-out rows) instead of assuming a gap-free
    # 5-minute grid, so every prediction lines up with its observation.
    future_temp = df_temp[['ds']]

    # Predict the future values
    forecast_temp = model_temp.predict(future_temp)

    # Calculate RMSE for the test set
    y_true = test_df['y'].values
    y_pred = forecast_temp['yhat'].values[train_size:]
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    print(f"RMSE for the test set: {rmse}")

    # Format the predicted 'ds' column to "2023-03-11T08:20:00+04:00 Dubai"
    forecast_temp['ds'] = format_dubai_timestamps(forecast_temp['ds'])

    # Save only the last 200 predicted values (ds and yhat columns) to a CSV file
    output_path = r'C:\Users\syounas\OneDrive - Enova Facilities Management\Tasks\GitHub\HubgradeDataCleaning\Sana\Data\pred.csv'
    forecast_temp[['ds', 'yhat']].tail(200).to_csv(output_path, index=False)
