In [8]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import numpy as np
from data_extraction.dummy_data_extractor import extract_dummy_data

master_table = extract_dummy_data("dummy_data")

# Extract the relevant dataframe from 'master_table'
df = master_table.at[1, "his"]

df.reset_index(inplace=True)
df = df.dropna()

# Keep only the first two columns
df = df.iloc[:, :2]

# renaming columns
df.columns = ['ds', 'temp']

# Remove ' Dubai' from the datetime strings
df['ds'] = df['ds'].str.replace(' Dubai', '', regex=False)

# Convert the 'ds' column to datetime format
df['ds'] = pd.to_datetime(df['ds'], format="%Y-%m-%dT%H:%M:%S%z")

# Drop rows where datetime parsing failed
df = df.dropna(subset=['ds'])

# Clean temperature column and convert to numeric
df['temp'] = df['temp'].str.replace('°C', '').astype(float)

# Rename columns for convenience
df.columns = ['ds', 'y']

# Separate data for temperature
df_temp = df.copy()

# Ensure 'ds' column is timezone-naive
df_temp['ds'] = df_temp['ds'].dt.tz_localize(None)

# Initialize Prophet models with tuned hyperparameters
model_temp = Prophet(seasonality_mode='additive',     # Adjust based on data exploration
                     interval_width=0.95,              # Adjust prediction interval if needed
                     changepoint_prior_scale=0.01)    # Tune based on data patterns

# Fit the models
model_temp.fit(df_temp)

length_of_missing_data = pd.Timedelta('0 days 23:30:00')


data_logging_interval = pd.Timedelta('0 days 00:05:00')

 # number of predictions
samples = int(length_of_missing_data/data_logging_interval) + 1

# Create future DataFrames for both temp and new_point (next 200 samples, assuming 5-minute intervals)
future_temp = model_temp.make_future_dataframe(periods=samples, freq='5T')

# Predict the future values
forecast_temp = model_temp.predict(future_temp)

# Extract 'ds' and 'yhat' from forecast_temp
prediction = forecast_temp[['ds', 'yhat']]

# Display the resulting dataframe
print(prediction.head())

  pythonDF.loc[i, 'dqStart'] = pd.to_datetime(df['ts'].iloc[i], format="%Y-%m-%dT%H:%M:%S%z Dubai")
12:57:38 - cmdstanpy - INFO - Chain [1] start processing
12:57:38 - cmdstanpy - INFO - Chain [1] done processing


                   ds       yhat
0 2023-03-14 21:10:00  24.632758
1 2023-03-14 21:15:00  24.610346
2 2023-03-14 21:20:00  24.587935
3 2023-03-14 21:25:00  24.565586
4 2023-03-14 21:30:00  24.543357


In [42]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import numpy as np
from data_extraction.dummy_data_extractor import extract_dummy_data

master_table = extract_dummy_data("dummy_data")

# Extract the relevant dataframe from 'master_table'
df = master_table.at[1, "his"]

df.reset_index(inplace=True)
df = df.dropna()

# Keep only the first two columns
df = df.iloc[:, :2]

# renaming columns
df.columns = ['ds', 'temp']

# Remove ' Dubai' from the datetime strings
df['ds'] = df['ds'].str.replace(' Dubai', '', regex=False)

# Convert the 'ds' column to datetime format
df['ds'] = pd.to_datetime(df['ds'], format="%Y-%m-%dT%H:%M:%S%z")

# Drop rows where datetime parsing failed
df = df.dropna(subset=['ds'])

# Clean temperature column and convert to numeric
df['temp'] = df['temp'].str.replace('°C', '').astype(float)

# Rename columns for convenience
df.columns = ['ds', 'y']

# Separate data for temperature
df_temp = df.copy()

# Ensure 'ds' column is timezone-naive
df_temp['ds'] = df_temp['ds'].dt.tz_localize(None)

length_of_missing_data = pd.Timedelta('0 days 23:30:00')


data_logging_interval = pd.Timedelta('0 days 00:05:00')

 # number of predictions
samples = int(length_of_missing_data/data_logging_interval) + 1

# Specify the start date for prediction
dq_start = pd.Timestamp('2023-03-19 01:10:00', tz='Asia/Dubai')



# Initialize Prophet models with tuned hyperparameters
model_temp = Prophet(seasonality_mode='additive',     # Adjust based on data exploration
                     interval_width=0.95,              # Adjust prediction interval if needed
                     changepoint_prior_scale=0.01)    # Tune based on data patterns

# Fit the models
model_temp.fit(df_temp)

# Create future DataFrame starting from dq_start
future_temp = model_temp.make_future_dataframe(periods=samples, freq='5T')

# Adjust 'ds' column to start from dq_start
future_temp['ds'] = dq_start + pd.to_timedelta(range(len(future_temp)), unit='m')

# Ensure 'ds' column is timezone-naive
future_temp['ds'] = future_temp['ds'].dt.tz_localize(None)

# Predict the future values
forecast_temp = model_temp.predict(future_temp)

# Ensure 'ds' column in forecast_temp is timezone-naive
forecast_temp['ds'] = forecast_temp['ds'].dt.tz_localize(None)

# Convert dq_start to timezone-naive
dq_start = dq_start.tz_localize(None)

# Filter predictions to start from dq_start
prediction = forecast_temp[forecast_temp['ds'] >= dq_start][['ds', 'yhat']]

# Display the resulting dataframe
print(prediction.head())

  pythonDF.loc[i, 'dqStart'] = pd.to_datetime(df['ts'].iloc[i], format="%Y-%m-%dT%H:%M:%S%z Dubai")
15:40:50 - cmdstanpy - INFO - Chain [1] start processing
15:40:50 - cmdstanpy - INFO - Chain [1] done processing


                   ds       yhat
0 2023-03-19 01:10:00  22.503415
1 2023-03-19 01:11:00  22.501365
2 2023-03-19 01:12:00  22.499302
3 2023-03-19 01:13:00  22.497226
4 2023-03-19 01:14:00  22.495137


In [70]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import numpy as np
from data_extraction.dummy_data_extractor import extract_dummy_data

def facebook_prophet(df, length_of_missing_data, data_logging_interval, dqStart):

    master_table = extract_dummy_data("dummy_data")

row = master_table.iloc[1]
df = row["his"]
df.set_index(df.columns[0], inplace=True, drop=True)
length_of_missing_data = row["dqDuration"]
data_logging_interval = row["pointInterval"]
dqStart	= row['dqStart']
dqDuration = row['dqDuration']



# Extract the relevant dataframe from 'master_table'
df = master_table.at[1, "his"]

# df.reset_index(inplace=False)
df = df.dropna()

# Keep only the first two columns
df = df.iloc[:, :2]

# renaming columns
df.columns = ['ds', 'temp']

# Remove ' Dubai' from the datetime strings
df['ds'] = df['ds'].str.replace(' Dubai', '', regex=False)

# Convert the 'ds' column to datetime format
df['ds'] = pd.to_datetime(df['ds'], format="%Y-%m-%dT%H:%M:%S%z")

# Drop rows where datetime parsing failed
df = df.dropna(subset=['ds'])

# Clean temperature column and convert to numeric
df['temp'] = df['temp'].str.replace('°C', '').astype(float)

# Rename columns for convenience
df.columns = ['ds', 'y']

# Separate data for temperature
df_temp = df.copy()

# Ensure 'ds' column is timezone-naive
df_temp['ds'] = df_temp['ds'].dt.tz_localize(None)

# length_of_missing_data = pd.Timedelta('0 days 23:30:00')


# data_logging_interval = pd.Timedelta('0 days 00:05:00')

 # number of predictions
samples = int(length_of_missing_data/data_logging_interval) + 1

# # Specify the start date for prediction
# dq_start = pd.Timestamp('2023-03-19 01:10:00', tz='Asia/Dubai')



# Initialize Prophet models with tuned hyperparameters
model_temp = Prophet(seasonality_mode='additive',     # Adjust based on data exploration
                     interval_width=0.95,              # Adjust prediction interval if needed
                     changepoint_prior_scale=0.01)    # Tune based on data patterns

# Fit the models
model_temp.fit(df_temp)

# Create future DataFrame starting from dq_start
future_temp = model_temp.make_future_dataframe(periods=samples, freq='5T')

# Adjust 'ds' column to start from dq_start
future_temp['ds'] = dq_start + pd.to_timedelta(range(len(future_temp)), unit='m')

# Ensure 'ds' column is timezone-naive
future_temp['ds'] = future_temp['ds'].dt.tz_localize(None)

# Predict the future values
forecast_temp = model_temp.predict(future_temp)

# Ensure 'ds' column in forecast_temp is timezone-naive
forecast_temp['ds'] = forecast_temp['ds'].dt.tz_localize(None)

# Convert dq_start to timezone-naive
dq_start = dq_start.tz_localize(None)

# Filter predictions to start from dq_start
prediction = forecast_temp[forecast_temp['ds'] >= dq_start][['ds', 'yhat']]

# Display the resulting dataframe
print(prediction.head())

return prediction

AttributeError: Can only use .str accessor with string values!

In [108]:
import pandas as pd
from prophet import Prophet
from data_extraction.dummy_data_extractor import extract_dummy_data

def facebook_pred(df, length_of_missing_data, data_logging_interval, dqStart):
    
    df.reset_index(inplace=True)
df = df.dropna()

# Keep only the first two columns
df = df.iloc[:, :2]

# renaming columns
df.columns = ['ds', 'temp']

# Remove ' Dubai' from the datetime strings
df['ds'] = df['ds'].str.replace(' Dubai', '', regex=False)

# Convert the 'ds' column to datetime format
df['ds'] = pd.to_datetime(df['ds'], format="%Y-%m-%dT%H:%M:%S%z")

# Drop rows where datetime parsing failed
df = df.dropna(subset=['ds'])

# Clean temperature column and convert to numeric
df['temp'] = df['temp'].str.replace('°C', '').astype(float)

# Rename columns for convenience
df.columns = ['ds', 'y']

# Separate data for temperature
df_temp = df.copy()

# Ensure 'ds' column is timezone-naive
df_temp['ds'] = df_temp['ds'].dt.tz_localize(None)

# Initialize Prophet models with tuned hyperparameters
model_temp = Prophet(seasonality_mode='additive',     # Adjust based on data exploration
                     interval_width=0.95,              # Adjust prediction interval if needed
                     changepoint_prior_scale=0.01)    # Tune based on data patterns

# Fit the models
model_temp.fit(df_temp)


 # number of predictions
samples = int(length_of_missing_data/data_logging_interval) + 1

# Create future DataFrames for both temp and new_point (next 200 samples, assuming 5-minute intervals)
future_temp = model_temp.make_future_dataframe(periods=samples, freq='5T')

# Predict the future values
forecast_temp = model_temp.predict(future_temp)

# Extract 'ds' and 'yhat' from forecast_temp
prediction = forecast_temp[['ds', 'yhat']]

return predictions

# Example usage:
# Replace these values with your actual data and variables
master_table = extract_dummy_data("dummy_data")
df = master_table.at[1, "his"].iloc[:, :2].copy()

length_of_missing_data = pd.Timedelta('0 days 23:30:00')
data_logging_interval = pd.Timedelta('0 days 00:05:00')
dqStart = '2023-03-19 01:10:00'

# Call the function
predictions = facebook_pred(df, length_of_missing_data, data_logging_interval, dqStart)

# Display the resulting dataframe
print(predictions.head())


ValueError: time data "24.269638061523438°C" doesn't match format "%Y-%m-%dT%H:%M:%S%z", at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [112]:
import pandas as pd
from prophet import Prophet
from data_extraction.dummy_data_extractor import extract_dummy_data

def facebook_pred(df, length_of_missing_data, data_logging_interval, dqStart):
    df.reset_index(inplace=True)
    df = df.dropna()

    # Keep only the first two columns
    df = df.iloc[:, :2]

    # renaming columns
    df.columns = ['ds', 'temp']

    # Remove ' Dubai' from the datetime strings
    df['ds'] = df['ds'].str.replace(' Dubai', '', regex=False)

    # Try converting the 'ds' column to datetime format with error handling
    try:
        df['ds'] = pd.to_datetime(df['ds'], format="%Y-%m-%dT%H:%M:%S%z")
    except ValueError as e:
        print(f"Error parsing datetime: {e}")
        print("Some datetime strings could not be parsed. Check your data.")
        problematic_rows = df[pd.to_datetime(df['ds'], format="%Y-%m-%dT%H:%M:%S%z", errors='coerce').isna()]
        print("Problematic rows:")
        print(problematic_rows)
        return None

    # Drop rows where datetime parsing failed
    df = df.dropna(subset=['ds'])

    # Clean temperature column and convert to numeric
    df['temp'] = df['temp'].str.replace('°C', '').astype(float)

    # Rename columns for convenience
    df.columns = ['ds', 'y']

    # Separate data for temperature
    df_temp = df.copy()

    # Ensure 'ds' column is timezone-naive
    df_temp['ds'] = df_temp['ds'].dt.tz_localize(None)

    # Initialize Prophet models with tuned hyperparameters
    model_temp = Prophet(seasonality_mode='additive',     # Adjust based on data exploration
                         interval_width=0.95,              # Adjust prediction interval if needed
                         changepoint_prior_scale=0.01)    # Tune based on data patterns

    # Fit the models
    model_temp.fit(df_temp)

    # Calculate number of predictions
    samples = int(length_of_missing_data / data_logging_interval) + 1

    # Specify the start date for prediction
    dq_start = pd.Timestamp(dqStart, tz='Asia/Dubai').tz_localize(None)

    # Create future DataFrame starting from dq_start
    future_temp = model_temp.make_future_dataframe(periods=samples, freq='5T')

    # Adjust 'ds' column to start from dq_start
    future_temp['ds'] = dq_start + pd.to_timedelta(range(len(future_temp)), unit='m')

    # Ensure 'ds' column is timezone-naive
    future_temp['ds'] = future_temp['ds'].dt.tz_localize(None)

    # Predict the future values
    forecast_temp = model_temp.predict(future_temp)

    # Ensure 'ds' column in forecast_temp is timezone-naive
    forecast_temp['ds'] = forecast_temp['ds'].dt.tz_localize(None)

    # Convert dq_start to timezone-naive
    dq_start = dq_start.tz_localize(None)

    # Filter predictions to start from dq_start
    predictions = forecast_temp[forecast_temp['ds'] >= dq_start][['ds', 'yhat']]

    return predictions

# Example usage:
# Replace these values with your actual data and variables
master_table = extract_dummy_data("dummy_data")
df = master_table.at[1, "his"].iloc[:, :2].copy()

length_of_missing_data = pd.Timedelta('0 days 23:30:00')
data_logging_interval = pd.Timedelta('0 days 00:05:00')
dqStart = '2023-03-19 01:10:00'

# Call the function
predictions = facebook_pred(df, length_of_missing_data, data_logging_interval, dqStart)

# Display the resulting dataframe
if predictions is not None:
    print(predictions.head())


  pythonDF.loc[i, 'dqStart'] = pd.to_datetime(df['ts'].iloc[i], format="%Y-%m-%dT%H:%M:%S%z Dubai")
16:24:56 - cmdstanpy - INFO - Chain [1] start processing
16:24:56 - cmdstanpy - INFO - Chain [1] done processing


                   ds       yhat
0 2023-03-19 01:10:00  22.503415
1 2023-03-19 01:11:00  22.501365
2 2023-03-19 01:12:00  22.499302
3 2023-03-19 01:13:00  22.497226
4 2023-03-19 01:14:00  22.495137
