In [1]:
import pandas as pd
import numpy as np
from prophet import Prophet
from data_extraction.dummy_data_extractor import extract_dummy_data

def facebook_pred(df, length_of_missing_data, data_logging_interval, dqStart, freq='5min'):
    """Forecast the values of a data gap with a Prophet model.

    The model is trained on the observations recorded strictly before
    ``dqStart`` and asked to predict far enough ahead to cover the gap.

    Parameters
    ----------
    df : pandas.DataFrame
        Historical data. Only the first two columns are used: the first is
        treated as the timestamp and the second as the value to forecast.
        The timestamp column must support the ``.dt`` accessor. The caller's
        frame is not modified.
    length_of_missing_data, data_logging_interval
        Length of the gap and spacing between samples. Only their ratio is
        used, so both must be expressed in the same unit.
    dqStart : datetime-like
        Start of the gap; must provide ``strftime``. Rows at or after this
        instant are excluded from training.
    freq : str, default '5min'
        Pandas offset alias for the spacing of the forecast timestamps.
        NOTE(review): this is not derived from ``data_logging_interval``;
        pass a matching alias when the logging interval is not 5 minutes.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``timestamp`` with a single ``facebook`` column holding
        the Prophet point forecast (``yhat``) from ``dqStart`` onwards.

    Raises
    ------
    ValueError
        If fewer than two non-null observations exist before ``dqStart``.
    """
    # Work on an explicit copy of the first two columns so that none of the
    # following steps writes into a slice of the caller's frame.
    history = df.iloc[:, :2].copy()
    history.columns = ['ds', 'y']

    # Train only on observations strictly before the gap starts.
    history = history[history['ds'] < dqStart].reset_index(drop=True)

    # Fail early with a clear message instead of deep inside Prophet, which
    # rejects frames with fewer than two usable rows.
    if history['y'].notna().sum() < 2:
        raise ValueError(
            "facebook_pred needs at least 2 non-null observations before dqStart"
        )

    # Formatting to wall-clock text drops any timezone offset and sub-second
    # part before the timestamps are handed to Prophet.
    history = history.assign(ds=history['ds'].dt.strftime('%Y-%m-%d %H:%M:%S'))

    # Initialize Prophet model with tuned hyperparameters
    model = Prophet(
        seasonality_mode='additive',     # Adjust based on data exploration
        interval_width=0.95,             # Adjust prediction interval if needed
        changepoint_prior_scale=0.001,   # Tune based on data patterns
    )
    model.fit(history)

    # Number of forecast steps: one per logging interval inside the gap,
    # plus one so the far edge of the gap is included.
    samples = int(length_of_missing_data / data_logging_interval) + 1

    # Extend the time axis beyond the training data and predict on it.
    future = model.make_future_dataframe(periods=samples, freq=freq)
    forecast = model.predict(future)

    # Same wall-clock formatting as the training timestamps, so the cutoff
    # compares like with like (no timezone, whole seconds).
    gap_start = pd.Timestamp(dqStart.strftime('%Y-%m-%d %H:%M:%S'))

    # Keep only the forecast from the gap start onwards, indexed by timestamp.
    predictions = (
        forecast.loc[forecast['ds'] >= gap_start, ['ds', 'yhat']]
        .rename(columns={'ds': 'timestamp', 'yhat': 'facebook'})
        .set_index('timestamp')
    )

    return predictions

In [2]:
# Demo: forecast the data gap described by the first record (row label 0)
# of the dummy master table.
master_table = extract_dummy_data("dummy_data")

# History frame stored in the "his" cell, limited to its first two columns.
df = master_table.loc[0, "his"].iloc[:, :2]

# Gap description taken from the same record.
dqStart = master_table.loc[0, "dqStart"]
length_of_missing_data = master_table.loc[0, "dqDuration"]
data_logging_interval = master_table.loc[0, "pointInterval"]

# Run the Prophet-based gap forecast.
predictions = facebook_pred(
    df=df,
    length_of_missing_data=length_of_missing_data,
    data_logging_interval=data_logging_interval,
    dqStart=dqStart,
)

# Show the forecast as plain text.
print(predictions)

  pythonDF.loc[i, 'dqStart'] = pd.to_datetime(df['ts'].iloc[i], format="%Y-%m-%dT%H:%M:%S%z Dubai")
10:48:04 - cmdstanpy - INFO - Chain [1] start processing
10:48:04 - cmdstanpy - INFO - Chain [1] done processing


                      facebook
timestamp                     
2023-05-09 23:55:00  16.097934
2023-05-10 00:00:00  16.105465
2023-05-10 00:05:00  16.112155
2023-05-10 00:10:00  16.117987
2023-05-10 00:15:00  16.122947
...                        ...
2023-05-11 23:40:00  16.361165
2023-05-11 23:45:00  16.370474
2023-05-11 23:50:00  16.378992
2023-05-11 23:55:00  16.386694
2023-05-12 00:00:00  16.393557

[578 rows x 1 columns]
