In [None]:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
# Author: Leonardo Oste
# WebSite: oste.com.br
# Ln: https://www.linkedin.com/in/leonardooste/
# Date: Feb/05/2024
# Python Version: 3.12.1
# MacBook Pro - M1 Max - 32 GB - macOS Sonoma 14.3
# Feel free to use the code for any purpose and reach me with any questions.   
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

## Forecasting machine CPU Usage using Prophet Algorithm
#### Code Structure
This code is segmented into the principal steps to test, train, and predict. The steps are not strictly sequential; the code uses the following structure:
1. Reading and formatting CSV data file.
2. Plot the raw Data and plot the days as lines. (not necessary)
3. Hyperparameter tuning: test a manually entered dictionary of parameters and check the accuracy based on RMSE.
4. Fit and predict using the best parameters found by the hyperparameter tuning.
5. Test it again and plot the RMSE result chart.
6. Plot the entire Data and Forecasting

##### 1. Reading the data
Read the pre-formatted data and rename the columns to the format Prophet expects (`ds` and `y`).
##### 2. Plot the raw Data
The time series has one sample every 30 seconds, so a day has 2880 ticks (24 h × 60 min × 2 samples per minute).
##### 3. HyperTunning
This step has three parts. First, a dict holds the parameters you enter manually for testing; currently I'm testing only `add_seasonality` and the Prophet core parameters. Second, every combination of those parameters is tested and its RMSE is stored. Finally, the best parameters are selected based on the lowest RMSE.
##### 4. Predicting
Fit the model on the first four days using the best parameters from the hyperparameter tuning, then forecast the next day(s).
##### 5. RMSE result chart
Plot the RMSE chart based on best parameters.
##### 6. Plotting all together
Plot the entire data together with the model's forecast.

In [None]:
# - - -
# Read the CSV Data file and adjust the columns names/type to Prophet. 
# - - -


import warnings # disable warnings
warnings.filterwarnings("ignore")

import pandas as pd 
from prophet import Prophet
import matplotlib.pyplot as plt 

# Load the CSV and rename its 'time' / 'cpu' columns to the 'ds' / 'y'
# names used for the Prophet input frame, in a single rename pass.
df = pd.read_csv('../Data/Synthetic_CPU_Usage_Week.csv').rename(
    columns={'cpu': 'y', 'time': 'ds'}
)

# Convert the timestamp column to real datetimes.
df['ds'] = pd.to_datetime(df['ds'])


In [None]:
# - - -
# Plot the DataFrame and vertical lines to segment the days.
# - - -


import matplotlib.dates as mdates

fig = plt.figure(figsize=(40, 6), dpi=100)
plt.xticks(rotation=45, ha='right')

# One major tick per hour, labelled as "day hour".
ax = plt.gca()
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %H'))
ax.xaxis.set_major_locator(mdates.HourLocator(interval=1))

# Dashed red separators at the start of days 2-5 (2880 rows per day at a 30 s sampling rate).
for day in (1, 2, 3, 4):
    plt.axvline(df['ds'].iloc[2880 * day], color='red', linestyle='--', alpha=0.5)

plt.plot(df['ds'], df['y'], color='b')
plt.show()

<img src='../Images/mltimeseries_prophet_image_1.png'>

In [None]:
# - - -
# Hyperparameter tuning: test multiple parameter combinations and compare their RMSE
# - - -


import itertools
import pandas as pd
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics

def _expand_grid(grid):
    """Return one dict per combination of the candidate values listed in `grid`."""
    names = grid.keys()
    return [dict(zip(names, combo)) for combo in itertools.product(*grid.values())]


# Candidate values for the Prophet constructor.
_params_prohet = {
    'changepoint_prior_scale': [0.001],
}

# Candidate values for the custom seasonality passed to add_seasonality().
_params_seasonality = {
    'name': ['daily'],
    'prior_scale': [5, 10, 15],
    'fourier_order': [13, 15, 20, 30],
    'mode': ['additive'],
    'period': [1],
}

# Expand both grids into lists of keyword-argument dicts.
all_params_prophet = _expand_grid(_params_prohet)
all_params_seasonality = _expand_grid(_params_seasonality)

rmses = []
best_rmse = float('inf')
best_params_prophet = None
best_params_seasonality = None

# Evaluate every (Prophet params, seasonality params) pair; this is a plain
# exhaustive grid search in the same order as itertools.product yields pairs.
# NOTE(review): each candidate is fit on the full `df`, while the later
# fit-and-predict cell trains on only the first 2880*4 rows -- confirm that
# tuning on data from the forecast day is intended.
for params_prophet, params_seasonality in itertools.product(all_params_prophet, all_params_seasonality):
    m = Prophet(**params_prophet)
    m.add_seasonality(**params_seasonality)
    m.fit(df)

    df_cv = cross_validation(
        m,
        initial='2 days',
        period='30 minutes',
        horizon='2 hours',
        parallel="processes",
    )

    # With rolling_window=1 the metrics table is read as a single overall
    # score; its first 'rmse' entry is what gets compared between candidates.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmse = df_p['rmse'].values[0]  # type: ignore
    rmses.append(rmse)

    # Keep a running record of the best combination seen so far.
    if rmse < best_rmse:
        best_rmse = rmse
        best_params_prophet = params_prophet
        best_params_seasonality = params_seasonality

# Tabulate every tested pair next to its RMSE (row order matches the loop above).
tuning_results = pd.DataFrame(list(itertools.product(all_params_prophet, all_params_seasonality)))
tuning_results.columns = ['params_prophet', 'params_seasonality']
tuning_results['rmse'] = rmses

# Pick the row with the lowest RMSE and report it.
best_row = tuning_results.loc[tuning_results['rmse'].idxmin()]
for label, column in (
    ("Best params_prophet:", 'params_prophet'),
    ("Best params_seasonality:", 'params_seasonality'),
    ("Best RMSE:", 'rmse'),
):
    print(label, best_row[column])

In [None]:
# - - -
# Fit and Predict using the parameters tested in HyperTunning
# - - -


casting = 2880 * 1     # number of 30-second steps to forecast (2880 steps = one day)
train_rows = 2880 * 4  # number of leading rows used for training (four days)

# Build the model from the best combination selected during tuning.
m = Prophet(**best_row['params_prophet'])
m.add_seasonality(**best_row['params_seasonality'])
model = m.fit(df.iloc[:train_rows])

# Extend the training timeline by `casting` steps and predict over all of it.
future = m.make_future_dataframe(periods=casting, freq='30s')
forecast = m.predict(future)

In [None]:
# - - -
# CrossValidation and Plot RMSE to verify the model accuracy
# - - -


from prophet.plot import plot_cross_validation_metric

# Cross-validate the current model `m` (expected to be the one fitted in the
# fit-and-predict cell) with the same settings used during tuning.
df_cv = cross_validation(
    m,
    initial='2 days',
    period='30 minutes',
    horizon='2 hours',
    parallel="processes",
)
df_p = performance_metrics(df_cv)

# Chart RMSE from the cross-validation results.
fig = plot_cross_validation_metric(df_cv, metric='rmse', rolling_window=0.05)

<img src='../Images/mltimeseries_prophet_image_2.png' width='500'>

In [None]:
# - - -
# Plot entire data and the predictions
# - - -


import matplotlib.pyplot as plt
# Bind the `mdates` alias in this cell: it is used below, and previously it was
# only defined by the optional raw-data plotting cell, so skipping that cell
# made this one fail with a NameError.
import matplotlib.dates as mdates

period = 2880 * 4 # 2880 = 30s freq in 24h

# Adjust the Chart size and Xaxis format
plt.figure(figsize=(50, 6), dpi=200)
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d %H'))
plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=1))
plt.xticks(rotation=45, ha='right')

# Observed series, with a vertical marker at row `period`.
plt.plot(df['ds'], df['y'])
plt.axvline(df['ds'].iloc[period], color='red', linestyle='-', alpha=0.5)

# Forecast rows from position `period` onward: lower bound, upper bound and
# point estimate, all drawn in red.
forecast_tail = forecast.iloc[period:]
for column in ('yhat_lower', 'yhat_upper', 'yhat'):
    plt.plot(forecast_tail['ds'], forecast_tail[column], color="r")

plt.show()

<img src='../Images/mltimeseries_prophet_image_3.png'>