## Facebook Prophet Baseline for Multivariate Time Series Forecasting

### Libraries

In [None]:
import numpy as np
import pandas as pd
import os, math
import matplotlib.pyplot as plt
from fbprophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

### Basic data pre-processing

In [None]:
DATA_PATH = '../input/tabular-playground-series-jul-2021'

# Read the three competition CSVs from DATA_PATH in one pass; the unpacking
# order matches the order of the file stems below.
train_df, test_df, sample_submission = (
    pd.read_csv(f'{DATA_PATH}/{stem}.csv')
    for stem in ('train', 'test', 'sample_submission')
)

In [None]:
# Quick sanity check of the training frame: print (rows, columns), then let the
# notebook render the first rows via the trailing expression.
print(train_df.shape)
train_df.head()

In [None]:
# Same sanity check for the test frame: print (rows, columns), then let the
# notebook render the first rows via the trailing expression.
print(test_df.shape)
test_df.head()

In [None]:
# Build one frame per pollutant: each keeps every non-target column plus
# exactly one target column, by dropping the other two targets.
train1_df = train_df.drop(columns=['target_benzene', 'target_nitrogen_oxides'])   # keeps target_carbon_monoxide
train2_df = train_df.drop(columns=['target_carbon_monoxide', 'target_nitrogen_oxides'])   # keeps target_benzene
train3_df = train_df.drop(columns=['target_benzene', 'target_carbon_monoxide'])   # keeps target_nitrogen_oxides

In [None]:
def train_dataset(train_df, target):
    """Split a frame chronologically into a Prophet training set and a hold-out set.

    The last ``24 * 30`` rows (30 days, if the data has one row per hour) are
    held out for validation; all earlier rows are used for training. Rows are
    taken positionally, so the frame is expected to be sorted by time already.

    Args:
        train_df: Frame containing a ``date_time`` column, the regressor
            columns up to and including ``sensor_5``, and the ``target`` column.
        target: Name of the column to forecast.

    Returns:
        A ``(train, val_x, val_y)`` tuple:
        ``train`` is the training rows with ``date_time`` renamed to ``ds`` and
        ``target`` renamed to ``y`` (the column names Prophet expects);
        ``val_x`` is the hold-out rows restricted to ``date_time``..``sensor_5``
        with ``date_time`` renamed to ``ds``;
        ``val_y`` is the hold-out values of ``target``.
    """
    # Select the last 30 days as the validation data set and the rest as the training data set.
    # Both slices are taken from the untouched input: slicing the validation rows
    # from an already-truncated frame would return training rows instead.
    holdout_rows = 24 * 30
    fit_df = train_df.iloc[:-holdout_rows]
    val_df = train_df.iloc[-holdout_rows:]

    train = fit_df.rename({'date_time': 'ds', target: 'y'}, axis='columns')

    # Label-based slice: keeps every column from 'date_time' through 'sensor_5'.
    valid_x = val_df.loc[:, 'date_time':'sensor_5']
    val_x = valid_x.rename({'date_time': 'ds'}, axis='columns')
    val_y = val_df[target]

    return train, val_x, val_y

### Model & Training

In [None]:
# Carbon monoxide: split, fit Prophet with the weather/sensor columns as extra
# regressors, then score the fitted model on the hold-out rows.
train, val_x, val_y = train_dataset(train1_df, target='target_carbon_monoxide')

model = Prophet(daily_seasonality=10)
# model.add_seasonality(name='yearly', period=4, fourier_order=5)
for regressor in ('deg_C', 'relative_humidity', 'absolute_humidity',
                  'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5'):
    model.add_regressor(regressor)

# Fit on the training rows
model_CO = model.fit(train)

# Forecast the validation rows
y_pred = model_CO.predict(val_x)

# Calculate metrics (MAE is kept in a variable; only RMSE is printed)
score_mae = mean_absolute_error(val_y, y_pred['yhat'])
score_rmse = math.sqrt(mean_squared_error(val_y, y_pred['yhat']))

print(f'RMSE: {score_rmse}')

In [None]:
# Replace the forecasts by their absolute values, then overlay them on the
# true validation values (both re-indexed from 0 so they share an x-axis).
y_pred['yhat'] = y_pred['yhat'].abs()

plt.figure(figsize=(20, 5))
for series, label in ((val_y, 'true'), (y_pred['yhat'], 'pred')):
    plt.plot(series.reset_index(drop=True), label=label)
plt.legend()
plt.show()

In [None]:
# Benzene: split, fit Prophet with the weather/sensor columns as extra
# regressors, then score the fitted model on the hold-out rows.
train, val_x, val_y = train_dataset(train2_df, target='target_benzene')

model = Prophet(daily_seasonality=10)
# model.add_seasonality(name='yearly', period=4, fourier_order=5)
for regressor in ('deg_C', 'relative_humidity', 'absolute_humidity',
                  'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5'):
    model.add_regressor(regressor)

# Fit on the training rows
model_benzene = model.fit(train)

# Forecast the validation rows
y_pred = model_benzene.predict(val_x)

# Calculate metrics (MAE is kept in a variable; only RMSE is printed)
score_mae = mean_absolute_error(val_y, y_pred['yhat'])
score_rmse = math.sqrt(mean_squared_error(val_y, y_pred['yhat']))

print(f'RMSE: {score_rmse}')

In [None]:
# Replace the forecasts by their absolute values, then overlay them on the
# true validation values (both re-indexed from 0 so they share an x-axis).
y_pred['yhat'] = y_pred['yhat'].abs()

plt.figure(figsize=(20, 5))
for series, label in ((val_y, 'true'), (y_pred['yhat'], 'pred')):
    plt.plot(series.reset_index(drop=True), label=label)
plt.legend()
plt.show()

In [None]:
# Nitrogen oxides: split, fit Prophet with the weather/sensor columns as extra
# regressors, then score the fitted model on the hold-out rows.
train, val_x, val_y = train_dataset(train3_df, target='target_nitrogen_oxides')

model = Prophet(daily_seasonality=10)
# model.add_seasonality(name='yearly', period=4, fourier_order=5)
for regressor in ('deg_C', 'relative_humidity', 'absolute_humidity',
                  'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5'):
    model.add_regressor(regressor)

# Fit on the training rows
model_NO = model.fit(train)

# Forecast the validation rows
y_pred = model_NO.predict(val_x)

# Calculate metrics (MAE is kept in a variable; only RMSE is printed)
score_mae = mean_absolute_error(val_y, y_pred['yhat'])
score_rmse = math.sqrt(mean_squared_error(val_y, y_pred['yhat']))

print(f'RMSE: {score_rmse}')

In [None]:
# Replace the forecasts by their absolute values, then overlay them on the
# true validation values (both re-indexed from 0 so they share an x-axis).
y_pred['yhat'] = y_pred['yhat'].abs()

plt.figure(figsize=(20, 5))
for series, label in ((val_y, 'true'), (y_pred['yhat'], 'pred')):
    plt.plot(series.reset_index(drop=True), label=label)
plt.legend()
plt.show()

In [None]:
# Prophet reads timestamps from a column named 'ds', so rename it on the test frame.
test_x = test_df.rename(columns={'date_time': 'ds'})

# Run each fitted model over the same test rows; the unpacking order follows
# the model order in the tuple below.
CO_pred, Benzene_pred, NO_pred = (
    fitted.predict(test_x) for fitted in (model_CO, model_benzene, model_NO)
)

In [None]:
# Make every test forecast non-negative, using the same absolute-value
# transform the validation cells apply. All three frames feed the submission,
# so treating only carbon monoxide here would let negative benzene and
# nitrogen-oxide values through.
for pred in (CO_pred, Benzene_pred, NO_pred):
    pred['yhat'] = pred['yhat'].abs()

# Plot the carbon monoxide forecast over the test period.
plt.figure(figsize=(20,5))
plt.plot(CO_pred['yhat'], label = 'Carbon monoxide')
# plt.plot(Benzene_pred['yhat'], label = 'Benzene')
# plt.plot(NO_pred['yhat'], label = 'Nitrogen oxide')
plt.legend()
plt.show()

### Submission

In [None]:
# Put the test timestamps and the three forecast columns side by side, then
# take the column names from the sample submission.
# NOTE(review): assumes sample_submission's columns are ordered
# date_time, carbon monoxide, benzene, nitrogen oxides - confirm.
submission_parts = [test_df['date_time']]
submission_parts += [pred['yhat'] for pred in (CO_pred, Benzene_pred, NO_pred)]
results = pd.concat(submission_parts, axis=1, ignore_index=True)
results.columns = sample_submission.columns

In [None]:
# Write the submission to the working directory, without the row index column.
results.to_csv('submission.csv', index=False)

> ### *Open to any suggestions...*