In [1]:
import sys
import os

# Add the parent of the current working directory to the Python path so that
# the `src` package imported by later cells can be resolved (presumably the
# project root when the notebook is run from its own folder — confirm).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
import pandas as pd
from src.config import TRANSFORMED_DATA_DIR

# Read the tabular dataset stored as a parquet file under TRANSFORMED_DATA_DIR.
tabular_data_path = TRANSFORMED_DATA_DIR / "tabular_data.parquet"
df = pd.read_parquet(tabular_data_path)

In [3]:
from datetime import datetime

from src.data_utils import split_time_series_data

# Split `df` into train/test features and targets around a fixed cutoff date,
# using the "target" column as the label.
# NOTE(review): which side of the cutoff goes to train vs. test is decided
# inside split_time_series_data — confirm against that helper.
cutoff = datetime(2023, 8, 1, 0, 0, 0)
X_train, y_train, X_test, y_test = split_time_series_data(
    df, cutoff_date=cutoff, target_column="target"
)

# Report the shape of each split, in the same order as they are returned.
for split_part in (X_train, y_train, X_test, y_test):
    print(split_part.shape)



In [4]:
# Collect the feature columns whose names start with "rides_" and keep only
# those columns in both the train and test feature frames.
# NOTE(review): the *_only_numeric frames are not referenced by any later
# cell visible in this notebook — confirm whether they are still needed.
past_ride_columns = [name for name in X_train.columns if name.startswith("rides_")]
X_train_only_numeric, X_test_only_numeric = (
    features[past_ride_columns] for features in (X_train, X_test)
)

In [7]:
from prophet import Prophet

# Build the training frame for Prophet: a 'ds' column taken from the index of
# X_train (converted to datetime) and a 'y' column holding the targets.
# NOTE(review): this assumes the index of X_train carries the observation
# timestamps; if it is a plain integer index, pd.to_datetime would interpret
# the integers as epoch offsets — confirm against split_time_series_data.
train_data = pd.DataFrame({'ds': X_train.index, 'y': y_train}).assign(
    ds=lambda frame: pd.to_datetime(frame['ds'])
)

# Fit a Prophet model with its default configuration on the training frame.
model = Prophet()
model.fit(train_data)





In [9]:
# Build the prediction frame: a single 'ds' column taken from the index of
# X_test, converted to datetime.
# NOTE(review): as with the training frame, this assumes the index of X_test
# holds the timestamps to forecast — confirm.
test_data = pd.DataFrame({'ds': X_test.index})
test_data = test_data.assign(ds=pd.to_datetime(test_data['ds']))

# Run the fitted model on the test timestamps and keep the 'yhat' column of
# the returned forecast as the predictions.
forecast = model.predict(test_data)
predictions = forecast['yhat']

In [10]:
from sklearn.metrics import mean_absolute_error
# Score the forecast on the test split with mean absolute error and print it
# rounded to four decimal places.
test_mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
print(f"{test_mae:.4f}")



In [12]:
from src.experiment_utils import set_mlflow_tracking, log_model_to_mlflow
from dotenv import load_dotenv
import os
# Load variables from a .env file into the environment before configuring
# tracking (presumably the MLflow credentials/URI live there — confirm).
load_dotenv()

# Configure MLflow tracking through the project helper, then log the fitted
# Prophet model together with its test MAE.
# NOTE(review): X_test is passed as the input sample, while the model itself
# was fit and queried on a 'ds' frame — confirm log_model_to_mlflow handles
# this as intended.
mlflow = set_mlflow_tracking()
log_model_to_mlflow(
    model,
    X_test,
    "Prophet",
    "mean_absolute_error",
    score=test_mae,
)







