In [None]:
# Projeto 13 – Previsão de Pedidos de Táxi por Hora com Séries Temporais

In [None]:
# 1. Bibliotecas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [None]:
# 2. Load the data
# The 'datetime' column is parsed as timestamps and becomes the index.
# NOTE(review): the path is absolute and environment-specific.
df = pd.read_csv(
    '/mnt/data/taxi.csv',
    index_col='datetime',
    parse_dates=['datetime'],
)

In [None]:
# 3. Resample to 1-hour bins
# Every column is summed within each hourly bin. The lowercase 'h' alias is
# used because the uppercase 'H' alias is deprecated in pandas >= 2.2 and
# emits a FutureWarning; both spellings produce the same hourly bins.
df = df.resample('1h').sum()

In [None]:
# 4. Exploratory analysis
# Line plot of the hourly order counts on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 4))
df['num_orders'].plot(ax=ax, title='Pedidos de Táxi por Hora')
ax.set_ylabel('nº Pedidos')
ax.grid()
plt.show()

In [None]:
# 5. Feature engineering
def make_features(data, max_lag, rolling_mean_size):
    """Add calendar, lag and rolling-mean columns to ``data`` in place.

    Columns written, in this order:
      * ``hour`` and ``dayofweek`` -- read from ``data.index``.
      * ``lag_1`` .. ``lag_<max_lag>`` -- ``num_orders`` shifted down by that
        many rows.
      * ``rolling_mean`` -- mean over ``rolling_mean_size`` rows of
        ``num_orders`` after a one-row shift, so the current row's own value
        is never part of its window.

    Args:
        data: DataFrame with a ``num_orders`` column and an index exposing
            ``.hour`` and ``.dayofweek``. It is modified in place.
        max_lag: Highest lag to generate; no lag columns are added when < 1.
        rolling_mean_size: Window passed to ``Series.rolling``.

    Returns:
        The same ``data`` object, now carrying the new columns. The leading
        rows contain NaN wherever a shift or window has no history yet.
    """
    timestamps = data.index
    data['hour'] = timestamps.hour
    data['dayofweek'] = timestamps.dayofweek

    lag_steps = range(1, max_lag + 1)
    orders = data['num_orders']
    for step in lag_steps:
        data[f'lag_{step}'] = orders.shift(step)

    # Shift first so that the window only covers strictly earlier rows.
    previous_orders = orders.shift()
    data['rolling_mean'] = previous_orders.rolling(rolling_mean_size).mean()
    return data

In [None]:
# Build the features (make_features writes into df and returns it), then drop
# the leading rows whose lag / rolling-mean values are NaN.
df = make_features(df, max_lag=6, rolling_mean_size=3).dropna()

In [None]:
# 6. Split the data
# shuffle=False keeps row order, so each later subset follows the earlier one:
# the last 10% of rows form the test set, and the last 20% of what remains
# forms the validation set.
target = df['num_orders']
features = df.drop(columns='num_orders')

X_train_val, X_test, y_train_val, y_test = train_test_split(
    features,
    target,
    test_size=0.1,
    shuffle=False,
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.2,
    shuffle=False,
)

In [None]:
# 7. Models
## Linear Regression
# Fit on the training slice and report RMSE on the validation slice.
lr = LinearRegression().fit(X_train, y_train)
val_preds_lr = lr.predict(X_valid)
mse_lr = mean_squared_error(y_valid, val_preds_lr)
rmse_lr = np.sqrt(mse_lr)
print(f"Regressão Linear - RMSE validação: {rmse_lr:.2f}")

In [None]:
## Random Forest
# 100 trees with a fixed seed so repeated runs give the same forest.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
val_preds_rf = rf.predict(X_valid)
mse_rf = mean_squared_error(y_valid, val_preds_rf)
rmse_rf = np.sqrt(mse_rf)
print(f"Random Forest - RMSE validação: {rmse_rf:.2f}")

In [None]:
# 8. Final evaluation on the test set
# Pick the model with the lower validation RMSE; a tie goes to linear regression.
final_model = rf if rmse_rf < rmse_lr else lr

# Refit the winner on train + validation before scoring the test set. The
# splits are made with shuffle=False, so the validation rows sit directly
# before the test rows; without this refit the model would never have seen
# that most recent stretch of history and would be scored across the gap.
# final_model is the same object as rf or lr, so that estimator is refitted.
final_model.fit(X_train_val, y_train_val)

test_preds = final_model.predict(X_test)
rmse_test = np.sqrt(mean_squared_error(y_test, test_preds))
print(f"\nMelhor modelo: {type(final_model).__name__}")
print(f"RMSE no conjunto de teste: {rmse_test:.2f}")

In [None]:
# 9. Conclusion
# The project target is a test RMSE of at most 48.
goal_met = rmse_test <= 48
print("\nMeta alcançada: RMSE ≤ 48 ✅" if goal_met else "\nMeta não alcançada: RMSE > 48 ❌")
