# In [3]:
import pandas as pd
import numpy as np
import xgboost as xgb
import plotly.graph_objs as go
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load dataset: a two-column CSV whose 'Date' column is parsed as datetimes.
# The columns are renamed to 'ds' (date) and 'y' (target value).
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv"
df = pd.read_csv(url, parse_dates=['Date'])
df.columns = ['ds', 'y']

# Create lag features: lag_k holds the value of 'y' from k rows earlier.
# The column names are built once and reused as the model's feature list.
features = [f'lag_{i}' for i in range(1, 6)]
for lag, column in enumerate(features, start=1):
    df[column] = df['y'].shift(lag)
# shift() leaves NaN in the leading rows that lack a full lag history.
df.dropna(inplace=True)

# Split into train and test. shuffle=False keeps row order, so the test set
# is the final 10% of the rows.
train, test = train_test_split(df, test_size=0.1, shuffle=False)

# Train model
model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100)
model.fit(train[features], train['y'])

# Predict. assign() returns a new DataFrame instead of writing a column into
# a frame derived from df, which avoids pandas' SettingWithCopyWarning.
test = test.assign(y_pred=model.predict(test[features]))
rmse = np.sqrt(mean_squared_error(test['y'], test['y_pred']))

# Create Plotly figure
fig = go.Figure()

# Actual full time series
fig.add_trace(go.Scatter(x=df['ds'], y=df['y'], mode='lines', name='Actual Temp'))

# Forecast (on test set)
fig.add_trace(go.Scatter(x=test['ds'], y=test['y_pred'], mode='lines+markers', name='Forecasted Temp'))

# Add vertical line where forecast starts. The line is defined exactly once:
# passing shapes=[...] to update_layout() would replace the figure's shape
# list and silently discard anything added earlier via add_shape().
forecast_start = test['ds'].iloc[0]
# Compute the vertical extent once with the vectorised Series reductions.
y_low, y_high = df['y'].min(), df['y'].max()
fig.add_shape(
    type='line',
    x0=forecast_start, x1=forecast_start,
    y0=y_low, y1=y_high,
    line=dict(color='Red', width=2, dash='dash'),
)
fig.add_annotation(
    x=forecast_start,
    y=y_high,
    text="Forecast Start",
    showarrow=True,
    arrowhead=1
)

# Layout: the title reports the RMSE rounded to two decimals.
fig.update_layout(
    title=f"Daily Minimum Temperature Forecast with XGBoost (RMSE: {rmse:.2f})",
    xaxis_title='Date',
    yaxis_title='Temp (°C)',
)

fig.show()
