[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googlecolab/colab-samples/blob/main/notebooks/basic_notebook_features/text_cells.ipynb)




In [None]:
# Install PyCaret and dependencies
!pip install -U pip setuptools wheel scikit-learn>=1.4 -q
!pip install -U git+https://github.com/pycaret/pycaret.git@master -q
!pip install -U numpy==1.26.4 dask[complete]==2024.4.1 -q
print("‚úÖ All libraries installed!")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Source of the classic monthly airline-passengers time series
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv'

# Read the CSV, parse 'Month' into timestamps, index by it, and keep only the
# 'Passengers' column as a Series on an explicit month-start ('MS') frequency.
data = (
    pd.read_csv(url)
    .assign(Month=lambda frame: pd.to_datetime(frame['Month']))
    .set_index('Month')['Passengers']
    .asfreq('MS')
)

print(f"‚úÖ Airline Passengers Dataset loaded: {data.shape}")
print(f"\nTime Range: {data.index.min()} to {data.index.max()}")
print(f"Total Months: {len(data)}")

# Draw the series on an explicitly created figure/axes pair
fig, ax = plt.subplots(figsize=(12, 5))
data.plot(ax=ax, title='‚úàÔ∏è Monthly Airline Passengers (1949-1960)', color='steelblue', linewidth=2)
ax.set_xlabel('Date')
ax.set_ylabel('Number of Passengers (thousands)')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# Cell output: the last ten observations
data.tail(10)

In [None]:
# Summarise the series: descriptive statistics, overall growth, monthly profile
print("üìä Time Series Statistics:")
print(data.describe())

# Compare the first and last observations to quantify the overall change
first_value = data.iloc[0]
last_value = data.iloc[-1]
growth_pct = ((last_value / first_value) - 1) * 100

print("\nüìà Trend Analysis:")
print(f"   Starting value (1949): {first_value:.0f} passengers")
print(f"   Ending value (1960): {last_value:.0f} passengers")
print(f"   Growth: {growth_pct:.1f}%")

# Average per calendar month (grouping on the index's month number) to
# expose any repeating within-year pattern
print("\nüîÑ Seasonality Check:")
monthly_avg = data.groupby(data.index.month).mean()
print(monthly_avg)

In [None]:
from pycaret.time_series import *

# Configure the PyCaret time-series experiment.
# The settings are gathered in one mapping and unpacked into setup().
setup_options = dict(
    data=data,
    fh=24,               # forecast horizon: 24 months (2 years)
    session_id=42,
    fold=3,
    seasonal_period=12,  # monthly seasonality
    verbose=False,
)
ts_exp = setup(**setup_options)

In [None]:
# Benchmark the candidate forecasters, rank them by MAE, and keep the top three
print("üîç Training and comparing time series models...")
best_models = compare_models(sort='MAE', n_select=3)
print("\n‚úÖ Top 3 models selected based on Mean Absolute Error")

In [None]:
# The first entry of the selected-models list is treated as the best model
best_model = best_models[0]

print(f"üìä Best Model: {type(best_model).__name__}")
print("\nGenerating diagnostic plots...")

# Render each diagnostic view of the chosen model, in this order
for plot_kind in ('forecast', 'insample', 'residuals', 'diagnostics'):
    plot_model(best_model, plot=plot_kind)

In [None]:
# Forecast with the selected model and plot it against the historical series.
# NOTE(review): best_model has not been passed through finalize_model() at this
# point, so this forecast presumably covers the hold-out window rather than
# dates beyond the data — confirm against PyCaret's predict_model behaviour.
print("üîÆ Forecasting next 24 months...")
predictions = predict_model(best_model, fh=24, return_pred_int=True)

print(f"\n‚úÖ Forecast generated for {len(predictions)} months")
print(f"\nForecast Summary:")
print(predictions.head(10))

# `data` is indexed by timestamps, but PyCaret's time-series forecasts may come
# back on a PeriodIndex. Plotting both kinds of index on one matplotlib axis
# mixes unit converters, so convert periods to their start timestamps first.
# The isinstance guard makes this a no-op when the index is already datetime.
forecast_index = predictions.index
if isinstance(forecast_index, pd.PeriodIndex):
    forecast_index = forecast_index.to_timestamp()

# Visualize forecast
plt.figure(figsize=(14, 6))
plt.plot(data.index, data.values, label='Historical Data', color='steelblue', linewidth=2)
plt.plot(forecast_index, predictions['y_pred'], label='Forecast', color='coral', linewidth=2, linestyle='--')
plt.fill_between(forecast_index, predictions['lower'], predictions['upper'], alpha=0.3, color='coral', label='Confidence Interval')
plt.xlabel('Date')
plt.ylabel('Passengers (thousands)')
plt.title('‚úàÔ∏è Airline Passengers: Historical vs Forecast')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Hyperparameter search on the selected model: 10 iterations, optimising MAE
print("‚ö° Tuning hyperparameters...")
tuned_model = tune_model(best_model, optimize='MAE', n_iter=10)
print("\n‚úÖ Model tuned!")

In [None]:
# Finalize the tuned model, then forecast 24 steps ahead with interval bounds
final_model = finalize_model(tuned_model)
final_predictions = predict_model(final_model, return_pred_int=True, fh=24)

print("\n‚úÖ Final forecast generated!")
print("\nPredicted passengers for next 6 months:")

# Show only the point forecast column for the first six rows
next_six = final_predictions[['y_pred']].head(6)
print(next_six)

In [None]:
# Persist the finalized model under a fixed base name
model_name = 'airline_passengers_forecast_model'
save_model(final_model, model_name)

print(f"‚úÖ Model successfully saved as '{model_name}.pkl'")
print(f"üì¶ Model can be loaded using: loaded = load_model('{model_name}')")

# Closing recap, emitted one line per print call
summary_lines = [
    "\nüéØ Time Series Forecasting Summary:",
    "   ‚Ä¢ Dataset: Airline Passengers (144 months)",
    f"   ‚Ä¢ Best Algorithm: {best_model.__class__.__name__}",
    "   ‚Ä¢ Forecast Horizon: 24 months",
    "   ‚Ä¢ Task: Univariate forecasting (no exogenous variables)",
]
for summary_line in summary_lines:
    print(summary_line)