In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# TFL Journey Data - Time Series Analysis\n",
    "\n",
    "This notebook performs comprehensive time series analysis on Transport for London (TFL) journey data, including:\n",
    "- Daily journey trends and patterns\n",
    "- Weekly and seasonal patterns\n",
    "- Transport mode comparisons\n",
    "- Trend analysis (daily growth rates) and z-score anomaly detection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import required libraries\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from datetime import datetime, timedelta\n",
    "import warnings\n",
    "\n",
    "# Silence every warning category for the rest of the session\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Plotting defaults: the seaborn-v0_8 matplotlib style sheet, the HUSL colour\n",
    "# palette, and notebook-wide figure/font sizes applied in a single update\n",
    "plt.style.use('seaborn-v0_8')\n",
    "sns.set_palette('husl')\n",
    "plt.rcParams.update({\n",
    "    'figure.figsize': (12, 8),\n",
    "    'font.size': 10,\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the two CSV extracts (paths are relative to the notebook location)\n",
    "df_2023 = pd.read_csv('../data/Journeys_2023.csv')\n",
    "df_2024_2025 = pd.read_csv('../data/Journeys_2024_2025 .csv')\n",
    "\n",
    "# Stack both extracts into one frame with a fresh 0..n-1 index\n",
    "df = pd.concat([df_2023, df_2024_2025], ignore_index=True)\n",
    "\n",
    "# TravelDate is parsed using the compact YYYYMMDD layout\n",
    "df['TravelDate'] = pd.to_datetime(df['TravelDate'], format='%Y%m%d')\n",
    "\n",
    "# Derive the calendar features from the parsed date in one step\n",
    "travel_dt = df['TravelDate'].dt\n",
    "df = df.assign(\n",
    "    year=travel_dt.year,\n",
    "    month=travel_dt.month,\n",
    "    day_of_week_num=travel_dt.dayofweek,\n",
    "    quarter=travel_dt.quarter,\n",
    "    week_of_year=travel_dt.isocalendar().week,\n",
    ")\n",
    "\n",
    "# Chronological order, with the index renumbered to match\n",
    "df = df.sort_values('TravelDate').reset_index(drop=True)\n",
    "\n",
    "# Quick sanity summary of what was loaded\n",
    "first_date, last_date = df['TravelDate'].min(), df['TravelDate'].max()\n",
    "print(f\"Dataset shape: {df.shape}\")\n",
    "print(f\"Date range: {first_date} to {last_date}\")\n",
    "print(f\"Total days: {len(df)}\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Descriptive statistics for the two journey-count columns\n",
    "mode_columns = ['TubeJourneyCount', 'BusJourneyCount']\n",
    "print(\"=== BASIC STATISTICS ===\")\n",
    "print(df[mode_columns].describe())\n",
    "\n",
    "# Grand totals per mode and combined\n",
    "tube_total = df['TubeJourneyCount'].sum()\n",
    "bus_total = df['BusJourneyCount'].sum()\n",
    "print(\"\\n=== JOURNEY TOTALS ===\")\n",
    "print(f\"Total Tube journeys: {tube_total:,}\")\n",
    "print(f\"Total Bus journeys: {bus_total:,}\")\n",
    "print(f\"Total journeys: {tube_total + bus_total:,}\")\n",
    "\n",
    "# Per-row (daily) means per mode and combined\n",
    "tube_mean = df['TubeJourneyCount'].mean()\n",
    "bus_mean = df['BusJourneyCount'].mean()\n",
    "print(\"\\n=== AVERAGE DAILY JOURNEYS ===\")\n",
    "print(f\"Average daily Tube journeys: {tube_mean:,.0f}\")\n",
    "print(f\"Average daily Bus journeys: {bus_mean:,.0f}\")\n",
    "print(f\"Average daily total journeys: {tube_mean + bus_mean:,.0f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Daily Journey Trends Over Time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _finish_axis(ax, title, ylabel, xlabel=None, legend=True):\n",
    "    \"\"\"Apply the title, axis labels, optional legend and light grid shared by every panel.\"\"\"\n",
    "    ax.set_title(title, fontsize=14, fontweight='bold')\n",
    "    ax.set_ylabel(ylabel, fontsize=12)\n",
    "    if xlabel is not None:\n",
    "        ax.set_xlabel(xlabel, fontsize=12)\n",
    "    if legend:\n",
    "        ax.legend()\n",
    "    ax.grid(True, alpha=0.3)\n",
    "\n",
    "\n",
    "travel_dates = df['TravelDate']\n",
    "\n",
    "# Combined count of both modes for each row\n",
    "total_journeys = df['TubeJourneyCount'] + df['BusJourneyCount']\n",
    "\n",
    "# --- Figure 1: raw daily series -------------------------------------------\n",
    "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))\n",
    "\n",
    "# Top panel: one line per transport mode\n",
    "for count_col, mode_name in (('TubeJourneyCount', 'Tube'), ('BusJourneyCount', 'Bus')):\n",
    "    ax1.plot(travel_dates, df[count_col], label=mode_name, alpha=0.7, linewidth=1)\n",
    "_finish_axis(ax1, 'Daily Journey Counts by Transport Mode', 'Journey Count (millions)')\n",
    "\n",
    "# Bottom panel: combined total (no legend, as in a single unlabeled line)\n",
    "ax2.plot(travel_dates, total_journeys, color='green', alpha=0.8, linewidth=1.5)\n",
    "_finish_axis(ax2, 'Total Daily Journey Counts', 'Total Journey Count (millions)',\n",
    "             xlabel='Date', legend=False)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# --- Figure 2: daily series overlaid with a 7-row rolling mean -------------\n",
    "df['Tube_MA7'] = df['TubeJourneyCount'].rolling(window=7).mean()\n",
    "df['Bus_MA7'] = df['BusJourneyCount'].rolling(window=7).mean()\n",
    "df['Total_MA7'] = total_journeys.rolling(window=7).mean()\n",
    "\n",
    "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))\n",
    "\n",
    "# Top panel: faint daily line followed by its bold moving average, per mode\n",
    "mode_series = (\n",
    "    ('TubeJourneyCount', 'Tube_MA7', 'Tube', 'blue'),\n",
    "    ('BusJourneyCount', 'Bus_MA7', 'Bus', 'orange'),\n",
    ")\n",
    "for count_col, ma_col, mode_name, ma_color in mode_series:\n",
    "    ax1.plot(travel_dates, df[count_col], label=f'{mode_name} (Daily)', alpha=0.4, linewidth=0.8)\n",
    "    ax1.plot(travel_dates, df[ma_col], label=f'{mode_name} (7-day MA)', linewidth=2, color=ma_color)\n",
    "_finish_axis(ax1, 'Daily Journey Counts with 7-Day Moving Average', 'Journey Count (millions)')\n",
    "\n",
    "# Bottom panel: combined total and its moving average\n",
    "ax2.plot(travel_dates, total_journeys, label='Total (Daily)', alpha=0.4, linewidth=0.8)\n",
    "ax2.plot(travel_dates, df['Total_MA7'], label='Total (7-day MA)', linewidth=2, color='green')\n",
    "_finish_axis(ax2, 'Total Daily Journey Counts with 7-Day Moving Average',\n",
    "             'Total Journey Count (millions)', xlabel='Date')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Weekly Patterns Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calendar order of weekday names, used to order categories and table rows\n",
    "day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']\n",
    "\n",
    "# Per-weekday summary table for both modes, rounded to whole numbers\n",
    "summary_funcs = ['mean', 'std', 'min', 'max']\n",
    "weekly_stats = (\n",
    "    df.groupby('DayOfWeek')\n",
    "      .agg({'TubeJourneyCount': summary_funcs, 'BusJourneyCount': summary_funcs})\n",
    "      .round(0)\n",
    ")\n",
    "\n",
    "print(\"=== WEEKLY PATTERNS ===\")\n",
    "print(weekly_stats)\n",
    "\n",
    "# Copy of the frame whose DayOfWeek is an ordered categorical, so seaborn\n",
    "# lays the boxes out Monday -> Sunday\n",
    "df_reordered = df.assign(\n",
    "    DayOfWeek=pd.Categorical(df['DayOfWeek'], categories=day_order, ordered=True)\n",
    ")\n",
    "\n",
    "# Side-by-side box plots, one panel per transport mode\n",
    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))\n",
    "for panel, count_col, mode_name in ((ax1, 'TubeJourneyCount', 'Tube'), (ax2, 'BusJourneyCount', 'Bus')):\n",
    "    sns.boxplot(data=df_reordered, x='DayOfWeek', y=count_col, ax=panel)\n",
    "    panel.set_title(f'{mode_name} Journey Distribution by Day of Week', fontsize=14, fontweight='bold')\n",
    "    panel.set_ylabel(f'{mode_name} Journey Count (millions)', fontsize=12)\n",
    "    panel.tick_params(axis='x', rotation=45)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Mean count per weekday, rows rearranged into calendar order\n",
    "weekly_avg = (\n",
    "    df.groupby('DayOfWeek')[['TubeJourneyCount', 'BusJourneyCount']]\n",
    "      .mean()\n",
    "      .reindex(day_order)\n",
    ")\n",
    "\n",
    "# Line plot of those means; integer positions on x, weekday names as tick labels\n",
    "fig, ax = plt.subplots(figsize=(12, 6))\n",
    "tick_positions = range(len(day_order))\n",
    "ax.plot(tick_positions, weekly_avg['TubeJourneyCount'], marker='o', linewidth=2, label='Tube', color='blue')\n",
    "ax.plot(tick_positions, weekly_avg['BusJourneyCount'], marker='s', linewidth=2, label='Bus', color='orange')\n",
    "ax.set_title('Average Journey Counts by Day of Week', fontsize=14, fontweight='bold')\n",
    "ax.set_xlabel('Day of Week', fontsize=12)\n",
    "ax.set_ylabel('Average Journey Count (millions)', fontsize=12)\n",
    "ax.set_xticks(tick_positions)\n",
    "ax.set_xticklabels(day_order, rotation=45)\n",
    "ax.legend()\n",
    "ax.grid(True, alpha=0.3)\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Monthly and Seasonal Patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Monthly analysis: mean of the daily counts for every (year, month) pair\n",
    "monthly_stats = df.groupby(['year', 'month']).agg({\n",
    "    'TubeJourneyCount': 'mean',\n",
    "    'BusJourneyCount': 'mean'\n",
    "}).reset_index()\n",
    "\n",
    "monthly_stats['TotalJourneys'] = monthly_stats['TubeJourneyCount'] + monthly_stats['BusJourneyCount']\n",
    "# First-of-month timestamp for each row\n",
    "monthly_stats['Date'] = pd.to_datetime(monthly_stats[['year', 'month']].assign(day=1))\n",
    "\n",
    "# Plot monthly trends: per-mode averages on top, combined average below\n",
    "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))\n",
    "\n",
    "# A single pass per year draws that year's lines on both panels\n",
    "for year in monthly_stats['year'].unique():\n",
    "    year_data = monthly_stats[monthly_stats['year'] == year]\n",
    "    ax1.plot(year_data['month'], year_data['TubeJourneyCount'],\n",
    "             marker='o', label=f'Tube {year}', linewidth=2)\n",
    "    ax1.plot(year_data['month'], year_data['BusJourneyCount'],\n",
    "             marker='s', label=f'Bus {year}', linewidth=2, linestyle='--')\n",
    "    ax2.plot(year_data['month'], year_data['TotalJourneys'],\n",
    "             marker='o', label=f'Total {year}', linewidth=2)\n",
    "\n",
    "ax1.set_title('Monthly Average Journey Counts by Year', fontsize=14, fontweight='bold')\n",
    "ax1.set_ylabel('Average Journey Count (millions)', fontsize=12)\n",
    "ax1.set_xlabel('Month', fontsize=12)\n",
    "ax1.legend()\n",
    "ax1.grid(True, alpha=0.3)\n",
    "ax1.set_xticks(range(1, 13))\n",
    "\n",
    "ax2.set_title('Monthly Average Total Journey Counts by Year', fontsize=14, fontweight='bold')\n",
    "ax2.set_ylabel('Average Total Journey Count (millions)', fontsize=12)\n",
    "ax2.set_xlabel('Month', fontsize=12)\n",
    "ax2.legend()\n",
    "ax2.grid(True, alpha=0.3)\n",
    "ax2.set_xticks(range(1, 13))\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Seasonal heatmap: mean count for every (month, weekday) combination\n",
    "seasonal_data = df.groupby(['month', 'DayOfWeek']).agg({\n",
    "    'TubeJourneyCount': 'mean',\n",
    "    'BusJourneyCount': 'mean'\n",
    "}).reset_index()\n",
    "\n",
    "# Create pivot tables for heatmaps (rows = month, columns = weekday).\n",
    "# pivot() sorts the weekday names alphabetically (Friday, Monday, ...), so the\n",
    "# columns are put back into calendar order using day_order, the Monday-to-Sunday\n",
    "# list defined in the weekly patterns cell.\n",
    "tube_heatmap = seasonal_data.pivot(\n",
    "    index='month', columns='DayOfWeek', values='TubeJourneyCount'\n",
    ").reindex(columns=day_order)\n",
    "bus_heatmap = seasonal_data.pivot(\n",
    "    index='month', columns='DayOfWeek', values='BusJourneyCount'\n",
    ").reindex(columns=day_order)\n",
    "\n",
    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))\n",
    "\n",
    "sns.heatmap(tube_heatmap, annot=True, fmt='.0f', cmap='Blues', ax=ax1)\n",
    "ax1.set_title('Average Tube Journey Counts by Month and Day', fontsize=14, fontweight='bold')\n",
    "ax1.set_xlabel('Day of Week', fontsize=12)\n",
    "ax1.set_ylabel('Month', fontsize=12)\n",
    "\n",
    "sns.heatmap(bus_heatmap, annot=True, fmt='.0f', cmap='Oranges', ax=ax2)\n",
    "ax2.set_title('Average Bus Journey Counts by Month and Day', fontsize=14, fontweight='bold')\n",
    "ax2.set_xlabel('Day of Week', fontsize=12)\n",
    "ax2.set_ylabel('Month', fontsize=12)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Transport Mode Comparison and Correlation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tube_counts = df['TubeJourneyCount']\n",
    "bus_counts = df['BusJourneyCount']\n",
    "\n",
    "# Pearson correlation between the two daily series\n",
    "correlation = tube_counts.corr(bus_counts)\n",
    "print(f\"Correlation between Tube and Bus journeys: {correlation:.3f}\")\n",
    "\n",
    "# Scatter of Bus against Tube counts\n",
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "ax.scatter(tube_counts, bus_counts, alpha=0.6, s=20)\n",
    "\n",
    "# Degree-1 least-squares fit drawn as a dashed red trend line\n",
    "trend_coeffs = np.polyfit(tube_counts, bus_counts, 1)\n",
    "trend_line = np.poly1d(trend_coeffs)\n",
    "ax.plot(tube_counts, trend_line(tube_counts), 'r--', alpha=0.8, linewidth=2)\n",
    "\n",
    "ax.set_xlabel('Tube Journey Count (millions)', fontsize=12)\n",
    "ax.set_ylabel('Bus Journey Count (millions)', fontsize=12)\n",
    "ax.set_title(f'Tube vs Bus Journey Correlation (r = {correlation:.3f})', fontsize=14, fontweight='bold')\n",
    "ax.grid(True, alpha=0.3)\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Ratio and share columns (Total_Journeys is reused by later cells)\n",
    "df['Tube_Bus_Ratio'] = tube_counts / bus_counts\n",
    "df['Total_Journeys'] = tube_counts + bus_counts\n",
    "df['Tube_Percentage'] = tube_counts / df['Total_Journeys'] * 100\n",
    "df['Bus_Percentage'] = bus_counts / df['Total_Journeys'] * 100\n",
    "\n",
    "# Ratio over time (top) and percentage split over time (bottom)\n",
    "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))\n",
    "\n",
    "mean_ratio = df['Tube_Bus_Ratio'].mean()\n",
    "ax1.plot(df['TravelDate'], df['Tube_Bus_Ratio'], alpha=0.7, linewidth=1)\n",
    "ax1.axhline(y=mean_ratio, color='red', linestyle='--', alpha=0.7, label=f'Mean: {mean_ratio:.2f}')\n",
    "ax1.set_title('Tube to Bus Journey Ratio Over Time', fontsize=14, fontweight='bold')\n",
    "ax1.set_ylabel('Tube/Bus Ratio', fontsize=12)\n",
    "ax1.legend()\n",
    "ax1.grid(True, alpha=0.3)\n",
    "\n",
    "for share_col, share_label in (('Tube_Percentage', 'Tube %'), ('Bus_Percentage', 'Bus %')):\n",
    "    ax2.plot(df['TravelDate'], df[share_col], label=share_label, alpha=0.7, linewidth=1)\n",
    "ax2.set_title('Percentage of Total Journeys by Transport Mode', fontsize=14, fontweight='bold')\n",
    "ax2.set_ylabel('Percentage (%)', fontsize=12)\n",
    "ax2.set_xlabel('Date', fontsize=12)\n",
    "ax2.legend()\n",
    "ax2.grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Trend Analysis and Anomaly Detection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: this import ideally belongs in the notebook's import cell; it is kept\n",
    "# here so that this cell does not depend on an edit elsewhere.\n",
    "from scipy import stats\n",
    "\n",
    "# Calculate growth rates: row-over-row percentage change of each series\n",
    "df['Tube_Growth_Rate'] = df['TubeJourneyCount'].pct_change() * 100\n",
    "df['Bus_Growth_Rate'] = df['BusJourneyCount'].pct_change() * 100\n",
    "df['Total_Growth_Rate'] = df['Total_Journeys'].pct_change() * 100\n",
    "\n",
    "# Plot growth rates\n",
    "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))\n",
    "\n",
    "ax1.plot(df['TravelDate'], df['Tube_Growth_Rate'], label='Tube', alpha=0.7, linewidth=1)\n",
    "ax1.plot(df['TravelDate'], df['Bus_Growth_Rate'], label='Bus', alpha=0.7, linewidth=1)\n",
    "ax1.axhline(y=0, color='black', linestyle='-', alpha=0.3)\n",
    "ax1.set_title('Daily Growth Rate by Transport Mode', fontsize=14, fontweight='bold')\n",
    "ax1.set_ylabel('Growth Rate (%)', fontsize=12)\n",
    "ax1.legend()\n",
    "ax1.grid(True, alpha=0.3)\n",
    "\n",
    "ax2.plot(df['TravelDate'], df['Total_Growth_Rate'], color='green', alpha=0.7, linewidth=1)\n",
    "ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)\n",
    "ax2.set_title('Daily Total Journey Growth Rate', fontsize=14, fontweight='bold')\n",
    "ax2.set_ylabel('Growth Rate (%)', fontsize=12)\n",
    "ax2.set_xlabel('Date', fontsize=12)\n",
    "ax2.grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "\n",
    "# Anomaly detection using z-score\n",
    "def detect_anomalies(data, threshold=3):\n",
    "    \"\"\"Flag values whose absolute z-score is greater than ``threshold``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : numeric array-like; callers in this cell drop NaNs first.\n",
    "    threshold : z-score magnitude above which a value is flagged (default 3).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    Boolean mask with one entry per element of ``data``.\n",
    "    \"\"\"\n",
    "    z_scores = np.abs(stats.zscore(data))\n",
    "    return z_scores > threshold\n",
    "\n",
    "\n",
    "def plot_with_anomalies(ax, dates, values, mask, title, ylabel, xlabel=None):\n",
    "    \"\"\"Draw ``values`` against ``dates`` on ``ax`` and overlay the masked points in red.\"\"\"\n",
    "    ax.plot(dates, values, alpha=0.7, linewidth=1, label='Normal')\n",
    "    if mask.any():\n",
    "        ax.scatter(dates[mask], values[mask], color='red', s=50, label='Anomalies', zorder=5)\n",
    "    ax.set_title(title, fontsize=14, fontweight='bold')\n",
    "    ax.set_ylabel(ylabel, fontsize=12)\n",
    "    if xlabel is not None:\n",
    "        ax.set_xlabel(xlabel, fontsize=12)\n",
    "    ax.legend()\n",
    "    ax.grid(True, alpha=0.3)\n",
    "\n",
    "\n",
    "tube_data = df['TubeJourneyCount'].dropna()\n",
    "bus_data = df['BusJourneyCount'].dropna()\n",
    "\n",
    "# Look the dates up by the row labels of the retained values so every count is\n",
    "# paired with its own date and both series always have the same length. A\n",
    "# positional slice starting at row 1 would shift the dates by one row and can\n",
    "# come out shorter than the values.\n",
    "tube_dates = df.loc[tube_data.index, 'TravelDate']\n",
    "bus_dates = df.loc[bus_data.index, 'TravelDate']\n",
    "\n",
    "# np.asarray yields a plain positional boolean mask regardless of the\n",
    "# container type that stats.zscore returns\n",
    "tube_anomalies = np.asarray(detect_anomalies(tube_data))\n",
    "bus_anomalies = np.asarray(detect_anomalies(bus_data))\n",
    "\n",
    "print(f\"Tube anomalies detected: {tube_anomalies.sum()}\")\n",
    "print(f\"Bus anomalies detected: {bus_anomalies.sum()}\")\n",
    "\n",
    "# Plot with anomalies highlighted\n",
    "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))\n",
    "\n",
    "plot_with_anomalies(ax1, tube_dates, tube_data, tube_anomalies,\n",
    "                    'Tube Journey Counts with Anomaly Detection',\n",
    "                    'Tube Journey Count (millions)')\n",
    "plot_with_anomalies(ax2, bus_dates, bus_data, bus_anomalies,\n",
    "                    'Bus Journey Counts with Anomaly Detection',\n",
    "                    'Bus Journey Count (millions)', xlabel='Date')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Summary Statistics and Key Insights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Text report pulling together the figures computed in the earlier cells\n",
    "rule = \"=\" * 60\n",
    "print(rule)\n",
    "print(\"TFL JOURNEY DATA ANALYSIS - SUMMARY INSIGHTS\")\n",
    "print(rule)\n",
    "\n",
    "# Overall statistics\n",
    "first_day = df['TravelDate'].min().strftime('%Y-%m-%d')\n",
    "last_day = df['TravelDate'].max().strftime('%Y-%m-%d')\n",
    "tube_total = df['TubeJourneyCount'].sum()\n",
    "bus_total = df['BusJourneyCount'].sum()\n",
    "\n",
    "print(\"\\n📊 OVERALL STATISTICS:\")\n",
    "print(f\"   • Total days analyzed: {len(df):,}\")\n",
    "print(f\"   • Date range: {first_day} to {last_day}\")\n",
    "print(f\"   • Total Tube journeys: {tube_total:,}\")\n",
    "print(f\"   • Total Bus journeys: {bus_total:,}\")\n",
    "print(f\"   • Total journeys: {tube_total + bus_total:,}\")\n",
    "\n",
    "# Daily averages\n",
    "print(\"\\n📈 DAILY AVERAGES:\")\n",
    "for avg_label, avg_col in (\n",
    "    ('Tube', 'TubeJourneyCount'),\n",
    "    ('Bus', 'BusJourneyCount'),\n",
    "    ('total', 'Total_Journeys'),\n",
    "):\n",
    "    print(f\"   • Average daily {avg_label} journeys: {df[avg_col].mean():,.0f}\")\n",
    "\n",
    "# Peak days: the full row at the maximum of each count column\n",
    "peak_tube_day = df.loc[df['TubeJourneyCount'].idxmax()]\n",
    "peak_bus_day = df.loc[df['BusJourneyCount'].idxmax()]\n",
    "peak_total_day = df.loc[df['Total_Journeys'].idxmax()]\n",
    "\n",
    "print(\"\\n🏆 PEAK DAYS:\")\n",
    "for peak_label, peak_row, peak_col in (\n",
    "    ('Highest Tube day', peak_tube_day, 'TubeJourneyCount'),\n",
    "    ('Highest Bus day', peak_bus_day, 'BusJourneyCount'),\n",
    "    ('Highest total day', peak_total_day, 'Total_Journeys'),\n",
    "):\n",
    "    print(f\"   • {peak_label}: {peak_row['TravelDate'].strftime('%Y-%m-%d')} ({peak_row['DayOfWeek']}) - {peak_row[peak_col]:,.0f} journeys\")\n",
    "\n",
    "# Weekly patterns: rounded per-weekday means, printed in day_order sequence\n",
    "weekday_cols = ['TubeJourneyCount', 'BusJourneyCount', 'Total_Journeys']\n",
    "weekly_avg = df.groupby('DayOfWeek')[weekday_cols].mean().round(0)\n",
    "\n",
    "print(\"\\n📅 WEEKLY PATTERNS (Average daily journeys):\")\n",
    "for day in day_order:\n",
    "    tube_avg, bus_avg, total_avg = weekly_avg.loc[day, weekday_cols]\n",
    "    print(f\"   • {day}: Tube {tube_avg:,.0f}, Bus {bus_avg:,.0f}, Total {total_avg:,.0f}\")\n",
    "\n",
    "# Correlation and mode-share figures from the comparison cell\n",
    "print(\"\\n🔗 CORRELATION:\")\n",
    "print(f\"   • Tube-Bus correlation: {correlation:.3f}\")\n",
    "print(f\"   • Average Tube/Bus ratio: {df['Tube_Bus_Ratio'].mean():.2f}\")\n",
    "for share_label, share_col in (('Tube', 'Tube_Percentage'), ('Bus', 'Bus_Percentage')):\n",
    "    print(f\"   • Average {share_label} percentage: {df[share_col].mean():.1f}%\")\n",
    "\n",
    "# Growth analysis: mean of the row-over-row percentage changes\n",
    "print(\"\\n📈 GROWTH ANALYSIS:\")\n",
    "for growth_label, growth_col in (\n",
    "    ('Tube', 'Tube_Growth_Rate'),\n",
    "    ('Bus', 'Bus_Growth_Rate'),\n",
    "    ('total', 'Total_Growth_Rate'),\n",
    "):\n",
    "    print(f\"   • Average daily {growth_label} growth rate: {df[growth_col].mean():.2f}%\")\n",
    "\n",
    "print(\"\\n\" + rule)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}