# PeMS Data Analysis

This notebook combines PeMS traffic data for 2019-2025 into a single dataset and visualizes monthly vehicle miles traveled (VMT) trends.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from pathlib import Path

## Load and Combine Data Files

Each year has two Excel files:
- Regular file (`pems_output{year}N.xlsx`): VMT and Delay (V_t=60 or 40, depending on the year)
- _40 file (`pems_output{year}N_40.xlsx`): Delay at V_t=40 and Productivity Loss at 40

In [None]:
# Analysis window (2019 through 2025) and the folder holding the yearly exports
years = range(2019, 2026)
base_path = Path('.')

# Header -> short-name mappings, defined once rather than on every pass
regular_columns = {
    'VMT (Veh-Miles)': 'VMT',
    'Delay (V_t=60) (Veh-Hours)': 'Delay_60',
    # Some years use the 40 threshold in the regular file; it is still stored
    # under 'Delay_60' so every year exposes the same column name.
    # NOTE(review): for those years 'Delay_60' holds V_t=40 values - confirm
    # this is acceptable for cross-year comparisons.
    'Delay (V_t=40) (Veh-Hours)': 'Delay_60',
    '# Lane Points': 'Lane_Points',
    '% Observed': 'Pct_Observed',
}
columns_40 = {
    'Delay (V_t=40) (Veh-Hours)': 'Delay_40',
    'Lost Prod (40) (lane-mi-hrs)': 'Productivity_Loss_40',
    '# Lane Points': 'Lane_Points',
    '% Observed': 'Pct_Observed',
}

# Columns carried from each export into the per-year frame
regular_keep = ['Month', 'VMT', 'Delay_60', 'Lane_Points', 'Pct_Observed']
keep_40 = ['Month', 'Delay_40', 'Productivity_Loss_40']

all_data = []

for year in years:
    # Regular export: VMT and delay
    regular_file = base_path / f'pems_output{year}N.xlsx'
    df_regular = pd.read_excel(regular_file).rename(columns=regular_columns)

    # Companion _40 export: Delay 40 and Productivity Loss 40
    file_40 = base_path / f'pems_output{year}N_40.xlsx'
    df_40 = pd.read_excel(file_40).rename(columns=columns_40)

    # Join the two exports on Month only; Lane_Points and Pct_Observed are
    # taken from the regular file. The outer join keeps a month that appears
    # in either export.
    df_merged = pd.merge(
        df_regular[regular_keep],
        df_40[keep_40],
        on='Month',
        how='outer',
    )

    # Tag every row with the year of the files it came from
    df_merged['Year'] = year
    all_data.append(df_merged)

# Stack the yearly frames, parse Month as datetime, then order chronologically
df = pd.concat(all_data, ignore_index=True)
df['Month'] = pd.to_datetime(df['Month'])
df = df.sort_values(by='Month').reset_index(drop=True)

print(f"Combined dataset shape: {df.shape}")
df.head()

In [None]:
# Inspect the combined dataset: print the column/dtype summary from
# df.info(), then show summary statistics from df.describe() as the
# cell's output (it is the last expression in the cell).
df.info()
# Blank spacing between the info printout and the statistics table
print("\n")
df.describe()

## Monthly VMT Bar Chart (2019-2025)

In [None]:
# Bar chart of monthly VMT: one bar per row of df, colored by year.
# The year list, palette, legend and title span are derived from df['Year']
# instead of being hard-coded to 2019-2025, so the chart stays correct if the
# set of loaded years changes. Expects df to be sorted by Month with 'Month'
# (datetime), 'VMT' and 'Year' columns, as built by the loading cell.
from matplotlib.patches import Patch

fig, ax = plt.subplots(figsize=(16, 8))

# Year-month label per row, used for the x-axis ticks below.
# Kept as a df column (not a local) so the frame's columns are unchanged
# for anything that runs after this cell.
df['Month_Label'] = df['Month'].dt.strftime('%Y-%m')

# Years actually present in the data, in ascending order
year_values = sorted(int(y) for y in df['Year'].unique())
n_years = len(year_values)

# Evenly spaced viridis samples, one per year; max(..., 1) avoids a division
# by zero when only a single year is present
colors = plt.cm.viridis([i / max(n_years - 1, 1) for i in range(n_years)])
year_colors = dict(zip(year_values, colors))

# Draw the bars (VMT scaled to millions), then color each one by its year
bars = ax.bar(range(len(df)), df['VMT'] / 1e6, width=0.8, edgecolor='black', linewidth=0.5)
for bar, bar_year in zip(bars, df['Year']):
    bar.set_facecolor(year_colors[int(bar_year)])

# Year span shown in the title, e.g. '2019-2025' (or a single year)
if n_years > 1:
    year_span = f'{year_values[0]}-{year_values[-1]}'
else:
    year_span = str(year_values[0]) if year_values else ''

# Axis labels and title
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('VMT (Millions of Vehicle-Miles)', fontsize=12)
ax.set_title(
    f'Monthly Vehicle Miles Traveled (VMT) - Bay Area {year_span}',
    fontsize=14,
    fontweight='bold',
)

# X-axis ticks: label every 6th bar with its year-month
tick_positions = range(0, len(df), 6)
tick_labels = df['Month_Label'].iloc[::6].tolist()
ax.set_xticks(tick_positions)
ax.set_xticklabels(tick_labels, rotation=45, ha='right')

# Legend: one color patch per year present in the data
legend_elements = [Patch(facecolor=year_colors[y], label=str(y)) for y in year_values]
ax.legend(handles=legend_elements, title='Year', loc='upper right')

# Horizontal gridlines drawn behind the bars
ax.yaxis.grid(True, linestyle='--', alpha=0.7)
ax.set_axisbelow(True)

plt.tight_layout()
plt.savefig('vmt_monthly_histogram.png', dpi=150, bbox_inches='tight')
plt.show()

print("Chart saved as 'vmt_monthly_histogram.png'")

In [None]:
# Save the combined dataset to CSV in the working directory, without the
# row index.
# NOTE(review): the plotting cell adds a 'Month_Label' column to df, so when
# that cell has run first the label column is written to the CSV as well -
# confirm this is intended.
df.to_csv('pems_combined_data.csv', index=False)
print("Combined dataset saved as 'pems_combined_data.csv'")