<a id="setup"></a>
## 1. Setup & Data Validation

In [1]:
# Standard imports
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import sys
import json

# Put the parent of the current working directory at the front of sys.path
# so that the `src` package can be imported from this notebook.
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root))

from src.config import RAW_DIR, PROCESSED_DIR

# Echo the resolved data locations as a quick sanity check.
print(f"Raw data directory: {RAW_DIR}")
print(f"Processed data directory: {PROCESSED_DIR}")
print("✅ Setup complete")

Raw data directory: /Users/hc/Documents/projects/modivcare-rides-efficiency/data/raw
Processed data directory: /Users/hc/Documents/projects/modivcare-rides-efficiency/data/processed
✅ Setup complete


In [2]:
# Confirm that every processed input file listed below exists on disk.
required_files = [
    'trips_with_efficiency.csv',
    'drivers.csv',
    'simulation_results.csv',
    'evaluation_summary.csv'
]

print("Checking required data files...")
print("=" * 50)

# Map each file name to whether it was found under PROCESSED_DIR.
file_found = {filename: (PROCESSED_DIR / filename).exists() for filename in required_files}

for filename, exists in file_found.items():
    status = "✓" if exists else "✗"
    print(f"{status} {filename}")

# True only when every required file is present.
all_present = all(file_found.values())

print("\n" + ("✅ All files present!" if all_present else "⚠️ Missing files detected!"))

Checking required data files...
✓ trips_with_efficiency.csv
✓ drivers.csv
✓ simulation_results.csv
✓ evaluation_summary.csv

✅ All files present!


In [3]:
# Load the processed trips and drivers tables, then run basic quality checks.
trips_df = pd.read_csv(
    PROCESSED_DIR / 'trips_with_efficiency.csv',
    parse_dates=['requested_pickup_time', 'scheduled_pickup_time',
                 'actual_pickup_time', 'actual_dropoff_time']
)
drivers_df = pd.read_csv(PROCESSED_DIR / 'drivers.csv')

# Data quality checks
print("DATA QUALITY VALIDATION")
print("=" * 50)

# Resolve column presence first so that a missing column is reported as a
# failed check (✗) instead of raising KeyError while the dict is being built.
has_efficiency = 'efficiency_index' in trips_df.columns
has_cancel_flag = 'is_cancelled' in trips_df.columns

# Empty placeholder keeps the expressions below valid when the column is absent.
efficiency = trips_df['efficiency_index'] if has_efficiency else pd.Series(dtype='float64')

checks = {
    'trips_count': len(trips_df) > 0,
    'drivers_count': len(drivers_df) > 0,
    'efficiency_index_present': has_efficiency,
    'no_null_efficiency (non-cancelled)': (
        has_efficiency
        and has_cancel_flag
        and bool(efficiency[~trips_df['is_cancelled']].notna().all())
    ),
    # Range is validated on non-null scores only: nulls are covered by the
    # check above, and `between` would otherwise count NaN as out of range.
    'valid_efficiency_range': has_efficiency and bool(efficiency.dropna().between(0, 100).all()),
    'datetime_parsed': pd.api.types.is_datetime64_any_dtype(trips_df['scheduled_pickup_time'])
}

for check_name, passed in checks.items():
    status = "✓" if passed else "✗"
    print(f"{status} {check_name}")

DATA QUALITY VALIDATION
✓ trips_count
✓ drivers_count
✓ efficiency_index_present
✓ no_null_efficiency (non-cancelled)
✓ valid_efficiency_range
✓ datetime_parsed


<a id="data-prep"></a>
## 2. Dashboard Data Preparation

In [5]:
# Pre-aggregate per-day figures so the dashboard can load a small table.

# Calendar date of the scheduled pickup is the grouping key.
trips_df['date'] = trips_df['scheduled_pickup_time'].dt.date

# Named aggregation yields the final column names directly.
daily_metrics = (
    trips_df
    .groupby('date')
    .agg(
        trip_count=('trip_id', 'count'),
        avg_efficiency=('efficiency_index', 'mean'),
        cancellations=('is_cancelled', 'sum'),
        total_miles=('distance_miles', 'sum'),
    )
    .reset_index()
)

print(f"✅ Daily metrics: {len(daily_metrics)} days")
daily_metrics.head()

✅ Daily metrics: 89 days


Unnamed: 0,date,trip_count,avg_efficiency,cancellations,total_miles
0,2025-01-01,63,42.96706,4,306.77
1,2025-01-02,56,42.900525,0,268.49
2,2025-01-03,45,40.475678,3,213.51
3,2025-01-04,63,42.876714,2,289.86
4,2025-01-05,46,43.671941,2,225.92


In [7]:
# Per-driver roll-up over non-cancelled trips, rounded to 4 decimals.
driver_summary = (
    trips_df[~trips_df['is_cancelled']]
    .groupby('driver_id')
    .agg(
        trip_count=('trip_id', 'count'),
        avg_efficiency=('efficiency_index', 'mean'),
        std_efficiency=('efficiency_index', 'std'),
        total_miles=('distance_miles', 'sum'),
        avg_on_time=('score_on_time', 'mean'),
        avg_route_efficiency=('score_route', 'mean'),
    )
    .round(4)
    .reset_index()
)

print(f"✅ Driver summary: {len(driver_summary)} drivers")
driver_summary.head()

✅ Driver summary: 150 drivers


Unnamed: 0,driver_id,trip_count,avg_efficiency,std_efficiency,total_miles,avg_on_time,avg_route_efficiency
0,DRV_0000,35,40.4928,11.7006,167.93,62.5,18.3777
1,DRV_0001,29,43.0828,7.7994,139.76,70.6897,21.179
2,DRV_0002,31,46.0724,10.6393,155.31,66.5323,21.6797
3,DRV_0003,42,45.0564,9.8038,205.79,69.0476,21.9863
4,DRV_0004,37,43.3016,11.1963,175.88,64.1892,26.0653


In [8]:
# Per-region roll-up over all trips (cancelled included), best region first.
region_summary = (
    trips_df
    .groupby('region')
    .agg(
        trip_count=('trip_id', 'count'),
        avg_efficiency=('efficiency_index', 'mean'),
        cancellation_rate=('is_cancelled', 'mean'),
        avg_distance=('distance_miles', 'mean'),
        total_distance=('distance_miles', 'sum'),
    )
    .round(4)
    .reset_index()
    .sort_values('avg_efficiency', ascending=False)
)

print(f"Region summary: {len(region_summary)} regions")
print(region_summary)

Region summary: 5 regions
     region  trip_count  avg_efficiency  cancellation_rate  avg_distance  \
3  Region_4         942         44.0845             0.0276        4.9034   
2  Region_3         996         44.0558             0.0291        4.8559   
0  Region_1        1031         43.8885             0.0291        4.8797   
1  Region_2        1007         43.8124             0.0268        4.8696   
4  Region_5        1024         43.7269             0.0342        4.8300   

   total_distance  
3         4619.01  
2         4836.44  
0         5030.96  
1         4903.70  
4         4945.87  


<a id="testing"></a>
## 3. Component Testing

Testing the visualizations that will be used in the Streamlit dashboard.

In [9]:
# Test: Efficiency trend chart
fig = px.line(
    daily_metrics,
    x='date',
    y='avg_efficiency',
    title='Daily Average Efficiency Trend',
    labels={'avg_efficiency': 'Avg Efficiency', 'date': 'Date'}
)
# The efficiency index is on a 0-100 scale (it is validated with
# between(0, 100) and the daily averages are around 43), so the 70% target
# belongs at y=70. At y=0.7 the line would sit at the bottom of the axis.
fig.add_hline(y=70, line_dash="dash", annotation_text="Target: 70%")
fig.show()

In [11]:
# Test: Driver performance scatter
# One marker per driver: trip volume on x, mean efficiency on y,
# marker area scaled by total miles driven.
fig = px.scatter(
    data_frame=driver_summary,
    x='trip_count',
    y='avg_efficiency',
    size='total_miles',
    title='Driver Performance: Efficiency vs. Trip Volume',
    hover_data=['driver_id', 'avg_on_time'],
)
fig.show()

In [12]:
# Test: Regional comparison bar chart
# One bar per region, with height = mean efficiency and colour = trip count.
fig = px.bar(
    data_frame=region_summary,
    x='region',
    y='avg_efficiency',
    color='trip_count',
    labels={'avg_efficiency': 'Avg Efficiency', 'trip_count': 'Trip Count'},
    title='Regional Efficiency Comparison',
)
fig.show()

In [13]:
# Test: KPI cards data
# KPIs that describe completed work are computed over non-cancelled trips.
active_trips = trips_df[~trips_df['is_cancelled']]

kpis = {
    'Total Trips': f"{len(trips_df):,}",
    'Active Trips': f"{len(active_trips):,}",
    # efficiency_index is already on a 0-100 scale, so it is formatted as a
    # plain number with a literal '%'. The '%' format type would multiply by
    # 100 again and print a value like 4524.0%.
    'Avg Efficiency': f"{active_trips['efficiency_index'].mean():.1f}%",
    # is_cancelled averages to a 0-1 fraction, so the '%' format type is correct here.
    'Cancellation Rate': f"{trips_df['is_cancelled'].mean():.1%}",
    'Total Drivers': f"{len(drivers_df):,}",
    'Avg Distance': f"{active_trips['distance_miles'].mean():.1f} mi"
}

print("Dashboard KPIs:")
print("=" * 40)
for name, value in kpis.items():
    print(f"{name}: {value}")

Dashboard KPIs:
Total Trips: 5,000
Active Trips: 4,853
Avg Efficiency: 4524.0%
Cancellation Rate: 2.9%
Total Drivers: 150
Avg Distance: 4.9 mi


<a id="export"></a>
## 4. Export for Dashboard

In [16]:
# Export aggregated data for dashboard
dashboard_data_dir = PROCESSED_DIR / 'dashboard'
# parents=True lets the export succeed even if an intermediate directory is missing.
dashboard_data_dir.mkdir(parents=True, exist_ok=True)

# Export files
daily_metrics.to_csv(dashboard_data_dir / 'daily_metrics.csv', index=False)
driver_summary.to_csv(dashboard_data_dir / 'driver_summary.csv', index=False)
region_summary.to_csv(dashboard_data_dir / 'region_summary.csv', index=False)

# Derive the non-cancelled subset here so the export does not depend on the
# KPI test cell having been run first.
active_trips = trips_df[~trips_df['is_cancelled']]

# Export KPIs as JSON (raw numeric values; formatting is left to the dashboard).
# avg_efficiency is on the 0-100 index scale, while cancellation_rate is a 0-1 fraction.
kpi_data = {
    'total_trips': len(trips_df),
    'active_trips': len(active_trips),
    'avg_efficiency': float(active_trips['efficiency_index'].mean()),
    'cancellation_rate': float(trips_df['is_cancelled'].mean()),
    'total_drivers': len(drivers_df),
    'avg_distance': float(active_trips['distance_miles'].mean())
}

# `json` is already imported in the setup cell; explicit encoding keeps the
# output independent of the platform's default locale.
with open(dashboard_data_dir / 'kpis.json', 'w', encoding='utf-8') as f:
    json.dump(kpi_data, f, indent=2)

print(f"✅ Dashboard data exported to {dashboard_data_dir}")

✅ Dashboard data exported to /Users/hc/Documents/projects/modivcare-rides-efficiency/data/processed/dashboard


<a id="deployment"></a>
## 5. Deployment Checklist

### Pre-Deployment Checks

- [x] Data files validated and complete
- [x] Aggregated data exported for performance
- [x] Visualizations tested
- [x] KPIs calculated and exported

### Streamlit App Requirements

```bash
# Install dependencies
pip install -r requirements.txt

# Run dashboard locally
streamlit run app/streamlit_app.py
```

### Deployment Options

| Platform | Pros | Cons |
|----------|------|------|
| **Streamlit Cloud** | Free, easy setup | Limited compute |
| **Heroku** | Flexible, scalable | Cost at scale |
| **AWS/GCP** | Full control | More setup required |

### Files to Deploy

```
modivcare-rides-efficiency/
├── app/
│   └── streamlit_app.py
├── src/
│   ├── config.py
│   ├── efficiency_scoring.py
│   ├── evaluation.py
│   └── utils.py
├── data/
│   └── processed/
│       ├── trips_with_efficiency.csv
│       ├── drivers.csv
│       └── dashboard/
├── requirements.txt
└── README.md
```

### Post-Deployment

1. Verify all tabs load correctly
2. Test filter functionality
3. Check mobile responsiveness
4. Monitor performance and errors

In [17]:
# Final summary: print a closing banner followed by the project recap.
banner_rule = "=" * 60

print(banner_rule)
print("MODIVCARE RIDES EFFICIENCY - PROJECT COMPLETE")
print(banner_rule)

# Plain (non-f) string: the recap has no placeholders to interpolate.
print("""
Notebooks Completed:
  01 - Exploratory Data Analysis
  02 - Efficiency Index Development
  03 - Routing Simulation Development
  04 - Evaluation & Results Analysis
  05 - Dashboard Integration & Deployment

Key Deliverables:
  - Efficiency scoring algorithm (4 weighted components)
  - Routing simulation comparison (3 strategies)
  - Interactive Streamlit dashboard
  - Comprehensive documentation

Next Steps:
  - Deploy dashboard to Streamlit Cloud
  - Collect user feedback
  - Iterate on scoring weights
""")

MODIVCARE RIDES EFFICIENCY - PROJECT COMPLETE

Notebooks Completed:
  01 - Exploratory Data Analysis
  02 - Efficiency Index Development
  03 - Routing Simulation Development
  04 - Evaluation & Results Analysis
  05 - Dashboard Integration & Deployment

Key Deliverables:
  - Efficiency scoring algorithm (4 weighted components)
  - Routing simulation comparison (3 strategies)
  - Interactive Streamlit dashboard
  - Comprehensive documentation

Next Steps:
  - Deploy dashboard to Streamlit Cloud
  - Collect user feedback
  - Iterate on scoring weights

