# Day 15: UberPool Driver Earnings Optimization Strategies

You are a Business Analyst on the Uber Pool Product Team working to optimize driver compensation. The team aims to understand how trip characteristics impact driver earnings. Your goal is to develop data-driven recommendations that maximize driver earnings potential.

In [None]:
import pandas as pd
import numpy as np

# Trip fixture for the exercises: one tuple per trip, in the field order below.
trip_fields = (
    "trip_id",
    "driver_id",
    "ride_type",
    "trip_date",
    "rider_count",
    "total_distance",
    "total_earnings",
)
trip_rows = (
    (101, 1, "UberPool", "2024-07-05", 3, 10.5, 22.5),
    (102, 1, "UberPool", "2024-07-15", 2, 8, 18),
    (103, 2, "UberPool", "2024-08-10", 4, 15, 35),
    (104, 3, "UberX", "2024-07-20", 1, 5, 12),
    (105, 2, "UberPool", "2024-09-01", 3, 12, 30),
    (106, 4, "UberPool", "2024-09-15", 5, 20, 50),
    (107, 4, "UberPool", "2024-10-01", 3, 9, 25),
    (108, 5, "UberPool", "2024-08-25", 4, 11, 28),
    (109, 1, "UberPool", "2024-09-30", 3, 6, 16),
    (110, 2, "UberPool", "2024-07-07", 2, 7, 15),
    (111, 3, "UberPool", "2024-08-05", 4, 13, 32),
    (112, 5, "UberX", "2024-09-10", 1, 4, 10),
    (113, 6, "UberPool", "2024-07-30", 3, 22, 45),
    (114, 6, "UberPool", "2024-08-22", 4, 18, 42),
    (115, 7, "UberPool", "2024-09-21", 5, 25, 60),
)

# Expand each row into a record dict keyed by field name; the DataFrame is
# built from those records, so its columns follow the order of trip_fields.
fct_trips_data = [dict(zip(trip_fields, row)) for row in trip_rows]
fct_trips = pd.DataFrame(fct_trips_data)


## Question 1

What are the average driver earnings per completed UberPool ride with more than two riders between July 1st and September 30th, 2024? This analysis will help isolate trips that meet specific rider thresholds to understand their impact on driver earnings.

In [None]:
# Note: pandas and numpy are already imported as pd and np
# The following tables are loaded as pandas DataFrames with the same names: fct_trips
# Please print your final result or dataframe
import pandas as pd

# Question 1: average driver earnings per UberPool ride with more than two
# riders, for trips dated 2024-07-01 through 2024-09-30 (both ends inclusive).
#
# The calculation runs on the preloaded fct_trips table rather than a
# hand-copied subset, so every trip in the table is considered.
# fct_trips has no trip-status column, so every row is treated as completed.

# Work on a copy so the shared fct_trips table is left untouched for other cells.
df = fct_trips.copy()

# Parse the dates so the range check compares timestamps rather than raw strings.
df["trip_date"] = pd.to_datetime(df["trip_date"])

# Filter conditions:
# 1. UberPool rides
# 2. More than 2 riders
# 3. Trip date between July 1 and Sept 30, 2024 (Series.between is inclusive)
filtered_df = df[
    (df["ride_type"] == "UberPool")
    & (df["rider_count"] > 2)
    & (df["trip_date"].between("2024-07-01", "2024-09-30"))
]

# Mean of total_earnings over the qualifying rides (NaN if none qualify).
avg_earnings = filtered_df["total_earnings"].mean()

print("Average driver earnings per UberPool ride with >2 riders (Jul 1 - Sep 30, 2024):", avg_earnings)

## Question 2

For completed UberPool rides between July 1st and September 30th, 2024, derive a new column calculating earnings per mile (total_earnings divided by total_distance) and then compute the average earnings per mile for rides with more than two riders. This calculation will reveal efficiency metrics for driver compensation.

In [None]:
import pandas as pd

# Question 2: average earnings per mile (total_earnings / total_distance) for
# UberPool rides with more than two riders, for trips dated 2024-07-01 through
# 2024-09-30 (both ends inclusive).
#
# The calculation runs on the preloaded fct_trips table rather than a
# hand-copied subset, so every trip in the table is considered.
# fct_trips has no trip-status column, so every row is treated as completed.

# Work on a copy so the shared fct_trips table is left untouched for other cells.
df = fct_trips.copy()

# Parse the dates so the range check compares timestamps rather than raw strings.
df["trip_date"] = pd.to_datetime(df["trip_date"])

# Filter: UberPool, date between July 1 and Sept 30, 2024, and rider_count > 2
mask = (
    (df["ride_type"] == "UberPool")
    & (df["trip_date"].between("2024-07-01", "2024-09-30"))
    & (df["rider_count"] > 2)
)
# .copy() so adding a column below does not write into a slice of df.
filtered_df = df[mask].copy()

# Add earnings per mile column
filtered_df["earnings_per_mile"] = filtered_df["total_earnings"] / filtered_df["total_distance"]

# Average of the per-ride ratios (not total earnings divided by total miles).
avg_earnings_per_mile = filtered_df["earnings_per_mile"].mean()

print("Filtered Data:\n", filtered_df)
print("\nAverage Earnings per Mile (with >2 riders):", avg_earnings_per_mile)

## Question 3

Identify the combination of rider count and total distance that results in the highest average driver earnings per UberPool ride between July 1st and September 30th, 2024. This analysis directly recommends optimal trip combination strategies to maximize driver earnings.

In [None]:
import pandas as pd

# Question 3: find the (rider_count, total_distance) pair(s) with the highest
# average total_earnings among UberPool rides dated 2024-07-01 to 2024-09-30.

# Parse trip_date in place on fct_trips; unparseable values become NaT.
fct_trips['trip_date'] = pd.to_datetime(fct_trips['trip_date'], errors='coerce')

# Row selector: UberPool rides inside the inclusive date window.
is_pool = fct_trips['ride_type'] == 'UberPool'
in_window = fct_trips['trip_date'].between('2024-07-01', '2024-09-30')
mask = is_pool & in_window

# Keep only the columns the grouping needs and discard rows missing any of them.
needed = ['rider_count', 'total_distance', 'total_earnings']
filtered = fct_trips.loc[mask, needed].dropna(subset=needed)

# One row per (rider_count, total_distance) pair with its mean earnings.
combo_avg = (
    filtered
    .groupby(['rider_count', 'total_distance'], as_index=False)['total_earnings']
    .mean()
    .rename(columns={'total_earnings': 'avg_earnings'})
)

# Keep every pair that reaches the top average, so ties are all reported.
max_avg = combo_avg['avg_earnings'].max()
is_best = combo_avg['avg_earnings'] == max_avg
best_combos = combo_avg.loc[is_best].reset_index(drop=True)

print(best_combos)

Made with ❤️ by [Interview Master](https://www.interviewmaster.ai)