In [5]:
import requests
import json
import browsercookie

class UberDriver:
    """Small client for the Uber Drivers earnings web endpoints.

    Requests are authenticated by replaying the uber.com cookies found in the
    local Firefox cookie store together with browser-like headers.
    """

    # Seconds to wait for any single HTTP request before giving up, so a
    # stalled connection cannot hang the notebook forever.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        # Log into the Uber Drivers page (https://drivers.uber.com/earnings/activities)
        # in Firefox first: browsercookie.firefox() loads the cookies from the local
        # Firefox cookie store, so no manual cookies.txt export is needed.
        cj = browsercookie.firefox()
        # The jar contains cookies for every site the browser has visited; forward
        # only the uber.com ones so unrelated session cookies are never sent to Uber.
        cookies_str = "; ".join(
            f"{cookie.name}={cookie.value}"
            for cookie in cj
            if self._isUberCookie(cookie)
        )

        # These header values come from the Postman code snippet for the original
        # request, which is built in Postman by importing the cURL copied from the
        # browser's Network developer console for
        # https://drivers.uber.com/earnings/activities
        self.headers = {
            'accept': '*/*',
            'accept-language': 'en-US,en;q=0.9',
            'content-type': 'application/json',
            'cookie': cookies_str,
            'origin': 'https://drivers.uber.com',
            'priority': 'u=1, i',
            'referer': 'https://drivers.uber.com/earnings/activities',
            'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
            'x-csrf-token': 'x',
            'x-uber-earnings-seed': '939a975a5c8f7c135421d56458dcccb5'
        }

    @staticmethod
    def _isUberCookie(cookie):
        """Return True when the cookie's domain is uber.com or one of its subdomains."""
        domain = (cookie.domain or "").lstrip(".").lower()
        return domain == "uber.com" or domain.endswith(".uber.com")

    def _postJson(self, url, body):
        """POST `body` as JSON to `url` and return the decoded JSON response.

        Raises requests.HTTPError on a 4xx/5xx status so an expired session shows
        up as a clear error instead of a confusing KeyError further down.
        """
        response = requests.request(
            "POST",
            url,
            headers=self.headers,
            data=json.dumps(body),
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def getRides(self, startDateIso, endDateIso):
        """Fetch every activity in the given date window, following pagination.

        Args:
            startDateIso: Window start as a 'YYYY-MM-DD' string.
            endDateIso: Window end as a 'YYYY-MM-DD' string.

        Returns:
            A list of activity dicts exactly as returned by the API; an empty
            list when the window contains no activities.
        """
        url = "https://drivers.uber.com/earnings/api/getWebActivityFeed?localeCode=en"

        rides = []
        paginationOption = {}
        while True:
            data = self._postJson(url, {
                "startDateIso": startDateIso,
                "endDateIso": endDateIso,
                "paginationOption": paginationOption,
            })
            # A page may report its activities as None; treat that as "no rides"
            # so later pages can still be accumulated.
            rides.extend(data['data']['activities'] or [])

            pagination = data['data']['pagination']
            if not pagination['hasMoreData']:
                return rides
            paginationOption = {"cursor": pagination['nextCursor']}

    def getRideDetail(self, rideUuid):
        """Return the raw body of the trip detail page for `rideUuid`.

        This is only helpful to get the additional fare breakdown from Uber, e.g.
        to analyze how much Uber is taking from each fare. The HTTP status is not
        checked; the response text is returned as-is.
        """
        url = f"https://drivers.uber.com/earnings/trips/{rideUuid}"
        response = requests.request(
            "GET", url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        return response.text

# Build the client once; later cells reuse `uber`.
uber = UberDriver()

# Smoke test: pull a single known week before running the full backfill.
startDateIso, endDateIso = "2023-01-09", "2023-01-16"
rides = uber.getRides(startDateIso, endDateIso)
print(f"Retrieved {len(rides)} rides for the period {startDateIso} - {endDateIso}.")

Retrieved 42 rides for the period 2023-01-09 - 2023-01-16.


In [6]:
from datetime import datetime, timedelta

# First day driving for Uber; change this to your own start date.
startDate = datetime.strptime("2023-01-09", "%Y-%m-%d")
endDate = datetime.today()

# Walk forward from startDate in one-week windows until the window start passes
# today. Each window's end date is the next window's start date.
# NOTE(review): whether the API treats endDateIso as inclusive is not visible
# here; confirm that boundary-day rides are not fetched twice.
rides = []
currentDate = startDate
while currentDate <= endDate:
    nextDate = currentDate + timedelta(weeks=1)
    startDateIso, endDateIso = (
        currentDate.strftime('%Y-%m-%d'),
        nextDate.strftime('%Y-%m-%d'),
    )
    print(f"Getting rides for {startDateIso} - {endDateIso}...")
    newRides = uber.getRides(startDateIso, endDateIso)
    print(f"Retrieved {len(newRides)} rides.")
    rides.extend(newRides)
    currentDate = nextDate

print(f"Retrieved {len(rides)} rides total.")

Getting rides for 2023-01-09 - 2023-01-16...
Retrieved 42 rides.
Getting rides for 2023-01-16 - 2023-01-23...
Retrieved 9 rides.
Getting rides for 2023-01-23 - 2023-01-30...
Retrieved 13 rides.
Getting rides for 2023-01-30 - 2023-02-06...
Retrieved 0 rides.
Getting rides for 2023-02-06 - 2023-02-13...
Retrieved 26 rides.
Getting rides for 2023-02-13 - 2023-02-20...
Retrieved 26 rides.
Getting rides for 2023-02-20 - 2023-02-27...
Retrieved 7 rides.
Getting rides for 2023-02-27 - 2023-03-06...
Retrieved 29 rides.
Getting rides for 2023-03-06 - 2023-03-13...
Retrieved 9 rides.
Getting rides for 2023-03-13 - 2023-03-20...
Retrieved 51 rides.
Getting rides for 2023-03-20 - 2023-03-27...
Retrieved 59 rides.
Getting rides for 2023-03-27 - 2023-04-03...
Retrieved 26 rides.
Getting rides for 2023-04-03 - 2023-04-10...
Retrieved 0 rides.
Getting rides for 2023-04-10 - 2023-04-17...
Retrieved 0 rides.
Getting rides for 2023-04-17 - 2023-04-24...
Retrieved 0 rides.
Getting rides for 2023-04-24 - 2

In [7]:
# Persist the raw rides to a JSON file named after the retrieval date so the data
# can be used outside this notebook, or reloaded without hitting Uber again.
ridesPath = f"rides-{endDate.strftime('%Y-%m-%d')}.json"
with open(ridesPath, "w") as file:
    json.dump(rides, file)

# Read the file straight back so the rest of the notebook works from the saved copy.
with open(ridesPath, "r") as file:
    rides = json.load(file)

rides[0]

{'uuid': 'd3096d6c-02bd-4f8e-855b-117588b27910',
 'recognizedAt': 1673809557,
 'activityTitle': 'Comfort',
 'formattedTotal': '$10.72',
 'routing': {'webviewUrl': 'https://drivers.uber.com/earnings/trips/d3096d6c-02bd-4f8e-855b-117588b27910',
  'deeplinkUrl': None},
 'breakdownDetails': {'formattedTip': '$1.00', 'formattedSurge': None},
 'tripMetaData': {'formattedDuration': '15 min 56 sec',
  'formattedDistance': '3.9 mi',
  'pickupAddress': 'N Ashland Ave, Chicago, IL 60614-1101, US',
  'dropOffAddress': 'W Madison St, Chicago, 60612, US',
  'mapUrl': 'https://static-maps.uber.com/map?width=360&height=100&marker=lat%3A41.92946%24lng%3A-87.66817%24icon%3Ahttps%3A%2F%2Fd1a3f4spazzrp4.cloudfront.net%2Fmaps%2Fhelix%2Fcar-pickup-pin.png%24anchorX%3A1.0%24anchorY%3A0.5&marker=lat%3A41.88207%24lng%3A-87.67135%24icon%3Ahttps%3A%2F%2Fd1a3f4spazzrp4.cloudfront.net%2Fmaps%2Fhelix%2Fcar-dropoff-pin.png%24anchorX%3A1.0%24anchorY%3A0.5&polyline=color%3A0xFF2DBAE4%24width%3A4%24enc%3Aaj%7C%7EF%60ua

In [8]:
import re

def parseTimeToSeconds(timeStr):
    """Convert a duration such as '15 min 56 sec' into a total number of seconds.

    Every '<number> hr|min|sec' fragment found in the string is added up; a
    string with no such fragment yields 0.
    """
    secondsPerUnit = {'hr': 3600, 'min': 60, 'sec': 1}
    total = 0
    for amount, unit in re.findall(r'(\d+)\s*(hr|min|sec)', timeStr):
        total += int(amount) * secondsPerUnit[unit]
    return total

def parseMiles(milesStr):
    """Parse a distance such as '3.9 mi' into a float number of miles.

    Accepts thousands separators ('1,234.5 mi') and a bare leading decimal
    point ('.5 mi').

    Raises:
        ValueError: if no '<number> mi' fragment is found in `milesStr`.
    """
    # The leading-dot alternative keeps '.5 mi' from being read as 5 miles, and
    # allowing commas keeps '1,234.5 mi' from being read as 234.5 miles.
    match = re.search(r'(\d[\d,]*(?:\.\d*)?|\.\d+)\s*mi', milesStr)
    if match is None:
        raise ValueError(f"Could not parse a distance in miles from {milesStr!r}")
    return float(match.group(1).replace(',', ''))

def parseCurrencyToFloat(currencyStr):
    """Convert a formatted dollar amount such as '$10.72' into a float.

    Dollar signs and thousands separators are removed before conversion, so
    '$1,234.56' becomes 1234.56 and '-$5.00' becomes -5.0.

    Raises:
        ValueError: if what remains is not a valid number.
    """
    cleanStr = currencyStr.replace('$', '').replace(',', '').strip()
    return float(cleanStr)

def parseSeason(date):
    """Return the season name for an object exposing `.month` and `.day`.

    Seasons use fixed boundaries: Spring is Mar 21 - Jun 20, Summer is
    Jun 21 - Sep 20, Fall is Sep 21 - Dec 20, and Winter is everything else
    (Dec 21 - Mar 20).
    """
    monthDay = (date.month, date.day)
    # Tuples compare month first, then day, which matches calendar order.
    if (3, 21) <= monthDay <= (6, 20):
        return 'Spring'
    if (6, 21) <= monthDay <= (9, 20):
        return 'Summer'
    if (9, 21) <= monthDay <= (12, 20):
        return 'Fall'
    return 'Winter'

def cleanRide(ride):
    """Flatten one raw activity dict into a record with parsed, typed fields."""
    # Tip and surge default to '$0.00' when the breakdown section is absent or
    # when the individual value is empty.
    breakdown = ride.get('breakdownDetails')
    if breakdown:
        tip = breakdown['formattedTip'] or '$0.00'
        surge = breakdown['formattedSurge'] or '$0.00'
    else:
        tip = surge = '$0.00'

    # Activities without trip metadata keep None for all trip-specific fields.
    tripMeta = ride.get('tripMetaData')
    if tripMeta:
        duration = parseTimeToSeconds(tripMeta['formattedDuration'])
        distance = parseMiles(tripMeta['formattedDistance'])
        pickupAddress = tripMeta['pickupAddress']
        dropOffAddress = tripMeta['dropOffAddress']
    else:
        duration = distance = pickupAddress = dropOffAddress = None

    # 'recognizedAt' is an epoch timestamp; fromtimestamp() without a tz argument
    # converts it to this machine's local time zone.
    when = datetime.fromtimestamp(ride['recognizedAt'])
    weekdayIndex = when.weekday()
    weekdayName = when.strftime('%A')

    return {
        'uuid': ride['uuid'],
        'date': when.strftime('%Y-%m-%d'),
        'time': when.strftime('%H:%M:%S'),
        'day': weekdayName,
        'day of week': weekdayIndex,
        'sortable day of week': f"{weekdayIndex} - {weekdayName}",
        'season': parseSeason(when),
        'type': ride['activityTitle'],
        'earnings': parseCurrencyToFloat(ride['formattedTotal']),
        'tip': parseCurrencyToFloat(tip),
        'surge': parseCurrencyToFloat(surge),
        'duration': duration,
        'distance': distance,
        'pickupAddress': pickupAddress,
        'dropoffAddress': dropOffAddress,
        'status': ride['status'],
        'note': ride['type']
    }

cleanedRides = []
for ride in rides:
    cleanedRides.append(cleanRide(ride))

In [9]:
# Sanity check: number of cleaned records (the cleaning loop appends one per raw ride).
print(len(cleanedRides))

3319


In [10]:
# Preview the first ten cleaned records to eyeball the parsed fields.
cleanedRides[:10]

[{'uuid': 'd3096d6c-02bd-4f8e-855b-117588b27910',
  'date': '2023-01-15',
  'time': '13:05:57',
  'day': 'Sunday',
  'day of week': 6,
  'sortable day of week': '6 - Sunday',
  'season': 'Winter',
  'type': 'Comfort',
  'earnings': 10.72,
  'tip': 1.0,
  'surge': 0.0,
  'duration': 956,
  'distance': 3.9,
  'pickupAddress': 'N Ashland Ave, Chicago, IL 60614-1101, US',
  'dropoffAddress': 'W Madison St, Chicago, 60612, US',
  'status': 'COMPLETED',
  'note': 'TRIP'},
 {'uuid': '89a3e777-2000-44af-8509-765b157dfe9e',
  'date': '2023-01-15',
  'time': '12:45:31',
  'day': 'Sunday',
  'day of week': 6,
  'sortable day of week': '6 - Sunday',
  'season': 'Winter',
  'type': 'UberX',
  'earnings': 4.13,
  'tip': 0.0,
  'surge': 0.0,
  'duration': 835,
  'distance': 2.4,
  'pickupAddress': 'N Lincoln Ave, Chicago, IL 60613, US',
  'dropoffAddress': 'N Southport Ave, Chicago, IL 60614, US',
  'status': 'COMPLETED',
  'note': 'TRIP'},
 {'uuid': '78616fc5-6214-4f31-89a0-4229e35f0c5c',
  'date': 

In [11]:
# NOTE(review): this cell repeats the previous cell verbatim; consider deleting it.
cleanedRides[:10]

[{'uuid': 'd3096d6c-02bd-4f8e-855b-117588b27910',
  'date': '2023-01-15',
  'time': '13:05:57',
  'day': 'Sunday',
  'day of week': 6,
  'sortable day of week': '6 - Sunday',
  'season': 'Winter',
  'type': 'Comfort',
  'earnings': 10.72,
  'tip': 1.0,
  'surge': 0.0,
  'duration': 956,
  'distance': 3.9,
  'pickupAddress': 'N Ashland Ave, Chicago, IL 60614-1101, US',
  'dropoffAddress': 'W Madison St, Chicago, 60612, US',
  'status': 'COMPLETED',
  'note': 'TRIP'},
 {'uuid': '89a3e777-2000-44af-8509-765b157dfe9e',
  'date': '2023-01-15',
  'time': '12:45:31',
  'day': 'Sunday',
  'day of week': 6,
  'sortable day of week': '6 - Sunday',
  'season': 'Winter',
  'type': 'UberX',
  'earnings': 4.13,
  'tip': 0.0,
  'surge': 0.0,
  'duration': 835,
  'distance': 2.4,
  'pickupAddress': 'N Lincoln Ave, Chicago, IL 60613, US',
  'dropoffAddress': 'N Southport Ave, Chicago, IL 60614, US',
  'status': 'COMPLETED',
  'note': 'TRIP'},
 {'uuid': '78616fc5-6214-4f31-89a0-4229e35f0c5c',
  'date': 

In [12]:
import pandas
# Load the cleaned records into a DataFrame (one row per ride) and print summary
# statistics for the numeric columns.
ridesDataframe = pandas.DataFrame(data=cleanedRides)
numericSummary = ridesDataframe.describe()
print(numericSummary)

       day of week     earnings          tip        surge     duration  \
count  3319.000000  3319.000000  3319.000000  3319.000000  3219.000000   
mean      3.173848    11.146731     1.330717     0.684164  1002.362535   
std       1.766909    10.363576     2.463002     1.585636   742.943702   
min       0.000000     0.000000     0.000000     0.000000     0.000000   
25%       2.000000     6.075000     0.000000     0.000000   528.000000   
50%       3.000000     9.540000     0.000000     0.000000   843.000000   
75%       5.000000    13.955000     3.000000     0.000000  1298.000000   
max       6.000000   400.000000    28.780000    15.000000  7620.000000   

          distance  
count  3219.000000  
mean      4.522709  
std       4.504905  
min       0.000000  
25%       1.700000  
50%       3.300000  
75%       5.900000  
max      47.900000  


In [13]:
# Summary statistics over every column (numeric and non-numeric alike), plus the
# number of missing values in each column.
stats = ridesDataframe.describe(include='all')
null_count = ridesDataframe.isnull().sum()

print(stats)
print("\nCount of None values per column:\n", null_count)

# For object-typed columns (strings / mixed types), list the distinct non-null values.
print("\nAdditional Info:")
for column in ridesDataframe.columns:
    columnValues = ridesDataframe[column]
    if not (columnValues.dtype == 'object'):
        continue
    unique_strings = columnValues.dropna().unique()
    print(f"Unique values in column '{column}': {unique_strings}")

                                        uuid        date      time     day  \
count                                   3319        3319      3319    3319   
unique                                  3319         268      3180       7   
top     d3096d6c-02bd-4f8e-855b-117588b27910  2024-01-06  04:00:00  Friday   
freq                                       1          41        18     755   
mean                                     NaN         NaN       NaN     NaN   
std                                      NaN         NaN       NaN     NaN   
min                                      NaN         NaN       NaN     NaN   
25%                                      NaN         NaN       NaN     NaN   
50%                                      NaN         NaN       NaN     NaN   
75%                                      NaN         NaN       NaN     NaN   
max                                      NaN         NaN       NaN     NaN   

        day of week sortable day of week  season   type     ear

In [15]:
# Ride types that carry human passengers; every other activity title is excluded.
PASSENGER_RIDE_TYPES = ('Comfort', 'UberX', 'UberXL', 'UberX Share',
                        'UberX Priority', 'Uber Pet', 'Business Comfort')


def _safeRatio(numerator, denominator):
    """Return numerator / denominator, or None when the denominator is missing or zero."""
    if not denominator:
        return None
    return numerator / denominator


# Let's filter the data to only include completed rides of humans
filteredRides = [
    ride for ride in cleanedRides
    if ride['status'] == 'COMPLETED'
    and ride['note'] == 'TRIP'
    and ride['type'] in PASSENGER_RIDE_TYPES
]

# Let's add some calculated columns now to skip the manual processing in a spreadsheet
enrichedRides = []
for ride in filteredRides:
    enrichedRide = ride.copy()
    enrichedRide['earnings-surge'] = ride['earnings'] - ride['surge']
    # Duration and distance can be 0 or None (the summary statistics show a
    # minimum of 0 for both); leave the rate empty instead of crashing with
    # ZeroDivisionError / TypeError.
    enrichedRide['earnings/second'] = _safeRatio(ride['earnings'], ride['duration'])
    enrichedRide['earnings/mile'] = _safeRatio(ride['earnings'], ride['distance'])
    # Both fields hold the same value for every row after filtering, so drop them.
    del enrichedRide['status']
    del enrichedRide['note']
    enrichedRides.append(enrichedRide)
    
import csv
def _writeRidesCsv(path, rows):
    """Write a list of flat dicts to `path` as CSV, using the first row's keys as the header.

    Nothing is written when `rows` is empty, because there is no row to take
    the column names from.
    """
    if not rows:
        print(f"No rows to write; skipping {path}")
        return
    # newline='' is what the csv module requires to avoid blank lines between rows
    # on Windows; utf-8 keeps non-ASCII address text intact on any platform.
    with open(path, 'w', newline='', encoding='utf-8') as file:
        dw = csv.DictWriter(file, fieldnames=list(rows[0].keys()))
        dw.writeheader()
        dw.writerows(rows)


# Name both exports after the period actually retrieved: from the configured
# startDate up to today.
exportRange = f"{startDate.strftime('%Y-%m-%d')} to {datetime.today().strftime('%Y-%m-%d')}"

# Passenger rides only, with the calculated columns.
_writeRidesCsv(f"Uber Rides - {exportRange}.csv", enrichedRides)

# Every cleaned activity, unfiltered.
_writeRidesCsv(f"Uber All Trips - {exportRange}.csv", cleanedRides)
