In [1]:
# Total Rides
# Total Drivers
# Total Fares
# Average Fare per Ride
# Average Fare per Driver

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
def create_summary_row(df, drivers_count):
    """Build one row of ride/fare summary statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Ride records; must provide the "ride_id" and "fare" columns.
    drivers_count : number
        Driver total used as the divisor for the per-driver average.
        It is passed through unchanged as the second element of the result.

    Returns
    -------
    tuple
        ``(total rides, drivers_count, total fares,
        average fare per ride, average fare per driver)``.
        Total rides is the number of non-null "ride_id" values.
    """
    fare_total = df["fare"].sum()
    ride_total = df["ride_id"].count()
    return (
        ride_total,
        drivers_count,
        fare_total,
        fare_total / ride_total,      # average fare per ride
        fare_total / drivers_count,   # average fare per driver
    )

In [4]:
def get_driver_count(city_data_df, c_type):
    """Return the sum of "driver_count" over rows whose "type" equals ``c_type``.

    Parameters
    ----------
    city_data_df : pandas.DataFrame
        Table providing the "type" and "driver_count" columns.
    c_type : str
        City type to select (the notebook calls this with "Urban",
        "Suburban" and "Rural").
    """
    matches_type = city_data_df["type"] == c_type
    return city_data_df.loc[matches_type, "driver_count"].sum()

In [5]:
# Input files, given as paths relative to the notebook's working directory.
city_data_to_load = "resources/city_data.csv"
ride_data_to_load = "resources/ride_data.csv"

# Read the city data file and store it in a pandas DataFrame.
# Later cells read its "city", "type" and "driver_count" columns.
city_data_df = pd.read_csv(city_data_to_load)
# Read the ride data file and store it in a pandas DataFrame.
# Later cells read its "city", "date", "fare" and "ride_id" columns.
ride_data_df = pd.read_csv(ride_data_to_load)

In [6]:
# Combine the data into a single dataset.
# A left merge keeps every ride row and attaches the columns of the city
# whose "city" value matches. The join key is named once: the original
# on=["city", "city"] listed the same column twice, which was redundant.
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on="city")
# Display the first rows of the merged DataFrame
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


In [7]:
# Create the Urban, Suburban and Rural city DataFrames by filtering the
# merged rides on the "type" column.
urban_cities_df = pyber_data_df.loc[pyber_data_df["type"].eq("Urban")]
suburban_cities_df = pyber_data_df.loc[pyber_data_df["type"].eq("Suburban")]
rural_cities_df = pyber_data_df.loc[pyber_data_df["type"].eq("Rural")]
# Preview one of the subsets
suburban_cities_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
1625,Barronchester,2019-01-27 03:08:01,27.79,6653622887913,11,Suburban
1626,East Kentstad,2019-04-07 19:44:19,18.75,6575961095852,20,Suburban
1627,Lake Omar,2019-01-17 21:33:35,21.71,966911700371,22,Suburban
1628,Myersshire,2019-02-27 17:38:39,17.1,5706770909868,19,Suburban
1629,West Hannah,2019-04-19 01:06:59,37.78,2273047151891,12,Suburban


In [8]:
# Driver totals per city type are taken from city_data_df; the merged frame
# repeats each city's driver_count on every ride row.
d_count_u, d_count_s, d_count_r = (
    get_driver_count(city_data_df, city_type)
    for city_type in ("Urban", "Suburban", "Rural")
)

# One summary tuple per city type
urban_row = create_summary_row(urban_cities_df, d_count_u)
suburban_row = create_summary_row(suburban_cities_df, d_count_s)
rural_row = create_summary_row(rural_cities_df, d_count_r)

# Collected in Rural, Suburban, Urban order
rows = [rural_row, suburban_row, urban_row]
rows

[(125, 78, 4327.929999999999, 34.623439999999995, 55.486282051282046),
 (625, 490, 19356.33, 30.970128000000003, 39.50271428571429),
 (1625, 2405, 39854.380000000005, 24.52577230769231, 16.571467775467777)]

In [9]:
# Assemble the per-city-type tuples into a labelled summary table.
# The index labels follow the order in which the tuples were put into `rows`.
summary_df = pd.DataFrame(
    rows,
    index=['Rural', 'Suburban', 'Urban'],
    columns=[
        'Total Rides',
        'Total Drivers',
        'Total Fares',
        'Average Fare per Ride',
        'Average Fare per Driver',
    ],
)
# Make sure the index carries no name
summary_df.index.name = None

In [10]:
# Display-only formatting: thousands separators for the counts and a dollar
# sign with two decimals for the money columns. This returns a Styler for
# rendering; summary_df itself is not reassigned here.
summary_df.style.format({
    'Total Rides': "{:,}",
    'Total Drivers': '{:,}',
    'Total Fares': "${:,.2f}",
    'Average Fare per Ride': '${:.2f}',
    'Average Fare per Driver': '${:.2f}',
})

Unnamed: 0,Total Rides,Total Drivers,Total Fares,Average Fare per Ride,Average Fare per Driver
Rural,125,78,"$4,327.93",$34.62,$55.49
Suburban,625,490,"$19,356.33",$30.97,$39.50
Urban,1625,2405,"$39,854.38",$24.53,$16.57


In [11]:
# Give the merged columns presentation-friendly names.
pyber_data_df = pyber_data_df.rename(
    columns={
        'city': 'City',
        'date': 'Date',
        'fare': 'Fare',
        'ride_id': 'Ride Id',
        'driver_count': 'No. Drivers',
        'type': 'City Type',
    }
)
# Preview the frame indexed by 'Date'. The result is only displayed, not
# assigned, so pyber_data_df keeps 'Date' as a regular column.
pyber_data_df.set_index('Date')

Unnamed: 0_level_0,City,Fare,Ride Id,No. Drivers,City Type
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-14 10:14:22,Lake Jonathanshire,13.83,5739410935873,5,Urban
2019-03-04 18:24:09,South Michelleport,30.24,2343912425577,72,Urban
2019-02-24 04:29:00,Port Samanthamouth,33.44,2005065760003,57,Urban
2019-02-10 23:22:03,Rodneyfort,23.44,5149245426178,34,Urban
2019-03-06 04:28:35,South Jack,34.58,3908451377344,46,Urban
...,...,...,...,...,...
2019-04-29 17:04:39,Michaelberg,13.38,8550365057598,6,Rural
2019-01-30 00:05:47,Lake Latoyabury,20.76,9018727594352,2,Rural
2019-02-10 21:03:50,North Jaime,11.11,2781339863778,1,Rural
2019-05-07 19:22:15,West Heather,44.94,4256853490277,4,Rural


In [12]:
# Independent copy of the date, city, city-type and fare columns, so later
# changes to fares_df do not touch pyber_data_df.
fares_df = pyber_data_df.loc[:, ["Date", "City", "City Type", "Fare"]].copy()
fares_df.head()

Unnamed: 0,Date,City,City Type,Fare
0,2019-01-14 10:14:22,Lake Jonathanshire,Urban,13.83
1,2019-03-04 18:24:09,South Michelleport,Urban,30.24
2,2019-02-24 04:29:00,Port Samanthamouth,Urban,33.44
3,2019-02-10 23:22:03,Rodneyfort,Urban,23.44
4,2019-03-06 04:28:35,South Jack,Urban,34.58


In [15]:
# Print the summary table's index, column dtypes and non-null counts.
summary_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, Rural to Urban
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Total Rides              3 non-null      int64  
 1   Total Drivers            3 non-null      int64  
 2   Total Fares              3 non-null      float64
 3   Average Fare per Ride    3 non-null      float64
 4   Average Fare per Driver  3 non-null      float64
dtypes: float64(3), int64(2)
memory usage: 224.0+ bytes
