In [1]:
%matplotlib inline

In [2]:
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as sts
import matplotlib as mpl

In [3]:
# Files to load
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"

In [4]:
# Read the city and ride data file and store it in a pandas DataFrame.
city_data_df = pd.read_csv("Resources/city_data.csv")
ride_data_df = pd.read_csv("Resources/ride_data.csv")

In [5]:
city_data_df.head()

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban


In [6]:
city_data_df.count()

city            120
driver_count    120
type            120
dtype: int64

In [7]:
city_data_df.dtypes

city            object
driver_count     int64
type            object
dtype: object

In [8]:
ride_data_df.head()

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,1/14/2019 10:14,13.83,5739410000000.0
1,South Michelleport,3/4/2019 18:24,30.24,2343910000000.0
2,Port Samanthamouth,2/24/2019 4:29,33.44,2005070000000.0
3,Rodneyfort,2/10/2019 23:22,23.44,5149250000000.0
4,South Jack,3/6/2019 4:28,34.58,3908450000000.0


In [9]:
ride_data_df.count()

city       2375
date       2375
fare       2375
ride_id    2375
dtype: int64

In [10]:
ride_data_df.dtypes

city        object
date        object
fare       float64
ride_id    float64
dtype: object

In [11]:
# Combine the data into a single dataset
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on=["city", "city"])

# Display the DataFrame
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,1/14/2019 10:14,13.83,5739410000000.0,5,Urban
1,South Michelleport,3/4/2019 18:24,30.24,2343910000000.0,72,Urban
2,Port Samanthamouth,2/24/2019 4:29,33.44,2005070000000.0,57,Urban
3,Rodneyfort,2/10/2019 23:22,23.44,5149250000000.0,34,Urban
4,South Jack,3/6/2019 4:28,34.58,3908450000000.0,46,Urban


In [12]:
total_fares = pyber_data_df.groupby(['type']).sum()['fare']
total_fares

type
Rural        4327.93
Suburban    19356.33
Urban       39854.38
Name: fare, dtype: float64

In [13]:
total_rides = pyber_data_df.groupby(['type']).count()['ride_id']
total_rides

type
Rural        125
Suburban     625
Urban       1625
Name: ride_id, dtype: int64

In [14]:
total_drivers = city_data_df.groupby(['type']).sum()['driver_count']
total_drivers

type
Rural         78
Suburban     490
Urban       2405
Name: driver_count, dtype: int64

In [15]:
average_fare_per_ride = total_fares / total_rides
average_fare_per_ride

type
Rural       34.623440
Suburban    30.970128
Urban       24.525772
dtype: float64

In [16]:
average_fare_per_driver = total_fares / total_drivers
average_fare_per_driver

type
Rural       55.486282
Suburban    39.502714
Urban       16.571468
dtype: float64

In [17]:
# Assemble into DataFrame.
total_summary_df = pd.DataFrame({'Total Rides' : total_rides,'Total Drivers' : total_drivers, 'Total Fares' : total_fares,'Average Fare per Ride' : average_fare_per_ride,'Average Fare per Driver' : average_fare_per_driver})

total_summary_df

Unnamed: 0_level_0,Total Rides,Total Drivers,Total Fares,Average Fare per Ride,Average Fare per Driver
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Rural,125,78,4327.93,34.62344,55.486282
Suburban,625,490,19356.33,30.970128,39.502714
Urban,1625,2405,39854.38,24.525772,16.571468


In [18]:
# Formatting
total_summary_df['Total Rides'] = total_summary_df['Total Rides'].map("{:.0f}".format)

total_summary_df['Total Drivers'] = total_summary_df['Total Drivers'].map("{:,.0f}".format)

total_summary_df['Total Fares'] = total_summary_df['Total Fares'].map("${:,.2f}".format)

total_summary_df['Average Fare per Ride'] = total_summary_df['Average Fare per Ride'].map("${:.2f}".format)

total_summary_df['Average Fare per Driver'] = total_summary_df['Average Fare per Driver'].map("${:.2f}".format)

total_summary_df.index.name = None

total_summary_df

Unnamed: 0,Total Rides,Total Drivers,Total Fares,Average Fare per Ride,Average Fare per Driver
Rural,125,78,"$4,327.93",$34.62,$55.49
Suburban,625,490,"$19,356.33",$30.97,$39.50
Urban,1625,2405,"$39,854.38",$24.53,$16.57


In [19]:
pyber_data_df = pyber_data_df.rename(columns = {'city':'City', 'date':'Date','fare':'Fare', 'ride_id': 'Ride Id','driver_count': 'No. Drivers', 'type':'City Type'})

In [20]:
pyber_data_df = pyber_data_df.set_index(['Date'])
pyber_data_df

Unnamed: 0_level_0,City,Fare,Ride Id,No. Drivers,City Type
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1/14/2019 10:14,Lake Jonathanshire,13.83,5.739410e+12,5,Urban
3/4/2019 18:24,South Michelleport,30.24,2.343910e+12,72,Urban
2/24/2019 4:29,Port Samanthamouth,33.44,2.005070e+12,57,Urban
2/10/2019 23:22,Rodneyfort,23.44,5.149250e+12,34,Urban
3/6/2019 4:28,South Jack,34.58,3.908450e+12,46,Urban
...,...,...,...,...,...
4/29/2019 17:04,Michaelberg,13.38,8.550370e+12,6,Rural
1/30/2019 0:05,Lake Latoyabury,20.76,9.018730e+12,2,Rural
2/10/2019 21:03,North Jaime,11.11,2.781340e+12,1,Rural
5/7/2019 19:22,West Heather,44.94,4.256850e+12,4,Rural


In [21]:
pyber_data_df_copy = pyber_data_df[['City Type', 'Fare']].copy()
pyber_data_df_copy.head()

Unnamed: 0_level_0,City Type,Fare
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1/14/2019 10:14,Urban,13.83
3/4/2019 18:24,Urban,30.24
2/24/2019 4:29,Urban,33.44
2/10/2019 23:22,Urban,23.44
3/6/2019 4:28,Urban,34.58


In [22]:
pyber_data_df_copy.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2375 entries, 1/14/2019 10:14 to 4/25/2019 10:20
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   City Type  2375 non-null   object 
 1   Fare       2375 non-null   float64
dtypes: float64(1), object(1)
memory usage: 55.7+ KB


In [23]:
total_copy_fares_series = pyber_data_df_copy.groupby(['Date', 'City Type']).sum()['Fare']
total_copy_fares_series

Date            City Type
1/1/2019 0:08   Urban        37.91
1/1/2019 0:46   Suburban     47.74
1/1/2019 12:32  Suburban     25.56
1/1/2019 14:40  Urban         5.42
1/1/2019 14:42  Urban        12.31
                             ...  
5/8/2019 1:54   Urban        32.69
5/8/2019 2:31   Urban        41.33
5/8/2019 4:20   Urban        21.99
5/8/2019 4:39   Urban        18.45
5/8/2019 7:29   Urban        18.55
Name: Fare, Length: 2364, dtype: float64

In [24]:
pyber_data_df_copy = pyber_data_df_copy.pivot_table('Fare', ['Date'], 'City Type')
pyber_data_df_copy

City Type,Rural,Suburban,Urban
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2019 0:08,,,37.91
1/1/2019 0:46,,47.74,
1/1/2019 12:32,,25.56,
1/1/2019 14:40,,,5.42
1/1/2019 14:42,,,12.31
...,...,...,...
5/8/2019 1:54,,,32.69
5/8/2019 2:31,,,41.33
5/8/2019 4:20,,,21.99
5/8/2019 4:39,,,18.45


In [25]:
date_bins_df = pyber_data_df_copy.loc['1/1/2019' : '4/28/2019']
date_bins_df.head()

City Type,Rural,Suburban,Urban
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2019 0:08,,,37.91
1/1/2019 0:46,,47.74,
1/1/2019 12:32,,25.56,
1/1/2019 14:40,,,5.42
1/1/2019 14:42,,,12.31


In [27]:
date_weekly_bins = ['January', 'February', 'March', 'April']
pd.cut(date_bins_df, date_weekly_bins)

ValueError: Input array must be 1 dimensional

In [None]:
# total_copy_fares_df = pd.DataFrame(total_copy_fares_series)
# total_copy_fares_df.head()

In [None]:
# total_copy_summary_df = pyber_data_df[['City Type', 'Fare']].copy()
# total_copy_summary_df.head()

In [None]:
# total_copy_summary_df = pd.DataFrame([total_copy_fares])
# total_copy_summary_df

In [None]:
# # Create the Urban city DataFrame.
# urban_cities_df = pyber_data_df[pyber_data_df["type"] == "Urban"]
# urban_cities_df.head()

In [None]:
# # Create the Suburban city DataFrame.
# suburban_cities_df = pyber_data_df[pyber_data_df["type"] == "Suburban"]
# suburban_cities_df.head()

In [None]:
# # Create the Rural city DataFrame.
# rural_cities_df = pyber_data_df[pyber_data_df["type"] == "Rural"]
# rural_cities_df.head()

In [None]:
# # Get the number of rides for urban cities.
# urban_ride_count = urban_cities_df.groupby(["city"]).count()["ride_id"]
# urban_ride_count.head()

In [None]:
# # Get the total number of urban rides.
# urban_total_rides = urban_ride_count.sum()
# urban_total_rides

In [None]:
# # Get average fare for each city in the urban cities.
# urban_fare_count = urban_cities_df.groupby(["city"]).sum()["fare"]
# urban_fare_count.head()

In [None]:
# # Get the total fares of urban rides.
# urban_total_fares = urban_fare_count.sum()
# urban_total_fares

In [None]:
# # Get the average fare per urban rides.
# urban_average_fares = urban_total_fares / urban_total_rides
# urban_average_fares

In [None]:
# urban_total_drivers = urban_driver_count.sum()
# urban_total_drivers

In [None]:
# # Get the number of rides for Suburban cities.
# suburban_ride_count = suburban_cities_df.groupby(["city"]).count()["ride_id"]
# suburban_ride_count.head()

In [None]:
# # Get the total number of suburban rides.
# suburban_total_rides = suburban_ride_count.sum()
# suburban_total_rides

In [None]:
# # Get average fare for each city in the Suburban cities.
# suburban_avg_fare = suburban_cities_df.groupby(["city"]).mean()["fare"]
# suburban_avg_fare.head()

In [None]:
# # Get the number of rides for Rural cities.
# rural_ride_count = rural_cities_df.groupby(["city"]).count()["ride_id"]
# rural_ride_count.head()

In [None]:
# # Get the total number of rural rides.
# rural_total_rides = rural_ride_count.sum()
# rural_total_rides

In [None]:
# # Get average fare for each city in the rural cities.
# rural_avg_fare = rural_cities_df.groupby(["city"]).mean()["fare"]
# rural_avg_fare.head()