# Add Matplotlib inline magic command & import dependencies

In [41]:
# Add Matplotlib inline magic command
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Create variables to load files

In [42]:
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"

# Read the files and store them in pandas DataFrame

In [43]:
# Read city data and store into DataFrame
city_data_df = pd.read_csv(city_data_to_load)
city_data_df.head(10)

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban
5,West Anthony,70,Urban
6,West Angela,48,Urban
7,Martinezhaven,25,Urban
8,Karenberg,22,Urban
9,Barajasview,26,Urban


In [44]:
# Read ride data and store into DataFrame
ride_data_df = pd.read_csv(ride_data_to_load)
ride_data_df.head(10)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344
5,South Latoya,2019-03-11 12:26:48,9.52,1994999424437
6,New Paulville,2019-02-27 11:17:56,43.25,793208410091
7,Simpsonburgh,2019-04-26 00:43:24,35.98,111953927754
8,South Karenland,2019-01-08 03:28:48,35.09,7995623208694
9,North Jasmine,2019-03-09 06:26:29,42.81,5327642267789


# Merge 2 DataFrames

In [45]:
# Combine 2 data into a single sheet
pyber_data_df = pd.merge(ride_data_df, city_data_df, how = "left", on = ["city", "city"])
# Display the DataFrame
pyber_data_df.head(10)

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban
5,South Latoya,2019-03-11 12:26:48,9.52,1994999424437,10,Urban
6,New Paulville,2019-02-27 11:17:56,43.25,793208410091,44,Urban
7,Simpsonburgh,2019-04-26 00:43:24,35.98,111953927754,21,Urban
8,South Karenland,2019-01-08 03:28:48,35.09,7995623208694,4,Urban
9,North Jasmine,2019-03-09 06:26:29,42.81,5327642267789,33,Urban


# Technical Analysis Deliverable 1: A Summary DataFrame

## Total Rides

In [46]:
# Get the sum of the rides for each city type.
sum_rides_by_type = pyber_data_df.groupby(["type"]).count()["ride_id"]
sum_rides_by_type

type
Rural        125
Suburban     625
Urban       1625
Name: ride_id, dtype: int64

## Total Drivers

In [47]:
# Get the sum of the drivers for each city type.
sum_drivers_by_type = pyber_data_df.groupby(["type"]).sum()["driver_count"]
sum_drivers_by_type

type
Rural         537
Suburban     8570
Urban       59602
Name: driver_count, dtype: int64

## Total Fares

In [48]:
# Get the sum of the fares for each city type.
sum_fares_by_type = pyber_data_df.groupby(["type"]).sum()["fare"]
sum_fares_by_type

type
Rural        4327.93
Suburban    19356.33
Urban       39854.38
Name: fare, dtype: float64

## Average Fare per Ride

In [49]:
average_fare_per_ride_by_type = sum_fares_by_type / sum_rides_by_type
average_fare_per_ride_by_type

type
Rural       34.623440
Suburban    30.970128
Urban       24.525772
dtype: float64

## Average Fare per Driver

In [50]:
average_fare_per_driver_by_type = sum_fares_by_type / sum_drivers_by_type
average_fare_per_driver_by_type

type
Rural       8.059460
Suburban    2.258615
Urban       0.668675
dtype: float64

In [51]:
# Adding a list of values with keys to create a new DataFrame.
summary_df = pd.DataFrame({
             "Total Rides": sum_rides_by_type,
             "Total Drivers": sum_drivers_by_type,
             "Total Fares": sum_fares_by_type,
             "Average Fare Per Ride": average_fare_per_ride_by_type,
             "Average Fare Per Driver": average_fare_per_driver_by_type})
summary_df["Total Rides"] = summary_df["Total Rides"].map("{:,}".format)
summary_df["Total Drivers"] = summary_df["Total Drivers"].map("{:,}".format)
summary_df["Total Fares"] = summary_df["Total Fares"].map("${:,.2f}".format)
summary_df["Average Fare Per Ride"] = summary_df["Average Fare Per Ride"].map("${:,.2f}".format)
summary_df["Average Fare Per Driver"] = summary_df["Average Fare Per Driver"].map("${:,.2f}".format)
summary_df.index.name = None
summary_df

Unnamed: 0,Total Rides,Total Drivers,Total Fares,Average Fare Per Ride,Average Fare Per Driver
Rural,125,537,"$4,327.93",$34.62,$8.06
Suburban,625,8570,"$19,356.33",$30.97,$2.26
Urban,1625,59602,"$39,854.38",$24.53,$0.67


# Technical Analysis Deliverable 2: Multiple-Line Plot for the Sum of the Fares for Each City Type

## Rename columns

In [54]:
pyber_data_df.rename(columns = {'city':'City', 'date':'Date','fare':'Fare', 'ride_id': 'Ride Id','driver_count': 'No. Drivers', 'type':'City Type'}, inplace = True )
pyber_data_df.head(10)

Unnamed: 0,City,Date,Fare,Ride Id,No. Drivers,City Type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban
5,South Latoya,2019-03-11 12:26:48,9.52,1994999424437,10,Urban
6,New Paulville,2019-02-27 11:17:56,43.25,793208410091,44,Urban
7,Simpsonburgh,2019-04-26 00:43:24,35.98,111953927754,21,Urban
8,South Karenland,2019-01-08 03:28:48,35.09,7995623208694,4,Urban
9,North Jasmine,2019-03-09 06:26:29,42.81,5327642267789,33,Urban


## Create new DataFrame for City Type and Fare

## Select columns Date, City Type, Fare

In [58]:
selected_columns = pyber_data_df[["Date", "City Type", "Fare"]]
date_cityType_fare_df = selected_columns.copy()
date_cityType_fare_df = date_cityType_fare_df.set_index(["Date"])
date_cityType_fare_df.head(10)

Unnamed: 0_level_0,City Type,Fare
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-14 10:14:22,Urban,13.83
2019-03-04 18:24:09,Urban,30.24
2019-02-24 04:29:00,Urban,33.44
2019-02-10 23:22:03,Urban,23.44
2019-03-06 04:28:35,Urban,34.58
2019-03-11 12:26:48,Urban,9.52
2019-02-27 11:17:56,Urban,43.25
2019-04-26 00:43:24,Urban,35.98
2019-01-08 03:28:48,Urban,35.09
2019-03-09 06:26:29,Urban,42.81


## Set the index to the datetime data type.

In [65]:
date_cityType_fare_df.index = pd.DatetimeIndex(date_cityType_fare_df.index)

In [71]:
date_cityType_fare_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2375 entries, 2019-01-14 10:14:22 to 2019-04-25 10:20:13
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   City Type  2375 non-null   object 
 1   Fare       2375 non-null   float64
dtypes: float64(1), object(1)
memory usage: 55.7+ KB


## Calculate the sum() of fares by the type of city and date using groupby() to create a Series.

In [75]:
sum_of_fares_by_cityType_date = date_cityType_fare_df.groupby(["Date"]).sum()["Fare"]
sum_of_fares_by_cityType_date.head()

Date
2019-01-01 00:08:16    37.91
2019-01-01 00:46:46    47.74
2019-01-01 02:07:24    24.07
2019-01-01 03:46:50     7.57
2019-01-01 05:23:21    10.75
Name: Fare, dtype: float64