In [101]:
# Add Matplotlib inline magic command
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
# Display-only setting: render every float with zero decimal places.
pd.set_option("display.float_format", "{:.0f}".format)

In [102]:
# Relative paths of the two input CSV files (city table, ride table).
city_data_to_load, ride_data_to_load = (
    "Resources/city_data.csv",
    "Resources/ride_data.csv",
)

In [103]:
# Load the city CSV into a DataFrame, then preview its first ten rows.
city_data_df = pd.read_csv(filepath_or_buffer=city_data_to_load)
city_data_df.head(n=10)

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban
5,West Anthony,70,Urban
6,West Angela,48,Urban
7,Martinezhaven,25,Urban
8,Karenberg,22,Urban
9,Barajasview,26,Urban


In [104]:
# Load the ride CSV into a DataFrame, then preview its first ten rows.
ride_data_df = pd.read_csv(filepath_or_buffer=ride_data_to_load)
ride_data_df.head(n=10)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,1/14/19 10:14,14,5740000000000
1,South Michelleport,3/4/19 18:24,30,2340000000000
2,Port Samanthamouth,2/24/19 4:29,33,2010000000000
3,Rodneyfort,2/10/19 23:22,23,5150000000000
4,South Jack,3/6/19 4:28,35,3910000000000
5,South Latoya,3/11/19 12:26,10,2000000000000
6,New Paulville,2/27/19 11:17,43,793000000000
7,Simpsonburgh,4/26/19 0:43,36,112000000000
8,South Karenland,1/8/19 3:28,35,8000000000000
9,North Jasmine,3/9/19 6:26,43,5330000000000


In [105]:
# Count the non-null entries in each column of the city data.
city_data_df.notna().sum()

city            120
driver_count    120
type            120
dtype: int64

In [106]:
# Count the missing (null) entries in each column of the city data.
city_data_df.isna().sum()

city            0
driver_count    0
type            0
dtype: int64

In [107]:
# Show the dtype pandas inferred for each column of the city data
# (no explicit dtypes were passed when the CSV was read).
city_data_df.dtypes

city            object
driver_count     int64
type            object
dtype: object

In [108]:
# List the distinct city types present in the "type" column.
city_data_df.loc[:, "type"].unique()

array(['Urban', 'Suburban', 'Rural'], dtype=object)

In [109]:
# Count the rows of the city table whose type is "Urban".
# Series.sum() adds up the boolean mask in vectorized code instead of
# iterating it element by element with the Python builtin sum().
(city_data_df["type"] == "Urban").sum()

66

In [110]:
# Count the rows of the city table whose type is "Suburban".
# Series.sum() adds up the boolean mask in vectorized code instead of
# iterating it element by element with the Python builtin sum().
(city_data_df["type"] == "Suburban").sum()

36

In [111]:
# Count the rows of the city table whose type is "Rural".
# Series.sum() adds up the boolean mask in vectorized code instead of
# iterating it element by element with the Python builtin sum().
(city_data_df["type"] == "Rural").sum()

18

In [112]:
# Count the non-null entries in each column of the ride data.
ride_data_df.notna().sum()

city       2375
date       2375
fare       2375
ride_id    2375
dtype: int64

In [113]:
# Count the missing (null) entries in each column of the ride data.
ride_data_df.isna().sum()

city       0
date       0
fare       0
ride_id    0
dtype: int64

In [114]:
# Show the dtype pandas inferred for each column of the ride data
# (no explicit dtypes were passed when the CSV was read).
ride_data_df.dtypes

city        object
date        object
fare       float64
ride_id    float64
dtype: object

In [115]:
# Combine the data into a single dataset.
# A left join keeps every ride row and attaches that ride's city attributes
# (driver_count, type). "city" is the only column the two tables share, so it
# is passed once as the join key rather than as a duplicated list.
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on="city")

# Display the DataFrame
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,1/14/19 10:14,14,5740000000000,5,Urban
1,South Michelleport,3/4/19 18:24,30,2340000000000,72,Urban
2,Port Samanthamouth,2/24/19 4:29,33,2010000000000,57,Urban
3,Rodneyfort,2/10/19 23:22,23,5150000000000,34,Urban
4,South Jack,3/6/19 4:28,35,3910000000000,46,Urban


In [116]:
# Keep only the merged ride rows whose city type is "Urban".
urban_cities_df = pyber_data_df.loc[pyber_data_df["type"].eq("Urban")]
urban_cities_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,1/14/19 10:14,14,5740000000000,5,Urban
1,South Michelleport,3/4/19 18:24,30,2340000000000,72,Urban
2,Port Samanthamouth,2/24/19 4:29,33,2010000000000,57,Urban
3,Rodneyfort,2/10/19 23:22,23,5150000000000,34,Urban
4,South Jack,3/6/19 4:28,35,3910000000000,46,Urban


In [117]:
# Keep only the merged ride rows whose city type is "Suburban".
suburban_cities_df = pyber_data_df.loc[pyber_data_df["type"].eq("Suburban")]
suburban_cities_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
1625,Barronchester,1/27/19 3:08,28,6650000000000,11,Suburban
1626,East Kentstad,4/7/19 19:44,19,6580000000000,20,Suburban
1627,Lake Omar,1/17/19 21:33,22,967000000000,22,Suburban
1628,Myersshire,2/27/19 17:38,17,5710000000000,19,Suburban
1629,West Hannah,4/19/19 1:06,38,2270000000000,12,Suburban


In [118]:
# Keep only the merged ride rows whose city type is "Rural".
rural_cities_df = pyber_data_df.loc[pyber_data_df["type"].eq("Rural")]
rural_cities_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
2250,Randallchester,4/13/19 11:13,43,1080000000000,9,Rural
2251,North Holly,2/2/19 14:54,12,1990000000000,8,Rural
2252,Michaelberg,3/27/19 18:27,55,4420000000000,6,Rural
2253,Lake Latoyabury,2/23/19 21:12,48,3270000000000,2,Rural
2254,Lake Latoyabury,5/6/19 8:57,52,4020000000000,2,Rural


In [119]:
# Rides per urban city: count the ride_id entries within each city group.
urban_ride_count = urban_cities_df.groupby("city")["ride_id"].count()
urban_ride_count.head()

city
Amandaburgh        18
Barajasview        22
Carriemouth        27
Christopherfurt    27
Deanville          19
Name: ride_id, dtype: int64

In [120]:
# Rides per suburban city: count the ride_id entries within each city group.
suburban_ride_count = suburban_cities_df.groupby("city")["ride_id"].count()
suburban_ride_count.head()

city
Barronchester    16
Bethanyland      18
Brandonfort      19
Colemanland      22
Davidfurt        17
Name: ride_id, dtype: int64

In [121]:
# Rides per rural city: count the ride_id entries within each city group.
rural_ride_count = rural_cities_df.groupby("city")["ride_id"].count()
rural_ride_count.head()

city
Bradshawfurt      10
Garzaport          3
Harringtonfort     6
Jessicaport        6
Lake Jamie         6
Name: ride_id, dtype: int64

In [122]:
# Get the average number of drivers for each urban city.
# Select driver_count BEFORE aggregating: calling .mean() on the whole grouped
# frame also tries to average the object columns (date, type), which raises
# TypeError on pandas >= 2.0 and does needless work on older versions.
# NOTE(review): driver_count comes from the city table, so it is presumably
# constant within a city and the mean just recovers it as a float; confirm.
urban_driver_count = urban_cities_df.groupby("city")["driver_count"].mean()
urban_driver_count.head()

city
Amandaburgh       12
Barajasview       26
Carriemouth       52
Christopherfurt   41
Deanville         49
Name: driver_count, dtype: float64

In [123]:
# Get the average number of drivers for each suburban city.
# Select driver_count BEFORE aggregating: calling .mean() on the whole grouped
# frame also tries to average the object columns (date, type), which raises
# TypeError on pandas >= 2.0 and does needless work on older versions.
# NOTE(review): driver_count comes from the city table, so it is presumably
# constant within a city and the mean just recovers it as a float; confirm.
suburban_driver_count = suburban_cities_df.groupby("city")["driver_count"].mean()
suburban_driver_count.head()

city
Barronchester   11
Bethanyland     22
Brandonfort     10
Colemanland     23
Davidfurt       23
Name: driver_count, dtype: float64