In [7]:
# Add Matplotlib inline magic command
%matplotlib inline

# Import Dependencies & Setup
import matplotlib as plt
import pandas as pd
import numpy as np
import os

In [8]:
# Build the paths of both input CSVs inside the "Resources" folder.
city_file, ride_file = (
    os.path.join("Resources", csv_name)
    for csv_name in ("city_data.csv", "ride_data.csv")
)

# Load each CSV into its own pandas DataFrame (city data first, then ride data).
city_data_df, ride_data_df = (
    pd.read_csv(csv_path) for csv_path in (city_file, ride_file)
)

In [9]:
# Preview the first five rows of the city data.
city_data_df.head(n=5)

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban


In [10]:
# Preview the first five rows of the ride data.
ride_data_df.head(n=5)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344


In [22]:
# Inspect city_data_df for missing values: count() reports the number of
# non-missing entries per column, so unequal counts would reveal gaps.
city_data_df.count()

city            120
driver_count    120
type            120
dtype: int64

In [23]:
# Total the missing (NaN/None) entries in each column of the city data.
city_data_df.isna().sum()

city            0
driver_count    0
type            0
dtype: int64

In [24]:
# List the data type of each column in the city data.
city_data_df.dtypes

city            object
driver_count     int64
type            object
dtype: object

In [28]:
# Get the distinct values that appear in the "type" column (the city types).
city_data_df["type"].unique()

array(['Urban', 'Suburban', 'Rural'], dtype=object)

In [32]:
# Count how many rows of city_data_df fall under each city type.
# value_counts() tallies every distinct value of "type", not only "Urban".
city_data_df["type"].value_counts()

Urban       66
Suburban    36
Rural       18
Name: type, dtype: int64

In [25]:
# Inspect ride_data_df for missing values: count() reports the number of
# non-missing entries per column, so unequal counts would reveal gaps.
ride_data_df.count()

city       2375
date       2375
fare       2375
ride_id    2375
dtype: int64

In [26]:
# Total the missing (NaN/None) entries in each column of the ride data.
ride_data_df.isna().sum()

city       0
date       0
fare       0
ride_id    0
dtype: int64

In [27]:
# List the data type of each column in the ride data.
ride_data_df.dtypes

city        object
date        object
fare       float64
ride_id      int64
dtype: object

In [35]:
# Combine the ride and city data into a single dataset.
# A left join keeps every row of ride_data_df and attaches the matching
# city's columns (driver_count, type). The join key is the single shared
# column "city".
# validate="many_to_one" makes pandas raise a MergeError if a city appears
# more than once in city_data_df; without it, duplicated cities would
# silently duplicate ride rows in the merged result.
pyber_data_df = pd.merge(
    ride_data_df,
    city_data_df,
    how="left",
    on="city",
    validate="many_to_one",
)
pyber_data_df.head(10)

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban
5,South Latoya,2019-03-11 12:26:48,9.52,1994999424437,10,Urban
6,New Paulville,2019-02-27 11:17:56,43.25,793208410091,44,Urban
7,Simpsonburgh,2019-04-26 00:43:24,35.98,111953927754,21,Urban
8,South Karenland,2019-01-08 03:28:48,35.09,7995623208694,4,Urban
9,North Jasmine,2019-03-09 06:26:29,42.81,5327642267789,33,Urban


In [38]:
# Total the missing (NaN/None) entries in each column of the merged data;
# a non-zero driver_count/type total would mean a ride's city had no match.
pyber_data_df.isna().sum()

city            0
date            0
fare            0
ride_id         0
driver_count    0
type            0
dtype: int64

In [39]:
# List the data type of each column in the merged dataset.
pyber_data_df.dtypes

city             object
date             object
fare            float64
ride_id           int64
driver_count      int64
type             object
dtype: object