# Import Data into Pandas DataFrame 

In [1]:
# add matplotlib inline magic command
%matplotlib inline
# dependencies and setup 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import csv 


In [2]:
# Files to load: relative paths to the city and ride CSV inputs
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"


In [5]:
# Load the city-level CSV into a DataFrame, then preview its first ten rows.
city_data_df = pd.read_csv(city_data_to_load)
city_data_df.head(n=10)

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban
5,West Anthony,70,Urban
6,West Angela,48,Urban
7,Martinezhaven,25,Urban
8,Karenberg,22,Urban
9,Barajasview,26,Urban


In [6]:
# Load the ride-level CSV into a DataFrame, then preview its first ten rows.
ride_data_df = pd.read_csv(ride_data_to_load)
ride_data_df.head(n=10)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344
5,South Latoya,2019-03-11 12:26:48,9.52,1994999424437
6,New Paulville,2019-02-27 11:17:56,43.25,793208410091
7,Simpsonburgh,2019-04-26 00:43:24,35.98,111953927754
8,South Karenland,2019-01-08 03:28:48,35.09,7995623208694
9,North Jasmine,2019-03-09 06:26:29,42.81,5327642267789


# Inspect Data in DataFrames

## City Data

checking for nulls

In [7]:
# count the non-null entries in each column
city_data_df.notna().sum()

city            120
driver_count    120
type            120
dtype: int64

In [9]:
# confirm that no column contains missing values (every count should be zero)
city_data_df.isna().sum()

city            0
driver_count    0
type            0
dtype: int64

checking data types

In [10]:
# check each column's dtype to confirm the columns used in calculations are numeric
city_data_df.dtypes

city            object
driver_count     int64
type            object
dtype: object

In [11]:
# sum every column: only driver_count is numeric, so its sum is the total driver count;
# the object columns (city, type) are string-concatenated, which is not meaningful.
# NOTE(review): this does not give the number of data points — confirm intent; if a row
# count was wanted, len(city_data_df) would give it.
city_data_df.sum()

city            RichardfortWilliamsstadPort AngelaRodneyfortWe...
driver_count                                                 2973
type            UrbanUrbanUrbanUrbanUrbanUrbanUrbanUrbanUrbanU...
dtype: object

Get the types of cities

In [14]:
# list the distinct city types present in the 'type' column
pd.unique(city_data_df['type'])


array(['Urban', 'Suburban', 'Rural'], dtype=object)

number of urban cities (rows in the city data with type 'Urban')

In [17]:
# count the rows of the city data whose type is 'Urban'
# the comparison is an exact, case-sensitive string match, so the capitalization must match the data
# the vectorized Series.sum() replaces built-in sum(), which iterates the Series element by element;
# int() keeps the displayed result a plain Python integer
int((city_data_df['type'] == 'Urban').sum())

66

number of rural cities (rows in the city data with type 'Rural')

In [18]:
# count the rows of the city data whose type is 'Rural' (exact, case-sensitive match)
# the vectorized Series.sum() replaces built-in sum(), which iterates the Series element by element;
# int() keeps the displayed result a plain Python integer
int((city_data_df['type'] == 'Rural').sum())

18

number of suburban cities (rows in the city data with type 'Suburban')

In [19]:
# count the rows of the city data whose type is 'Suburban' (exact, case-sensitive match)
# the vectorized Series.sum() replaces built-in sum(), which iterates the Series element by element;
# int() keeps the displayed result a plain Python integer
int((city_data_df['type'] == 'Suburban').sum())

36

## Ride Data

checking for those nulls

In [20]:
# count the non-null entries in each column
ride_data_df.notna().sum()

city       2375
date       2375
fare       2375
ride_id    2375
dtype: int64

In [21]:
# confirm that no column contains missing values (every count should be zero)
ride_data_df.isna().sum()

city       0
date       0
fare       0
ride_id    0
dtype: int64

get data types

In [22]:
# check each column's dtype; read_csv was called without parse_dates, so 'date' is
# loaded as object (string) rather than datetime64
ride_data_df.dtypes

city        object
date        object
fare       float64
ride_id      int64
dtype: object

# Merge DataFrames

* merging the 2 dataframes requires a common column, in this case 'city' is the column shared by both sets. 
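* syntax: new_df = pd.merge(left_df, right_df, on="shared_column") — 'on' names the column(s) that exist in BOTH dataframes; when the key columns have different names, use left_on="column_left_df", right_on="column_right_df" instead
* the optional 'how=' parameter selects the join type: 'left', 'right', 'inner' or 'outer'; the default is 'inner'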

In [24]:
# combine the ride and city dataframes into a single one:
# each ride row gains the driver_count and type of its city.
# how='left' keeps every row of ride_data_df, even if its city has no match in city_data_df.
# 'city' is the one key column shared by both frames, so it is named once in `on`.
pyber_data_df = pd.merge(ride_data_df, city_data_df, how='left', on='city')

# preview the first ten rows of the merged dataframe
pyber_data_df.head(10)

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban
5,South Latoya,2019-03-11 12:26:48,9.52,1994999424437,10,Urban
6,New Paulville,2019-02-27 11:17:56,43.25,793208410091,44,Urban
7,Simpsonburgh,2019-04-26 00:43:24,35.98,111953927754,21,Urban
8,South Karenland,2019-01-08 03:28:48,35.09,7995623208694,4,Urban
9,North Jasmine,2019-03-09 06:26:29,42.81,5327642267789,33,Urban
