# PyBer Challenge
**Objective**: Determine if there is a correlation between the average fare and the total rides for each city type for the individual scatter plots, and if there is any statistical significance between the different city types for each box-and-whisker plot.

## Tasks
1. Create a PyBer Summary DataFrame for each city type:
    * Total Rides
    * Total Drivers
    * Total Fares
    * Average Fare per Ride
    * Average Fare per Driver
2. Create a Multiple-Line Plot for the sum of the fares for each city type.

### Notebook Setup

In [1]:
# Add Matplotlib inline magic command
%matplotlib inline

# Setup dependencies
import matplotlib.pyplot as plt
import pandas as pd
import os

In [2]:
# Data files to load
city_data_to_load = os.path.join("Resources", "city_data.csv")
ride_data_to_load = os.path.join("Resources", "ride_data.csv")

# Read data files and store them in a Pandas DataFrame
city_data_df = pd.read_csv(city_data_to_load)
ride_data_df = pd.read_csv(ride_data_to_load)

### Data Cleaning

In [7]:
city_data_df.count()

city            120
driver_count    120
type            120
dtype: int64

In [6]:
city_data_df.isnull().sum()

city            0
driver_count    0
type            0
dtype: int64

In [9]:
city_data_df.dtypes

city            object
driver_count     int64
type            object
dtype: object

In [10]:
city_data_df["type"].unique()

array(['Urban', 'Suburban', 'Rural'], dtype=object)

In [11]:
sum(city_data_df["type"]=="Urban")

66

In [12]:
sum(city_data_df["type"]=="Suburban")

36

In [13]:
sum(city_data_df["type"]=="Rural")

18

In [14]:
ride_data_df.count()

city       2375
date       2375
fare       2375
ride_id    2375
dtype: int64

In [16]:
ride_data_df.isnull().sum()

city       0
date       0
fare       0
ride_id    0
dtype: int64

In [17]:
ride_data_df.dtypes

city        object
date        object
fare       float64
ride_id      int64
dtype: object

### Merge DataFrames

In [65]:
# Combine data into a single data
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on=["city", "city"])

pyber_data_df.dtypes

city             object
date             object
fare            float64
ride_id           int64
driver_count      int64
type             object
dtype: object

## Task 1: Create PyBer Summary DataFrame

In [138]:
# Set up Rural Cities DF, Suburban Cities DF, and Urban Cities DF
rural_cities_df = pyber_data_df[pyber_data_df["type"] == "Rural"]
urban_cities_df = pyber_data_df[pyber_data_df["type"] == "Urban"]
suburban_cities_df = pyber_data_df[pyber_data_df["type"] == "Suburban"]

#### Calculate the total number of rides in each City Type

In [175]:
total_rides_by_city_type = pyber_data_df.groupby(["type"]).count()["ride_id"]
total_rides_by_city_type

type
Rural        125
Suburban     625
Urban       1625
Name: ride_id, dtype: int64

In [176]:
# Get the total number of rides in Rural Cities
rural_ride_count = rural_cities_df.groupby(["city"]).count()["ride_id"]
rural_ride_count.head(10)

city
Bradshawfurt       10
Garzaport           3
Harringtonfort      6
Jessicaport         6
Lake Jamie          6
Lake Latoyabury    11
Michaelberg        12
New Ryantown        6
Newtonview          4
North Holly         9
Name: ride_id, dtype: int64

In [177]:
rural_ride_sum = sum(rural_cities_df.groupby(["city"]).count()["ride_id"])
rural_ride_sum

125

In [178]:
# Get the total number of rides in Suburban Cities
suburban_ride_count = suburban_cities_df.groupby(["city"]).count()["ride_id"]
suburban_ride_count.head(10)

city
Barronchester      16
Bethanyland        18
Brandonfort        19
Colemanland        22
Davidfurt          17
East Aaronbury      9
East Danielview    17
East Kentstad      13
East Marymouth     27
Grayville          15
Name: ride_id, dtype: int64

In [179]:
suburban_ride_sum = sum(suburban_cities_df.groupby(["city"]).count()["ride_id"])
suburban_ride_sum

625

In [180]:
# Get the total number of rides in Urban Cities
urban_ride_count = urban_cities_df.groupby(["city"]).count()["ride_id"]
urban_ride_count.head(10)

city
Amandaburgh        18
Barajasview        22
Carriemouth        27
Christopherfurt    27
Deanville          19
East Kaylahaven    29
Erikaland          12
Grahamburgh        25
Huntermouth        24
Hurleymouth        28
Name: ride_id, dtype: int64

In [189]:
urban_ride_sum = sum(urban_cities_df.groupby(["city"]).count()["ride_id"])
urban_ride_sum

1625

#### Calculate the total number of drviers in each City Type

In [182]:
# Get the total number of drivers in Rural Cities
rural_drivers_count = rural_cities_df.groupby(["city"]).count()["driver_count"]
rural_drivers_count.head()

city
Bradshawfurt      10
Garzaport          3
Harringtonfort     6
Jessicaport        6
Lake Jamie         6
Name: driver_count, dtype: int64

In [191]:
# Get the total number of drivers in Suburban Cities
suburban_drivers_count = suburban_cities_df.groupby(["city"]).count()["driver_count"]
suburban_drivers_count.head(10)

city
Barronchester      16
Bethanyland        18
Brandonfort        19
Colemanland        22
Davidfurt          17
East Aaronbury      9
East Danielview    17
East Kentstad      13
East Marymouth     27
Grayville          15
Name: driver_count, dtype: int64

In [192]:
# Get the total number of drivers in Urban Cities
urban_drivers_count = urban_cities_df.groupby(["city"]).count()["driver_count"]
urban_drivers_count.head(10)

city
Amandaburgh        18
Barajasview        22
Carriemouth        27
Christopherfurt    27
Deanville          19
East Kaylahaven    29
Erikaland          12
Grahamburgh        25
Huntermouth        24
Hurleymouth        28
Name: driver_count, dtype: int64

#### Calculate the total fares in each City Type

In [193]:
# Get the total fares in Rural Cities
rural_fares = rural_cities_df.groupby(["city"]).sum()["fare"]
rural_fares.head()

city
Bradshawfurt      400.64
Garzaport          72.37
Harringtonfort    200.82
Jessicaport       216.08
Lake Jamie        206.15
Name: fare, dtype: float64

In [194]:
rural_fares.sum()

4327.93

In [161]:
# Get the total fares in Suburban Cities
suburban_fares = suburban_cities_df.groupby(["city"]).sum()["fare"]
suburban_fares.head()

city
Barronchester    582.76
Bethanyland      593.21
Brandonfort      673.31
Colemanland      679.68
Davidfurt        543.93
Name: fare, dtype: float64

In [195]:
suburban_fares.sum()

19356.33

In [162]:
# Get the total fares in Urban Cities
urban_fares = urban_cities_df.groupby(["city"]).sum()["fare"]
urban_fares.head()

city
Amandaburgh        443.55
Barajasview        557.31
Carriemouth        764.49
Christopherfurt    661.55
Deanville          491.01
Name: fare, dtype: float64

In [196]:
urban_fares.sum()

39854.38000000001

#### Calculate the average fare per ride in each City Type

In [197]:
rural_avgfare_perride = rural_fares / rural_ride_count
rural_avgfare_perride

city
Bradshawfurt         40.064000
Garzaport            24.123333
Harringtonfort       33.470000
Jessicaport          36.013333
Lake Jamie           34.358333
Lake Latoyabury      26.061818
Michaelberg          34.997500
New Ryantown         43.278333
Newtonview           36.745000
North Holly          29.130000
North Jaime          30.802500
Penaborough          35.246000
Randallchester       29.742000
South Jennifer       35.264286
South Marychester    41.870000
South Saramouth      36.160000
Taylorhaven          42.263333
West Heather         33.890000
dtype: float64

In [198]:
suburban_avgfare_perride = suburban_fares / suburban_ride_count
suburban_avgfare_perride

city
Barronchester         36.422500
Bethanyland           32.956111
Brandonfort           35.437368
Colemanland           30.894545
Davidfurt             31.995882
East Aaronbury        25.661111
East Danielview       31.560588
East Kentstad         29.823077
East Marymouth        30.835185
Grayville             27.763333
Josephside            32.858148
Lake Ann              30.890000
Lake Omar             28.075000
Lake Robertside       31.262000
Lake Scott            31.886111
Lewishaven            25.241667
Lewisland             34.614118
Mezachester           30.760000
Myersshire            30.203125
New Olivia            34.054118
New Raymond           27.961818
New Shannonberg       28.378235
Nicolechester         30.909474
North Jeffrey         29.242308
North Richardhaven    24.704286
North Timothy         31.262000
Port Shane            31.077895
Rodriguezview         30.745333
Sotoville             31.980000
South Brenda          33.964583
South Teresa          31.220455
Ver

In [200]:
urban_avgfare_perride = urban_fares / urban_ride_count
urban_avgfare_perride

city
Amandaburgh            24.641667
Barajasview            25.332273
Carriemouth            28.314444
Christopherfurt        24.501852
Deanville              25.842632
                         ...    
West Patrickchester    28.233125
West Robert            25.123871
West Samuelburgh       21.767600
Williamsstad           24.362174
Williamsview           26.599000
Length: 66, dtype: float64

#### Calculate the average fare per driver in each City Type

In [201]:
rural_avgfare_perdriver = rural_fares / rural_drivers_count
rural_avgfare_perdriver

city
Bradshawfurt         40.064000
Garzaport            24.123333
Harringtonfort       33.470000
Jessicaport          36.013333
Lake Jamie           34.358333
Lake Latoyabury      26.061818
Michaelberg          34.997500
New Ryantown         43.278333
Newtonview           36.745000
North Holly          29.130000
North Jaime          30.802500
Penaborough          35.246000
Randallchester       29.742000
South Jennifer       35.264286
South Marychester    41.870000
South Saramouth      36.160000
Taylorhaven          42.263333
West Heather         33.890000
dtype: float64

In [202]:
suburban_avgfare_perdriver = suburban_fares / suburban_drivers_count
suburban_avgfare_perdriver

city
Barronchester         36.422500
Bethanyland           32.956111
Brandonfort           35.437368
Colemanland           30.894545
Davidfurt             31.995882
East Aaronbury        25.661111
East Danielview       31.560588
East Kentstad         29.823077
East Marymouth        30.835185
Grayville             27.763333
Josephside            32.858148
Lake Ann              30.890000
Lake Omar             28.075000
Lake Robertside       31.262000
Lake Scott            31.886111
Lewishaven            25.241667
Lewisland             34.614118
Mezachester           30.760000
Myersshire            30.203125
New Olivia            34.054118
New Raymond           27.961818
New Shannonberg       28.378235
Nicolechester         30.909474
North Jeffrey         29.242308
North Richardhaven    24.704286
North Timothy         31.262000
Port Shane            31.077895
Rodriguezview         30.745333
Sotoville             31.980000
South Brenda          33.964583
South Teresa          31.220455
Ver

In [204]:
urban_avgfare_perdriver = urban_fares / urban_drivers_count
urban_avgfare_perdriver

city
Amandaburgh            24.641667
Barajasview            25.332273
Carriemouth            28.314444
Christopherfurt        24.501852
Deanville              25.842632
                         ...    
West Patrickchester    28.233125
West Robert            25.123871
West Samuelburgh       21.767600
Williamsstad           24.362174
Williamsview           26.599000
Length: 66, dtype: float64