In [1]:
### 4.3 Loading and Reading CSV files

In [1]:
# Add Matplotlib inline magic command
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd

# File to Load (Remember to change these)
city_data_to_load = "city_data.csv"
ride_data_to_load = "ride_data.csv"

# Read the City and Ride Data
city_data_df = pd.read_csv(city_data_to_load)
ride_data_df = pd.read_csv(ride_data_to_load)

In [2]:
# Combine the data into a single dataset
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on=["city", "city"])

# Display the data table for preview
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


In [3]:
pyber_data_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 2375 entries, 0 to 2374
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   city          2375 non-null   object 
 1   date          2375 non-null   object 
 2   fare          2375 non-null   float64
 3   ride_id       2375 non-null   int64  
 4   driver_count  2375 non-null   int64  
 5   type          2375 non-null   object 
dtypes: float64(1), int64(2), object(3)
memory usage: 129.9+ KB


In [4]:
## Deliverable 1: Get a Summary DataFrame

In [5]:
#  1. Get the total rides for each city type
total_rides = pyber_data_df.groupby(["city"])["ride_id"].count()
total_rides

city
Amandaburgh         18
Barajasview         22
Barronchester       16
Bethanyland         18
Bradshawfurt        10
                    ..
West Robert         31
West Samuelburgh    25
Williamsonville     14
Williamsstad        23
Williamsview        20
Name: ride_id, Length: 120, dtype: int64

In [6]:
# 2. Get the total drivers for each city type
total_drivers =  pyber_data_df.groupby(["city"])["driver_count"].sum()
total_drivers

city
Amandaburgh          216
Barajasview          572
Barronchester        176
Bethanyland          396
Bradshawfurt          70
                    ... 
West Robert         1209
West Samuelburgh    1825
Williamsonville       28
Williamsstad        1357
Williamsview         920
Name: driver_count, Length: 120, dtype: int64

In [7]:
#  3. Get the total amount of fares for each city type
total_fares = pyber_data_df.groupby(["city"])["fare"].sum()
total_fares

city
Amandaburgh         443.55
Barajasview         557.31
Barronchester       582.76
Bethanyland         593.21
Bradshawfurt        400.64
                     ...  
West Robert         778.84
West Samuelburgh    544.19
Williamsonville     446.25
Williamsstad        560.33
Williamsview        531.98
Name: fare, Length: 120, dtype: float64

In [8]:
#  4. Get the average fare per ride for each city type.
average_fare_per_ride = total_fares/total_rides
average_fare_per_ride

city
Amandaburgh         24.641667
Barajasview         25.332273
Barronchester       36.422500
Bethanyland         32.956111
Bradshawfurt        40.064000
                      ...    
West Robert         25.123871
West Samuelburgh    21.767600
Williamsonville     31.875000
Williamsstad        24.362174
Williamsview        26.599000
Length: 120, dtype: float64

In [9]:
# 5. Get the average fare per driver for each city type.
average_fare_per_driver = total_fares/total_drivers
average_fare_per_driver

city
Amandaburgh          2.053472
Barajasview          0.974318
Barronchester        3.311136
Bethanyland          1.498005
Bradshawfurt         5.723429
                      ...    
West Robert          0.644202
West Samuelburgh     0.298186
Williamsonville     15.937500
Williamsstad         0.412918
Williamsview         0.578239
Length: 120, dtype: float64

In [10]:
# get type of city
type_city =   pyber_data_df.groupby(["type"])["city"].count()
type_city

type
Rural        125
Suburban     625
Urban       1625
Name: city, dtype: int64

In [11]:
#per_school_types = school_data_df.set_index(["school_name"])["type"]
citytype = pyber_data_df.set_index(["city"])["type"]
citytype 

city
Lake Jonathanshire    Urban
South Michelleport    Urban
Port Samanthamouth    Urban
Rodneyfort            Urban
South Jack            Urban
                      ...  
Michaelberg           Rural
Lake Latoyabury       Rural
North Jaime           Rural
West Heather          Rural
Newtonview            Rural
Name: type, Length: 2375, dtype: object

In [33]:
#groups = df.groupby('a')['b'].apply(list)
group_citytype =    pyber_data_df.groupby(["city"])["type"].apply(list)
group_citytype

city
Amandaburgh         [Urban, Urban, Urban, Urban, Urban, Urban, Urb...
Barajasview         [Urban, Urban, Urban, Urban, Urban, Urban, Urb...
Barronchester       [Suburban, Suburban, Suburban, Suburban, Subur...
Bethanyland         [Suburban, Suburban, Suburban, Suburban, Subur...
Bradshawfurt        [Rural, Rural, Rural, Rural, Rural, Rural, Rur...
                                          ...                        
West Robert         [Urban, Urban, Urban, Urban, Urban, Urban, Urb...
West Samuelburgh    [Urban, Urban, Urban, Urban, Urban, Urban, Urb...
Williamsonville     [Suburban, Suburban, Suburban, Suburban, Subur...
Williamsstad        [Urban, Urban, Urban, Urban, Urban, Urban, Urb...
Williamsview        [Urban, Urban, Urban, Urban, Urban, Urban, Urb...
Name: type, Length: 120, dtype: object

In [13]:
#use in DataFrame
city_type_df = pd.DataFrame(group_citytype)
city_type_df

Unnamed: 0_level_0,type
city,Unnamed: 1_level_1
Amandaburgh,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Barajasview,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Barronchester,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Bethanyland,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Bradshawfurt,"[Rural, Rural, Rural, Rural, Rural, Rural, Rur..."
...,...
West Robert,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
West Samuelburgh,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Williamsonville,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Williamsstad,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."


In [15]:
#index, value = zip(*l_1d_index)
#index, value = zip(*pyber_data_df
 #pd.DataFrame(per_school_types)
    
per_city_type =  pd.DataFrame(citytype)
per_city_type

Unnamed: 0_level_0,type
city,Unnamed: 1_level_1
Lake Jonathanshire,Urban
South Michelleport,Urban
Port Samanthamouth,Urban
Rodneyfort,Urban
South Jack,Urban
...,...
Michaelberg,Rural
Lake Latoyabury,Rural
North Jaime,Rural
West Heather,Rural


In [16]:
#classify cities as rural,suburban,urban

In [17]:
#  7. Cleaning up the DataFrame. Delete the index name
#pyber_summary_df.index.name = None

In [18]:
#  8. Format the columns.


In [19]:
#  6. Create a PyBer summary DataFrame.
pyber_all_city_df = pd.DataFrame({
             "Total ride": total_rides,
             "Total driver": total_drivers,
             "Total fare": total_fares,
             "average fare per ride": average_fare_per_ride,
             "Average fare per driver": average_fare_per_driver,
                 "type of city " : group_citytype})
pyber_all_city_df.head(10)        

Unnamed: 0_level_0,Total ride,Total driver,Total fare,average fare per ride,Average fare per driver,type of city
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Amandaburgh,18,216,443.55,24.641667,2.053472,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Barajasview,22,572,557.31,25.332273,0.974318,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Barronchester,16,176,582.76,36.4225,3.311136,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Bethanyland,18,396,593.21,32.956111,1.498005,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Bradshawfurt,10,70,400.64,40.064,5.723429,"[Rural, Rural, Rural, Rural, Rural, Rural, Rur..."
Brandonfort,19,190,673.31,35.437368,3.543737,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Carriemouth,27,1404,764.49,28.314444,0.544509,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Christopherfurt,27,1107,661.55,24.501852,0.597606,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Colemanland,22,506,679.68,30.894545,1.343241,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Davidfurt,17,391,543.93,31.995882,1.391125,"[Suburban, Suburban, Suburban, Suburban, Subur..."


In [20]:
pyber_all_city_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 120 entries, Amandaburgh to Williamsview
Data columns (total 6 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Total ride               120 non-null    int64  
 1   Total driver             120 non-null    int64  
 2   Total fare               120 non-null    float64
 3   average fare per ride    120 non-null    float64
 4   Average fare per driver  120 non-null    float64
 5   type of city             120 non-null    object 
dtypes: float64(3), int64(2), object(1)
memory usage: 10.6+ KB


In [41]:
#  6. Create a PyBer summary DataFrame.
pyber_all_city_df = pd.DataFrame({
             "Total ride": total_rides,
             "Total driver": total_drivers,
             "Total fare": total_fares,
             "average fare per ride": average_fare_per_ride,
             "Average fare per driver": average_fare_per_driver,
                 "type of city " : group_citytype})
pyber_all_city_df.head(10)        

Unnamed: 0_level_0,Total ride,Total driver,Total fare,average fare per ride,Average fare per driver,type of city
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Amandaburgh,18,216,443.55,24.641667,2.053472,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Barajasview,22,572,557.31,25.332273,0.974318,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Barronchester,16,176,582.76,36.4225,3.311136,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Bethanyland,18,396,593.21,32.956111,1.498005,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Bradshawfurt,10,70,400.64,40.064,5.723429,"[Rural, Rural, Rural, Rural, Rural, Rural, Rur..."
Brandonfort,19,190,673.31,35.437368,3.543737,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Carriemouth,27,1404,764.49,28.314444,0.544509,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Christopherfurt,27,1107,661.55,24.501852,0.597606,"[Urban, Urban, Urban, Urban, Urban, Urban, Urb..."
Colemanland,22,506,679.68,30.894545,1.343241,"[Suburban, Suburban, Suburban, Suburban, Subur..."
Davidfurt,17,391,543.93,31.995882,1.391125,"[Suburban, Suburban, Suburban, Suburban, Subur..."
