In [57]:

# Dependencies and Setup
import matplotlib.pylab as plt
import pandas as pd
import numpy as np

# Input CSV locations, relative to the notebook's working directory
# (adjust these if the Resources folder lives elsewhere)
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"

# Load both CSVs in one pass: city table first, ride table second
city_df, ride_df = (
    pd.read_csv(csv_path)
    for csv_path in (city_data_to_load, ride_data_to_load)
)

In [58]:
# Display the data table for preview: the first rows of the city table
# (columns: city, driver_count, type)
city_df.head()

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban


In [59]:
# Preview the first rows of the ride table (columns: city, date, fare, ride_id)
ride_df.head()

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,2018-01-14 10:14:22,13.83,5739410935873
1,South Michelleport,2018-03-04 18:24:09,30.24,2343912425577
2,Port Samanthamouth,2018-02-24 04:29:00,33.44,2005065760003
3,Rodneyfort,2018-02-10 23:22:03,23.44,5149245426178
4,South Jack,2018-03-06 04:28:35,34.58,3908451377344


In [60]:
# Combine the data into a single dataset.
# A left join on "city" keeps every ride row and attaches that city's
# driver_count and type from the city table.
merged_df = pd.merge(ride_df, city_df, how="left", on=["city"])
# "city" deliberately stays a regular column: later cells select it by name
# (merged_df[["city", ...]]) and group on it. The bare
# merged_df.set_index("city") call that used to sit here threw its result
# away, so it never changed merged_df and has been removed.
merged_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2018-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2018-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2018-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2018-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2018-03-06 04:28:35,34.58,3908451377344,46,Urban


In [61]:
# Keep only the columns the charts below need
relevant_df = merged_df.loc[:, ["city", "fare", "driver_count", "type"]]
relevant_df.head()

Unnamed: 0,city,fare,driver_count,type
0,Lake Jonathanshire,13.83,5,Urban
1,South Michelleport,30.24,72,Urban
2,Port Samanthamouth,33.44,57,Urban
3,Rodneyfort,23.44,34,Urban
4,South Jack,34.58,46,Urban


## Bubble Plot of Ride Sharing Data

In [62]:


# Build the scatter plots for each city type

# Incorporate the other graph properties

# Create a legend

# Incorporate a text label regarding circle size

# Save Figure


In [63]:
# Obtain the x and y coordinates for each of the three city types

In [64]:
#Urban cities

In [65]:
# Rides per city (Urban): every row of relevant_df is one ride, so counting
# the "fare" entries of each city gives that city's number of rides.
urban_data = relevant_df[relevant_df["type"].eq("Urban")]
urban_grouped = urban_data.groupby(["city"])
urban_series = urban_grouped["fare"].count()
rides_per_urban = urban_series.values

In [66]:
#City Average Fare (Urban)
# Select "fare" before aggregating. relevant_df also carries the string
# column "type", and a frame-wide GroupBy.mean() raises TypeError on
# non-numeric columns in pandas >= 2.0 (older releases silently dropped them).
# The resulting per-city Series is the same as before on older pandas.
urban_average_series = urban_grouped["fare"].mean()
urban_average_fare = urban_average_series.values

In [67]:
#Suburban cities

In [68]:
# Rides per city (Suburban): every row of relevant_df is one ride, so counting
# the "fare" entries of each city gives that city's number of rides.
suburban_data = relevant_df[relevant_df["type"].eq("Suburban")]
suburban_grouped = suburban_data.groupby(["city"])
suburban_series = suburban_grouped["fare"].count()
rides_per_suburban = suburban_series.values

In [69]:
#City Average Fare (Suburban)
# Select "fare" before aggregating. relevant_df also carries the string
# column "type", and a frame-wide GroupBy.mean() raises TypeError on
# non-numeric columns in pandas >= 2.0 (older releases silently dropped them).
# The resulting per-city Series is the same as before on older pandas.
suburban_average_series = suburban_grouped["fare"].mean()
suburban_average_fare = suburban_average_series.values

In [70]:
#Rural cities

In [71]:
# Rides per city (Rural): every row of relevant_df is one ride, so counting
# the "fare" entries of each city gives that city's number of rides.
rural_data = relevant_df[relevant_df["type"].eq("Rural")]
rural_grouped = rural_data.groupby(["city"])
rural_series = rural_grouped["fare"].count()
rides_per_rural = rural_series.values

In [72]:
#City Average Fare (Rural)
# Select "fare" before aggregating. relevant_df also carries the string
# column "type", and a frame-wide GroupBy.mean() raises TypeError on
# non-numeric columns in pandas >= 2.0 (older releases silently dropped them).
# The resulting per-city Series is the same as before on older pandas.
rural_average_series = rural_grouped["fare"].mean()
rural_average_fare = rural_average_series.values

In [73]:
#Combined Data

In [74]:
#combined_list_rides = list(rides_per_urban)+list(rides_per_suburban)+list(rides_per_rural)
#combined_rides_per = np.array(combined_list_rides)

In [75]:
#combined_list_fares = list(urban_average_fare) + list(suburban_average_fare) + list(rural_average_fare)
#combined_average_fares = np.array(combined_list_fares)

In [76]:
# Per-city driver counts for each city type, in the same city order as the
# ride counts and average fares (all are grouped by "city").
# They are used to scale the bubble sizes in the scatter plot.
urban_driver_count, suburban_driver_count, rural_driver_count = (
    type_rides.groupby(["city"])["driver_count"].mean().values
    for type_rides in (urban_data, suburban_data, rural_data)
)

In [77]:
# Select matplotlib's interactive "notebook" backend for the bubble chart below
%matplotlib notebook

In [78]:
# Open the bubble-chart figure and draw the Urban cities:
# x = rides per city, y = average fare, bubble area scaled by driver count
plt.figure(figsize=(10, 7))
plt.scatter(
    x=rides_per_urban,
    y=urban_average_fare,
    s=urban_driver_count * 10,
    color="lightcoral",
    edgecolor='black',
    alpha=0.75,
    label="Urban",
)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x1d8f5fcceb8>

In [79]:
# Add the Suburban cities to the current figure:
# x = rides per city, y = average fare, bubble area scaled by driver count
plt.scatter(
    x=rides_per_suburban,
    y=suburban_average_fare,
    s=suburban_driver_count * 10,
    color="lightskyblue",
    edgecolor='black',
    alpha=0.75,
    label="Suburban",
)

<matplotlib.collections.PathCollection at 0x1d8f5e29a20>

In [80]:
# Add the Rural cities to the current figure:
# x = rides per city, y = average fare, bubble area scaled by driver count
plt.scatter(
    x=rides_per_rural,
    y=rural_average_fare,
    s=rural_driver_count * 10,
    color="gold",
    edgecolor='black',
    alpha=0.75,
    label="Rural",
)

<matplotlib.collections.PathCollection at 0x1d8f5e2f748>

In [81]:
# Turn the grid on explicitly. A bare plt.grid() *toggles* grid visibility,
# so re-running the cell on the same figure would switch the grid back off.
plt.grid(True)

In [82]:
# Title and axis labels for the bubble chart.
# The year in the title is read from the ride timestamps instead of being
# hard-coded: the previous literal "(2016)" did not match the loaded rides,
# whose previewed dates are all in 2018.
ride_years = pd.to_datetime(ride_df["date"]).dt.year
first_year, last_year = int(ride_years.min()), int(ride_years.max())
# Show a single year, or a "first-last" range if the rides span several years
year_label = str(first_year) if first_year == last_year else "{}-{}".format(first_year, last_year)
plt.title("Pyber Ride Sharing Data ({})".format(year_label))
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")

Text(82.87499738401837, 0.5, 'Average Fare ($)')

In [83]:
# Legend for the three city types.
lgnd = plt.legend(loc="upper right", title="City Types")
# The legend markers would otherwise inherit the (very different) bubble
# sizes, so give every marker the same fixed size.
# Matplotlib 3.7 renamed Legend.legendHandles to Legend.legend_handles and
# later removed the old name, so look up whichever attribute exists.
handles_attr = "legend_handles" if hasattr(lgnd, "legend_handles") else "legendHandles"
for legend_marker in getattr(lgnd, handles_attr):
    # Public setter instead of writing the private _sizes attribute
    legend_marker.set_sizes([30])


In [84]:
# Save the current figure (the bubble chart) as a PNG; the path is relative
# to the working directory.
# NOTE(review): presumably the Images/ folder must already exist - confirm.
plt.savefig("Images/taxiBubbleChart.png")

The scatter plot suggests that urban areas have many more drivers and much more demand for taxis, and that fares there are much lower than in rural areas. Moving from urban areas towards rural areas, there is much less demand for taxis, so there are fewer drivers and the prices are much higher.

## Total Fares by City Type

In [85]:
# Preview the working table before calculating each city type's share of fares
relevant_df.head()

Unnamed: 0,city,fare,driver_count,type
0,Lake Jonathanshire,13.83,5,Urban
1,South Michelleport,30.24,72,Urban
2,Port Samanthamouth,33.44,57,Urban
3,Rodneyfort,23.44,34,Urban
4,South Jack,34.58,46,Urban


In [86]:
# Group all rides by city type; reused by the following cells
grouped_by_type = relevant_df.groupby(["type"])
# Per-type fare subtotals, added together into the overall fare total
total_fare = grouped_by_type["fare"].agg("sum").sum()

In [87]:
# Total fare collected in each city type: compute the per-type sums once,
# then pick out the three subtotals by label
fare_totals = grouped_by_type["fare"].sum()
urban_fare, suburban_fare, rural_fare = (
    fare_totals[city_type] for city_type in ("Urban", "Suburban", "Rural")
)

In [88]:
# Share of total fares per city type (Urban, Suburban, Rural), in percent,
# rounded to one decimal place
percentages = [
    round((type_fare / total_fare) * 100, 1)
    for type_fare in (urban_fare, suburban_fare, rural_fare)
]

In [89]:
# Pie-chart styling, listed in Urban / Suburban / Rural order
labels = ["Urban", "Suburban", "Rural"]
colors = ["lightcoral", "lightskyblue", "gold"]
# Offset only the first (Urban) wedge from the centre
explode = (0.11, 0, 0)

In [90]:
# Re-select the interactive "notebook" backend before the fares pie chart.
# NOTE(review): presumably re-issued so the pie opens in a fresh figure
# instead of drawing onto the bubble chart - confirm.
%matplotlib notebook

In [91]:
# Build Pie Chart: share of total fares per city type
plt.pie(
    x=percentages,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct="%1.1f%%",
    startangle=270,
    shadow=True,
)

<IPython.core.display.Javascript object>

([<matplotlib.patches.Wedge at 0x1d8f5e84208>,
  <matplotlib.patches.Wedge at 0x1d8f5e84ba8>,
  <matplotlib.patches.Wedge at 0x1d8f5e8e588>],
 [Text(1.1149627273982459, 0.4700618220113871, 'Urban'),
  Text(-1.081158138266871, -0.20272414769660183, 'Suburban'),
  Text(-0.23320753144977086, -1.0749949987209728, 'Rural')],
 [Text(0.6542343276469046, 0.27582139969263203, '62.7%'),
  Text(-0.5897226208728387, -0.11057680783451007, '30.5%'),
  Text(-0.12720410806351135, -0.5863609083932578, '6.8%')])

In [94]:
# Title the current axes (the fares pie chart)
plt.title("% of Total Fares by City Type")

Text(0.5, 1.0, '% of Total Fares by City Type')

In [95]:
# Save Figure: write the current figure (fares pie chart) as a PNG; the path
# is relative to the working directory.
# NOTE(review): presumably the Images/ folder must already exist - confirm.
plt.savefig("Images/faresByCityType.png")

## Total Rides by City Type

In [96]:
# Preview the working table before calculating each city type's share of rides
relevant_df.head()

Unnamed: 0,city,fare,driver_count,type
0,Lake Jonathanshire,13.83,5,Urban
1,South Michelleport,30.24,72,Urban
2,Port Samanthamouth,33.44,57,Urban
3,Rodneyfort,23.44,34,Urban
4,South Jack,34.58,46,Urban


In [97]:
# Number of rides in each city type (one row of relevant_df per ride),
# and the overall ride total
trips_per_type = relevant_df.groupby(["type"])["city"].agg("count")
total_trips = trips_per_type.sum()

In [98]:
# Ride count for Urban cities
urban_trips = trips_per_type.loc["Urban"]


In [99]:
# Ride count for Suburban cities
suburban_trips = trips_per_type.loc["Suburban"]

In [100]:
# Ride count for Rural cities
rural_trips = trips_per_type.loc["Rural"]

In [101]:
# Share of total rides per city type (Urban, Suburban, Rural), in percent,
# rounded to one decimal place
percentages2 = [
    round((type_trips / total_trips) * 100, 1)
    for type_trips in (urban_trips, suburban_trips, rural_trips)
]
percentages2

[68.4, 26.3, 5.3]

In [102]:
# Pie-chart styling, listed in Urban / Suburban / Rural order
labels = ["Urban", "Suburban", "Rural"]
colors = ["lightcoral", "lightskyblue", "gold"]
# Offset only the first (Urban) wedge from the centre
explode = (0.11, 0, 0)

In [103]:
# Re-select the interactive "notebook" backend before the rides pie chart.
# NOTE(review): presumably re-issued so the pie opens in a fresh figure
# instead of drawing onto the previous chart - confirm.
%matplotlib notebook

In [104]:
# Build Pie Chart: share of total rides per city type
plt.pie(
    x=percentages2,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct="%1.1f%%",
    startangle=270,
    shadow=True,
)

<IPython.core.display.Javascript object>

([<matplotlib.patches.Wedge at 0x1d8f5ed2588>,
  <matplotlib.patches.Wedge at 0x1d8f5ed2fd0>,
  <matplotlib.patches.Wedge at 0x1d8f5edd9b0>],
 [Text(1.0134088967581252, 0.6611372081281465, 'Urban'),
  Text(-1.0081526472884879, -0.44003208947213646, 'Suburban'),
  Text(-0.1823097456737021, -1.0847871480767046, 'Rural')],
 [Text(0.5946448898332799, 0.3879400146867636, '68.4%'),
  Text(-0.5499014439755388, -0.24001750334843805, '26.3%'),
  Text(-0.09944167945838295, -0.5917020807691115, '5.3%')])

In [105]:
# Title the current axes (the rides pie chart)
plt.title("% of Total Rides by City Type")

Text(0.5, 1.0, '% of Total Rides by City Type')

In [106]:
# Save Figure: write the current figure (rides pie chart) as a PNG.
# The ".png" extension is spelled out to match the other saved charts rather
# than relying on matplotlib's default format for extension-less names.
plt.savefig("Images/ridesByCityType.png")

Comparing the total-rides-by-city-type chart with the total-fares-by-city-type chart shows the effect of demand on ride prices. The urban share of rides is larger than the urban share of fares, which implies that urban rides are relatively inexpensive. For rural rides the pattern is reversed, which suggests that rides in rural areas are relatively expensive.

## Total Drivers by City Type

In [107]:
# Preview the working table before calculating each city type's share of drivers
relevant_df.head()

Unnamed: 0,city,fare,driver_count,type
0,Lake Jonathanshire,13.83,5,Urban
1,South Michelleport,30.24,72,Urban
2,Port Samanthamouth,33.44,57,Urban
3,Rodneyfort,23.44,34,Urban
4,South Jack,34.58,46,Urban


In [108]:
# relevant_df repeats a city's driver_count on every one of its rides, so keep
# one row per city (the first) before summing.
total_drivers = relevant_df.drop_duplicates(subset="city").loc[:, "driver_count"].sum()
total_drivers

2973

In [109]:
# One row per city (first occurrence), reused by the per-type driver sums
df = relevant_df.drop_duplicates(subset="city")
# Drivers in Urban cities
urban_drivers = df.loc[df["type"] == "Urban", "driver_count"].sum()
urban_drivers

2405

In [110]:
# Drivers in Suburban cities (df holds one row per city)
suburban_drivers = df.loc[df["type"] == "Suburban", "driver_count"].sum()
suburban_drivers

490

In [111]:
# Drivers in Rural cities (df holds one row per city)
rural_drivers = df.loc[df["type"] == "Rural", "driver_count"].sum()
rural_drivers

78

In [112]:
# Share of total drivers per city type (Urban, Suburban, Rural), in percent,
# rounded to one decimal place
percentages3 = [
    round((type_drivers / total_drivers) * 100, 1)
    for type_drivers in (urban_drivers, suburban_drivers, rural_drivers)
]
percentages3

[80.9, 16.5, 2.6]

In [113]:
# Pie-chart styling, listed in Urban / Suburban / Rural order
labels = ["Urban", "Suburban", "Rural"]
colors = ["lightcoral", "lightskyblue", "gold"]
# Offset only the first (Urban) wedge from the centre
explode = (0.1, 0, 0)

In [114]:
# Re-select the interactive "notebook" backend before the drivers pie chart.
# NOTE(review): presumably re-issued so the pie opens in a fresh figure
# instead of drawing onto the previous chart - confirm.
%matplotlib notebook

In [115]:
# Build Pie Chart: share of total drivers per city type
plt.pie(
    x=percentages3,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct="%1.1f%%",
    startangle=230,
    shadow=True,
)

<IPython.core.display.Javascript object>

([<matplotlib.patches.Wedge at 0x1d8f5f1e9b0>,
  <matplotlib.patches.Wedge at 0x1d8f5f28390>,
  <matplotlib.patches.Wedge at 0x1d8f5f28d30>],
 [Text(1.1556823493846091, 0.32310726906225784, 'Urban'),
  Text(-1.0800090815606993, -0.20875915248538132, 'Suburban'),
  Text(-0.7734610946032672, -0.782149560592548, 'Rural')],
 [Text(0.674148037141022, 0.18847924028631707, '80.9%'),
  Text(-0.5890958626694723, -0.11386862862838978, '16.5%'),
  Text(-0.4218878697836002, -0.4266270330504807, '2.6%')])

In [117]:
# Title the current axes (the drivers pie chart)
plt.title("% of Total Drivers by City Type")

Text(0.5, 1.0, '% of Total Drivers by City Type')

In [118]:
# Save Figure: write the current figure (drivers pie chart) as a PNG; the path
# is relative to the working directory.
# NOTE(review): presumably the Images/ folder must already exist - confirm.
plt.savefig("Images/driversByCityType.png")

This chart reinforces the notion of demand affecting prices: despite the demand for taxis being high in urban areas, the number of drivers has a greater effect on the market.