In [1]:
# Dependencies & Setup
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Grab data source files 
city_data = "../Pyber/data/city_data.csv"
ride_data = "../Pyber/data/ride_data.csv"

city_data_df = pd.read_csv(city_data)
ride_data_df = pd.read_csv(ride_data)

# Merge the city_data & ride_data
combined_df = pd.merge(city_data_df, ride_data_df,
                                 how='outer', on='city')
combined_df.head()

Unnamed: 0,city,driver_count,type,date,fare,ride_id
0,Richardfort,38,Urban,2018-02-24 08:40:38,13.93,5628545007794
1,Richardfort,38,Urban,2018-02-13 12:46:07,14.0,910050116494
2,Richardfort,38,Urban,2018-02-16 13:52:19,17.92,820639054416
3,Richardfort,38,Urban,2018-02-01 20:18:28,10.26,9554935945413
4,Richardfort,38,Urban,2018-04-17 02:26:37,23.0,720020655850


In [3]:
# Split up our data into groups based upon 'type' and 'city'
rs_data = combined_df.groupby(['type','city'])

In [4]:
# Create a new variable that holds the avgerage fare by type/ cities
avg_fare = rs_data['fare'].mean()
avg_fare.head()

type   city          
Rural  Bradshawfurt      40.064000
       Garzaport         24.123333
       Harringtonfort    33.470000
       Jessicaport       36.013333
       Lake Jamie        34.358333
Name: fare, dtype: float64

In [5]:
# Create a new variable that holds the sum of fares for type/ cities
total_fare = rs_data['fare'].sum()
total_fare.head()

type   city          
Rural  Bradshawfurt      400.64
       Garzaport          72.37
       Harringtonfort    200.82
       Jessicaport       216.08
       Lake Jamie        206.15
Name: fare, dtype: float64

In [6]:
# Create a new variable that holds the count of rides for type/ cities
total_rides = rs_data['ride_id'].count()
total_rides.head()

type   city          
Rural  Bradshawfurt      10
       Garzaport          3
       Harringtonfort     6
       Jessicaport        6
       Lake Jamie         6
Name: ride_id, dtype: int64

In [7]:
# Create a new variable that holds the sum of drivers for type/ cities
total_drivers = rs_data['driver_count'].sum()
total_drivers.head()

type   city          
Rural  Bradshawfurt      70
       Garzaport         21
       Harringtonfort    24
       Jessicaport        6
       Lake Jamie        24
Name: driver_count, dtype: int64

# Bubble Plot of Ride Sharing Data

In [8]:
# Obtain the x and y coordinates for each of the three city types
# x_axis is total_rides
# y_axis is avg_fare

# Build the scatter plots for each city types
plt.scatter(total_rides, avg_fare,
 s=total_drivers)
plt.grid()

<IPython.core.display.Javascript object>

In [9]:
# Incorporate the other graph properties
plt.title("Pyber Ride Sharing Data (2016)")
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")



Text(58.902777777777786, 0.5, 'Average Fare ($)')

In [10]:
# Create a legend
plt.legend(loc="upper right", title="City Types")

<matplotlib.legend.Legend at 0x260cb1cc2e8>

In [None]:
# Incorporate a text label regarding circle size
# Note: Circle Size correlates with driver count per city 

In [None]:
# Save Figure
plt.savefig("../Pyber/RideSharePlot.png")
plt.show()

# Total Fares by City Type

In [None]:
# Pie Chart #1: % of Total Fares by City Type
# The values of each section of the pie chart
total_fares = combined_df.groupby(['type'])['fares'].sum()

# Labels for the sections of our pie chart
labels = ["Rural","Suburban","Urban" ]

# The colors of each section of the pie chart
colors = ["yellow","lightblue","red"]

# Tells matplotlib to seperate the "Python" section from the others
explode = (0, 0, 0.1)

In [None]:
plt.pie(total_fares, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)
# Tells matplotlib that we want a pie chart with equal axes
plt.axis("equal")

In [None]:
plt.savefig("../Pyber/TotalFares_CityType.png")
plt.show()

# Total Rides by City Type

In [None]:
# Pie Chart #2: % of Total Rides by City Type
# The values of each section of the pie chart
total_rides = combined_df.groupby(['type'])['ride_id'].count()

# Labels for the sections of our pie chart
labels = ["Rural","Suburban","Urban" ]

# The colors of each section of the pie chart
colors = ["yellow","lightblue","red"]

# Tells matplotlib to seperate the "Python" section from the others
explode = (0, 0, 0.1)

In [None]:
plt.pie(total_rides, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)
# Tells matplotlib that we want a pie chart with equal axes
plt.axis("equal")

In [None]:
plt.savefig("../Pyber/TotalRides_CityType.png")
plt.show()

# Total Drivers by City Type

In [None]:
# Pie Cahrt #3: % of Total Drivers by City Type
# The values of each section of the pie chart
total_drivers = combined_df.groupby(['type'])['driver_count'].count()

# Labels for the sections of our pie chart
labels = ["Rural","Suburban","Urban" ]

# The colors of each section of the pie chart
colors = ["yellow","lightblue","red"]

# Tells matplotlib to seperate the "Python" section from the others
explode = (0, 0, 0.1)

In [None]:
plt.pie(total_drivers, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)
# Tells matplotlib that we want a pie chart with equal axes
plt.axis("equal")

In [None]:
plt.savefig("../Images/TotalDriveres_CityType.png")
plt.show()

# Check Yourself

In [None]:
# Average Fare ($) Per City
# avg_city_fares = city_group["fare"].mean()
# avg_city_fares

In [None]:
# Total Number of Rides Per City
# city_rides = city_group.ride_id.count()
# city_rides

In [None]:
# Total Number of Drivers Per City
# total_drivers_city = city_group.driver_count.sum()
# total_drivers_city

In [None]:
# City Type (Urban, Suburban, Rural)
# city_types = combined_df.groupby('type').count()
# city_types.head()