In [1]:
# Dependencies & Setup
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Locate the two CSV inputs (paths are relative to the notebook's working directory)
city_data = "../Pyber/data/city_data.csv"
ride_data = "../Pyber/data/ride_data.csv"

# Read each CSV into its own DataFrame, city table first and ride table second
city_data_df, ride_data_df = (
    pd.read_csv(csv_path) for csv_path in (city_data, ride_data)
)

# Outer-join the two tables on their shared "city" column, so a city that
# appears in only one of the files is still kept in the result
combined_df = city_data_df.merge(ride_data_df, on="city", how="outer")

# Preview the first rows of the merged frame
combined_df.head()

Unnamed: 0,city,driver_count,type,date,fare,ride_id
0,Richardfort,38,Urban,2018-02-24 08:40:38,13.93,5628545007794
1,Richardfort,38,Urban,2018-02-13 12:46:07,14.0,910050116494
2,Richardfort,38,Urban,2018-02-16 13:52:19,17.92,820639054416
3,Richardfort,38,Urban,2018-02-01 20:18:28,10.26,9554935945413
4,Richardfort,38,Urban,2018-04-17 02:26:37,23.0,720020655850


# Bubble Plot of Ride Sharing Data

In [None]:
# Obtain the x and y coordinates for each of the three city types
def _city_type_stats(rides_df, city_type):
    """Return the rows and per-city plotting series for one city type.

    Parameters
    ----------
    rides_df : pandas.DataFrame
        Merged city/ride table with "type", "city", "fare", "ride_id" and
        "driver_count" columns.
    city_type : str
        Value of the "type" column to select (e.g. "Urban").

    Returns
    -------
    tuple
        (rows of that type, mean fare per city, ride count per city,
        driver count per city).
    """
    subset = rides_df.loc[rides_df["type"] == city_type]
    per_city = subset.groupby("city")
    return (
        subset,
        per_city["fare"].mean(),
        per_city["ride_id"].count(),
        # driver_count is a per-city value repeated on every ride row of the
        # merged table, so take it once per city; summing it would multiply
        # it by the number of rides.
        per_city["driver_count"].first(),
    )


Urban_data, Urban_fares, Urban_rides, Urban_drivers = _city_type_stats(combined_df, "Urban")
Suburban_data, Suburban_fares, Suburban_rides, Suburban_drivers = _city_type_stats(combined_df, "Suburban")
Rural_data, Rural_fares, Rural_rides, Rural_drivers = _city_type_stats(combined_df, "Rural")

# Marker area is given in points^2; raw driver counts make very small
# bubbles, so scale them up. The factor is purely a visual choice.
BUBBLE_SCALE = 10

# Build the scatter plots for each city type on a fresh figure, so that
# re-running this cell never draws on top of an older, still-live figure
plt.figure()
Urban = plt.scatter(Urban_rides, Urban_fares, s=Urban_drivers * BUBBLE_SCALE,
                    c="red", alpha=.25, edgecolors="grey", label="Urban")
Suburban = plt.scatter(Suburban_rides, Suburban_fares, s=Suburban_drivers * BUBBLE_SCALE,
                       c="lightblue", alpha=.5, edgecolors="grey", label="Suburban")
Rural = plt.scatter(Rural_rides, Rural_fares, s=Rural_drivers * BUBBLE_SCALE,
                    c="yellow", alpha=.75, edgecolors="grey", label="Rural")
plt.grid()

In [None]:
# Incorporate the other graph properties
# NOTE(review): the preview rows of the merged data are dated 2018, but the
# title says 2016 -- confirm which year is intended before publishing.
plt.title("Pyber Ride Sharing Data (2016)")
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")

# Create a legend.
# A single call is enough: a second plt.legend() replaces the first one on
# the same axes. The former mode="Expanded" argument is dropped because
# matplotlib only recognises "expand" or None, so it had no effect.
lgnd = plt.legend(fontsize="small",
                  numpoints=1, scatterpoints=1,
                  loc="best", title="City Types",
                  labelspacing=0.5)

# Give every legend marker the same size so the legend does not inherit the
# data-driven bubble sizes. `legend_handles` is the current attribute name;
# fall back to the older `legendHandles` on matplotlib versions without it.
legend_markers = getattr(lgnd, "legend_handles", None)
if legend_markers is None:
    legend_markers = lgnd.legendHandles
for marker in legend_markers:
    marker.set_sizes([30])

In [None]:
# Incorporate a text label regarding circle size
# The note is anchored at data coordinates x=42, y=35 on the current axes.
circle_size_note = "Note:\nCircle size correlates with driver count per city."
plt.text(42, 35, circle_size_note)

In [None]:
# Save Figure
# Write the current figure to disk first, then display it.
ride_share_png = "../Pyber/RideSharePlot.png"
plt.savefig(ride_share_png)
plt.show()

# Total Fares by City Type

In [None]:
# Pie Chart #1: % of Total Fares by City Type
# The values of each section of the pie chart: summed fares per city type
total_fares = combined_df.groupby("type")["fare"].sum()

# Labels for the sections of our pie chart.
# Taken from the grouped index so each label always matches its value, even
# if the set or order of city types in the data changes.
labels = total_fares.index.tolist()

# The colors of each section of the pie chart, looked up by city type
# (same colors as the bubble plot); unknown types fall back to grey
type_colors = {"Rural": "yellow", "Suburban": "lightblue", "Urban": "red"}
colors = [type_colors.get(city_type, "lightgrey") for city_type in labels]

# Tells matplotlib to separate the "Urban" section from the others
explode = tuple(0.1 if city_type == "Urban" else 0 for city_type in labels)

In [None]:
# Start a new figure: with the interactive notebook backend the previous
# figure stays current, so without this the pie would be drawn onto it.
plt.figure()

# Draw the fares pie with a percentage printed on each wedge
plt.pie(total_fares, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)
# Tells matplotlib that we want a pie chart with equal axes
plt.axis("equal")

plt.title("Total Fares by City Type")

In [None]:
# Write the current figure to disk first, then display it.
total_fares_png = "../Pyber/TotalFares_CityType.png"
plt.savefig(total_fares_png)
plt.show()

# Total Rides by City Type

In [None]:
# Pie Chart #2: % of Total Rides by City Type
# The values of each section of the pie chart: number of rides per city type
total_rides = combined_df.groupby("type")["ride_id"].count()

# Labels for the sections of our pie chart.
# Taken from the grouped index so each label always matches its value, even
# if the set or order of city types in the data changes.
labels = total_rides.index.tolist()

# The colors of each section of the pie chart, looked up by city type
# (same colors as the bubble plot); unknown types fall back to grey
type_colors = {"Rural": "yellow", "Suburban": "lightblue", "Urban": "red"}
colors = [type_colors.get(city_type, "lightgrey") for city_type in labels]

# Tells matplotlib to separate the "Urban" section from the others
explode = tuple(0.1 if city_type == "Urban" else 0 for city_type in labels)

In [None]:
# Start a new figure: with the interactive notebook backend the previous
# figure stays current, so without this the pie would be drawn onto it.
plt.figure()

# Draw the rides pie with a percentage printed on each wedge
plt.pie(total_rides, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)
# Tells matplotlib that we want a pie chart with equal axes
plt.axis("equal")

plt.title("Total Rides by City Type")

In [None]:
# Write the current figure to disk first, then display it.
total_rides_png = "../Pyber/TotalRides_CityType.png"
plt.savefig(total_rides_png)
plt.show()

# Total Drivers by City Type

In [None]:
# Pie Chart #3: % of Total Drivers by City Type
# The values of each section of the pie chart: total drivers per city type.
# Sum driver_count over the per-city table rather than the merged table:
# the merged table repeats each city's driver_count on every ride row, so
# counting or summing there measures rides, not drivers.
# Assumes city_data_df holds one row per city -- TODO confirm.
total_drivers = city_data_df.groupby("type")["driver_count"].sum()

# Labels for the sections of our pie chart.
# Taken from the grouped index so each label always matches its value, even
# if the set or order of city types in the data changes.
labels = total_drivers.index.tolist()

# The colors of each section of the pie chart, looked up by city type
# (same colors as the bubble plot); unknown types fall back to grey
type_colors = {"Rural": "yellow", "Suburban": "lightblue", "Urban": "red"}
colors = [type_colors.get(city_type, "lightgrey") for city_type in labels]

# Tells matplotlib to separate the "Urban" section from the others
explode = tuple(0.1 if city_type == "Urban" else 0 for city_type in labels)

In [None]:
# Start a new figure: with the interactive notebook backend the previous
# figure stays current, so without this the pie would be drawn onto it.
plt.figure()

# Draw the drivers pie with a percentage printed on each wedge
plt.pie(total_drivers, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)
# Tells matplotlib that we want a pie chart with equal axes
plt.axis("equal")

plt.title("Total Drivers by City Type")

In [None]:
# Write the current figure to disk first, then display it.
# NOTE(review): the file name is spelled "TotalDriveres"; kept unchanged
# here because other material may already reference that exact name.
total_drivers_png = "../Pyber/TotalDriveres_CityType.png"
plt.savefig(total_drivers_png)
plt.show()