In [None]:
%matplotlib inline

# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

# Input CSV locations, built relative to the notebook's working directory
city_data_to_load = os.path.join("data","city_data.csv")
ride_data_to_load = os.path.join("data","ride_data.csv")

# Load the city records and the ride records (city file first, then ride file)
city_data, ride_data = (
    pd.read_csv(csv_path)
    for csv_path in (city_data_to_load, ride_data_to_load)
)

# Join the city columns onto every ride through the shared "city" column.
# No `how` is given, so pandas' default join type applies.
combined_ride_data = ride_data.merge(city_data, on="city")

# Preview the first rows of the merged table as the cell's output
combined_ride_data.head()

## Bubble Plot of Ride Sharing Data

In [None]:
# Per-city summary used for the bubble chart:
#   x    = number of rides recorded for the city
#   y    = mean fare for the city
#   size = driver count for the city (first value seen in the city's rows)
grouped_ride_data = combined_ride_data.groupby(['city'])
ride_count = grouped_ride_data["ride_id"].count()
avg_fare = grouped_ride_data["fare"].mean()
grouped_type = grouped_ride_data["type"].first()
driver_count = grouped_ride_data["driver_count"].first()

# All four series share the same per-city index, so they align row by row.
ride_count_df = pd.DataFrame({
    "Total Number of Rides (Per City)": ride_count,
    "Average Fare ($)": avg_fare,
    "Type": grouped_type,
    "Driver Count": driver_count,
})

# One sub-table per city type
rural_ride_count = ride_count_df.loc[ride_count_df["Type"] == "Rural"]
suburban_ride_count = ride_count_df.loc[ride_count_df["Type"] == "Suburban"]
urban_ride_count = ride_count_df.loc[ride_count_df["Type"] == "Urban"]

# Build one scatter series per city type.
# Each entry: (cities, legend label, face colour, edge colour, alpha, edge width)
scatter_styles = [
    (rural_ride_count, "Rural", "gold", "black", 0.75, 1),
    (suburban_ride_count, "Suburban", "skyblue", "red", 0.85, 0.75),
    (urban_ride_count, "Urban", "coral", "green", 1, 1.25),
]
for cities, type_label, face_color, edge_color, opacity, edge_width in scatter_styles:
    plt.scatter(
        cities["Total Number of Rides (Per City)"],
        cities["Average Fare ($)"],
        marker="o",
        facecolors=face_color,
        edgecolors=edge_color,
        s=10 * cities["Driver Count"],  # bubble area scales with driver count
        alpha=opacity,
        label=type_label,
        linewidth=edge_width,
    )

# Incorporate the other graph properties
plt.grid()
plt.title("Pyber Ride Sharing Data (2016)")
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")

# Create a legend
lgnd = plt.legend(loc="upper right", scatterpoints=1, fontsize=10, title='City Types')

# Give every legend marker the same size so the key does not inherit the
# data-driven bubble sizes. `Legend.legendHandles` was renamed to
# `legend_handles` in Matplotlib 3.7 and the old name was later removed, so
# try the new attribute first and fall back for older releases.
legend_markers = getattr(lgnd, "legend_handles", None)
if legend_markers is None:
    legend_markers = lgnd.legendHandles
for legend_marker in legend_markers:
    legend_marker.set_sizes([30])

# Incorporate a text label regarding circle size (placed to the right of the axes)
plt.annotate('Note:', xy=(1.05, 0.60), xytext=(0, 10), xycoords=('axes fraction', 'figure fraction'), textcoords='offset points', size=10, ha='left', va='bottom')
plt.annotate('Circle size correlates with driver count per city', xy=(1.05, 0.55), xytext=(0, 10), xycoords=('axes fraction', 'figure fraction'), textcoords='offset points', size=10, ha='left', va='bottom')

# Save Figure (create the output folder first so a fresh checkout does not fail)
os.makedirs("images", exist_ok=True)
plt.savefig(os.path.join("images","ride_sharing.png"), bbox_inches="tight")

In [None]:
# Show plot: render whatever figure pyplot currently holds.
# NOTE(review): with `%matplotlib inline` the figure is typically drawn and
# closed at the end of the cell that built it, so this call may display
# nothing — confirm and consider moving it into the plotting cell.
plt.show()

## Total Fares by City Type

In [None]:
# Calculate Type Percents: each city type's share of the summed fares, in percent.
# `city_type_data` is reused by the "Total Rides by City Type" cell further down.
city_type_data = combined_ride_data.groupby(['type'])
fares_by_type = city_type_data["fare"].sum()
fares_by_type_percent = fares_by_type / combined_ride_data["fare"].sum() * 100

# Turn the 'type' index into a regular column without mutating in place.
city_type_df = fares_by_type_percent.reset_index()

# Build Pie Chart
# `colors` and `explode` are matched to wedges by position. groupby sorts the
# type labels, so this assumes exactly the three types Rural / Suburban / Urban
# (gold / skyblue / coral, with the last wedge pulled out).
plt.pie(
    city_type_df['fare'],
    labels=city_type_df['type'],
    colors=["gold", "skyblue", "coral"],
    explode=(0, 0, 0.2),
    autopct="%1.1f%%",
    shadow=True,
    startangle=160,
)

plt.title("% of Total Fares by City Type")
plt.axis("equal")  # equal aspect ratio keeps the pie circular

# Save Figure (create the output folder first so a fresh checkout does not fail)
os.makedirs("images", exist_ok=True)
plt.savefig(os.path.join("images","fares_city_type.png"))


In [None]:
# Show Figure: render whatever figure pyplot currently holds.
# NOTE(review): with `%matplotlib inline` the pie is typically drawn and closed
# at the end of the cell that built it, so this call may display nothing — confirm.
plt.show()

## Total Rides by City Type

In [None]:
# Calculate Ride Percents: each city type's share of the ride count, in percent.
# Relies on `city_type_data` (rides grouped by 'type') created in the
# "Total Fares by City Type" cell, so that cell must have been run first.
rides_by_type = city_type_data["ride_id"].count()
rides_by_type_percent = rides_by_type / combined_ride_data["ride_id"].count() * 100

# Turn the 'type' index into a regular column without mutating in place.
rides_type_df = rides_by_type_percent.reset_index()

# Build Pie Chart
# `colors` and `explode` are matched to wedges by position. groupby sorts the
# type labels, so this assumes exactly the three types Rural / Suburban / Urban.
plt.pie(
    rides_type_df['ride_id'],
    labels=rides_type_df['type'],
    colors=["gold", "skyblue", "coral"],
    explode=(0, 0, 0.2),
    autopct="%1.1f%%",
    shadow=True,
    startangle=160,
)
plt.title("% of Total Rides by City Type")
plt.axis("equal")  # equal aspect ratio keeps the pie circular

# Save Figure (create the output folder first so a fresh checkout does not fail)
os.makedirs("images", exist_ok=True)
plt.savefig(os.path.join("images","rides_city_type.png"))

In [None]:
# Show Figure: render whatever figure pyplot currently holds.
# NOTE(review): with `%matplotlib inline` the pie is typically drawn and closed
# at the end of the cell that built it, so this call may display nothing — confirm.
plt.show()

## Total Drivers by City Type

In [None]:
# Calculate Driver Percents: each city type's share of the driver total, in percent.
# The merged table repeats a city's driver_count on every ride row, so take one
# value per (type, city) pair before summing by type.
# The grouping gets its own name: binding it to `city_data` would replace the
# DataFrame loaded in the first cell with a groupby object.
rides_by_type_and_city = combined_ride_data.groupby(['type', 'city'])
drivers_by_city = rides_by_type_and_city["driver_count"].first()
drivers_by_city_grouped = drivers_by_city.groupby(level=0)  # level 0 is 'type'
drivers_by_type = drivers_by_city_grouped.sum()
drivers_by_type_percent = drivers_by_type / drivers_by_city.sum() * 100

# Build Pie Charts
# Turn the 'type' index into a regular column without mutating in place.
drivers_type_df = drivers_by_type_percent.reset_index()

# `colors` and `explode` are matched to wedges by position. groupby sorts the
# type labels, so this assumes exactly the three types Rural / Suburban / Urban.
plt.pie(
    drivers_type_df['driver_count'],
    labels=drivers_type_df['type'],
    colors=["gold", "skyblue", "coral"],
    explode=(0, 0, 0.2),
    autopct="%1.1f%%",
    shadow=True,
    startangle=160,
)
plt.title("% of Total Drivers by City Type")
plt.axis("equal")  # equal aspect ratio keeps the pie circular

# Save Figure (create the output folder first so a fresh checkout does not fail)
os.makedirs("images", exist_ok=True)
plt.savefig(os.path.join("images","drivers_city_type.png"))

In [None]:
# Show Figure: render whatever figure pyplot currently holds.
# NOTE(review): with `%matplotlib inline` the pie is typically drawn and closed
# at the end of the cell that built it, so this call may display nothing — confirm.
plt.show()