In [None]:
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Input CSV locations (relative paths; adjust if the data lives elsewhere)
city_data_to_load, ride_data_to_load = (
    "data/city_data.csv",
    "data/ride_data.csv",
)

In [None]:
# Load the city CSV into a DataFrame, then preview its first five rows
city_df = pd.read_csv(city_data_to_load)

city_df.head(n=5)

In [None]:
# Urban-only slice of the city table; its driver counts feed the final (drivers) pie chart
city_urban_df = city_df[city_df["type"] == "Urban"]

city_urban_df.head(n=5)

In [None]:
# Suburban-only slice of the city table; its driver counts feed the final (drivers) pie chart
city_suburban_df = city_df[city_df["type"] == "Suburban"]

city_suburban_df.head(n=5)

In [None]:
# Rural-only slice of the city table; its driver counts feed the final (drivers) pie chart
city_rural_df = city_df[city_df["type"] == "Rural"]

city_rural_df.head(n=5)

In [None]:
# Load the ride CSV into a DataFrame, then preview its first five rows
ride_df = pd.read_csv(ride_data_to_load)

ride_df.head(n=5)

In [None]:
# Join the city attributes onto the rides by city name.
# how='right' keeps every row of ride_df (the right-hand table), matched or not.
combined_pyber_df = city_df.merge(ride_df, how='right', on='city')
combined_pyber_df.head(n=5)

In [None]:
# Show the merged frame's column names in their current order
combined_pyber_df.columns.tolist()

In [None]:
# Give the merged columns human-readable names (raw CSV name -> display name)
combined_pyber_df = combined_pyber_df.rename(
    columns={
        "city": "Ride City",
        "driver_count": "Driver Count",
        "type": "Geographic Area",
        "date": "Ride Date",
        "fare": "Fare",
        "ride_id": "Ride ID Number",
    }
)

In [None]:
# Put the ride-level columns first, followed by the city-level attributes
combined_pyber_df = combined_pyber_df[
    [
        "Ride City",
        "Ride Date",
        "Fare",
        "Ride ID Number",
        "Driver Count",
        "Geographic Area",
    ]
]

In [None]:
# Preview the first five rows of the renamed, reordered frame
combined_pyber_df.head(n=5)

## Bubble Plot of Ride Sharing Data

In [None]:
# Distinct "Geographic Area" values, in order of first appearance
pd.unique(combined_pyber_df["Geographic Area"])

In [None]:
# Rides whose Geographic Area is Urban (all other areas dropped)
urban_df = combined_pyber_df[combined_pyber_df["Geographic Area"] == "Urban"]

urban_df.head(n=5)

In [None]:
# Group the urban rides by "Ride City". Despite the variable name this is a
# GroupBy object, not a DataFrame; the per-city rides, average fare and driver
# count are all aggregated from it.
urban_ride_city_groupby_df = urban_df.groupby(['Ride City'])

# Preview one summary row per city for the first five cities. first() takes the
# first non-null value of each column within a city. (GroupBy.head() is not a
# preview: it returns the first five rides of *every* city.)
urban_ride_city_groupby_df.first().head()

In [None]:
# Number of rides per urban city (non-null ride IDs counted within each city group)
urban_total_rides = urban_ride_city_groupby_df['Ride ID Number'].agg('count')
urban_total_rides.head(n=5)

In [None]:
# Average (mean) fare per urban city -- an average, not a total
urban_avg_fare = urban_ride_city_groupby_df['Fare'].agg('mean')
urban_avg_fare.head(n=5)

In [None]:
# Driver count per urban city. "Driver Count" is a city-level column repeated on
# every ride row, so it is presumably constant within a city and the mean simply
# recovers that single value (a sum would multiply it by the number of rides).
urban_total_drivers = urban_ride_city_groupby_df['Driver Count'].agg('mean')
urban_total_drivers.head(n=5)

In [None]:
# Rides whose Geographic Area is Suburban (all other areas dropped)
suburban_df = combined_pyber_df[combined_pyber_df["Geographic Area"] == "Suburban"]

suburban_df.head(n=5)

In [None]:
# Group the suburban rides by "Ride City". Despite the variable name this is a
# GroupBy object, not a DataFrame; the per-city rides, average fare and driver
# count are all aggregated from it.
suburban_ride_city_groupby_df = suburban_df.groupby(['Ride City'])

# Preview one summary row per city for the first five cities. first() takes the
# first non-null value of each column within a city. (GroupBy.head() is not a
# preview: it returns the first five rides of *every* city.)
suburban_ride_city_groupby_df.first().head()

In [None]:
# Number of rides per suburban city (non-null ride IDs counted within each city group)
suburban_total_rides = suburban_ride_city_groupby_df['Ride ID Number'].agg('count')
suburban_total_rides.head(n=5)

In [None]:
# Average (mean) fare per suburban city -- an average, not a total
suburban_avg_fare = suburban_ride_city_groupby_df['Fare'].agg('mean')
suburban_avg_fare.head(n=5)

In [None]:
# Driver count per suburban city. "Driver Count" is a city-level column repeated on
# every ride row, so it is presumably constant within a city and the mean simply
# recovers that single value (a sum would multiply it by the number of rides).
suburban_total_drivers = suburban_ride_city_groupby_df['Driver Count'].agg('mean')
suburban_total_drivers.head(n=5)

In [None]:
# Rides whose Geographic Area is Rural (all other areas dropped)
rural_df = combined_pyber_df[combined_pyber_df["Geographic Area"] == "Rural"]

rural_df.head(n=5)

In [None]:
# Group the rural rides by "Ride City". Despite the variable name this is a
# GroupBy object, not a DataFrame; the per-city rides, average fare and driver
# count are all aggregated from it.
rural_ride_city_groupby_df = rural_df.groupby(['Ride City'])

# Preview one summary row per city for the first five cities. first() takes the
# first non-null value of each column within a city. (GroupBy.head() is not a
# preview: it returns the first five rides of *every* city.)
rural_ride_city_groupby_df.first().head()

In [None]:
# Number of rides per rural city (non-null ride IDs counted within each city group)
rural_total_rides = rural_ride_city_groupby_df['Ride ID Number'].agg('count')
rural_total_rides.head(n=5)

In [None]:
# Average (mean) fare per rural city -- an average, not a total
rural_avg_fare = rural_ride_city_groupby_df['Fare'].agg('mean')
rural_avg_fare.head(n=5)

In [None]:
# Driver count per rural city. "Driver Count" is a city-level column repeated on
# every ride row, so it is presumably constant within a city and the mean simply
# recovers that single value (a sum would multiply it by the number of rides).
rural_total_drivers = rural_ride_city_groupby_df['Driver Count'].agg('mean')
rural_total_drivers.head(n=5)

In [None]:
# Bubble plot: one circle per city, x = rides per city, y = average fare,
# marker area = 10 x the city's driver count. One scatter call per city type so
# each type gets its own fill color and legend entry.
bubble_style = dict(marker="o", alpha=.80, edgecolors="black")  # shared marker shape, transparency, edge color
Urban = plt.scatter(urban_total_rides, urban_avg_fare, s=urban_total_drivers*10,
                    facecolors='#F29C7A', **bubble_style)
Suburban = plt.scatter(suburban_total_rides, suburban_avg_fare, s=suburban_total_drivers*10,
                       facecolors='#AAD6EC', **bubble_style)
Rural = plt.scatter(rural_total_rides, rural_avg_fare, s=rural_total_drivers*10,
                    facecolors='#FBDE59', **bubble_style)

# Legend: one entry per city type, in the same order as the handles
plt.legend((Urban, Suburban, Rural), ('Urban', 'Suburban', 'Rural'),
           scatterpoints=1, markerscale=1, loc="best", title="City Types")

# Explain the varying circle sizes. The position is given in data coordinates
# (x = 42 rides, y = 36.5 / 35 dollars), so the note may fall outside the axes
# depending on the data range -- NOTE(review): confirm placement against the data.
t1 = "Note:"
t2 = "Circle size correlates with driver count per city."
plt.text(42, 36.5, t1, ha='left')
plt.text(42, 35, t2, ha='left')

# Title and axis labels
plt.title("Pyber Ride Sharing Data (2016)")
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")

# Show grid lines
plt.grid()

# Save the figure. bbox_inches="tight" grows the saved bounding box to include
# every artist, so the note is not cropped if it lies outside the axes.
plt.savefig("../Images/Pyber_Ride_Sharing_Data.png", bbox_inches="tight")

# Display the scatter plot
plt.show()

## Total Fares by City Type

In [None]:
# Fare revenue per city type, and each type's share of the overall total.
# The *_pct values are fractions of the total (0-1), not values scaled to 100.
total_fares = combined_pyber_df['Fare'].sum()

urban_fare_total = urban_df['Fare'].sum()
suburban_fare_total = suburban_df['Fare'].sum()
rural_fare_total = rural_df['Fare'].sum()

urban_fare_pct = urban_fare_total / total_fares
suburban_fare_pct = suburban_fare_total / total_fares
rural_fare_pct = rural_fare_total / total_fares

In [None]:
# Pie chart: share of total fares by city type

# Parallel sequences -- position i of each describes the same wedge.
# Only the first wedge (Urban) is offset from the center.
city_type = ["Urban", "Rural", "Suburban"]
fare_pct = [urban_fare_pct, rural_fare_pct, suburban_fare_pct]
colors = ["#E28683", "#F9D649", "#96CDF6"]
explode = (0.1, 0, 0)

# Draw the wedges, labelling each with its percentage to one decimal place
plt.pie(
    fare_pct,
    explode=explode,
    labels=city_type,
    colors=colors,
    autopct="%1.1f%%",
    shadow=True,
    startangle=280,
)

# Title
plt.title("% of Total Fares by City Type")

# Write the chart to disk before showing it
plt.savefig("../Images/Pyber_Total_Fares_by_City_Type.png")

# Render the figure
plt.show()

## Total Rides by City Type

In [None]:
# Number of rides per city type, and each type's share of all rides.
# Rides are COUNTED: "Ride ID Number" is an identifier, so summing it (as this
# cell previously did) adds up ID values and weights each ride by the magnitude
# of its ID instead of counting it once.
urban_rides_total = urban_df['Ride ID Number'].count()

suburban_rides_total = suburban_df['Ride ID Number'].count()

rural_rides_total = rural_df['Ride ID Number'].count()

total_rides = combined_pyber_df['Ride ID Number'].count()

# The *_pct values are fractions of the total (0-1), not values scaled to 100
urban_rides_pct = urban_rides_total/total_rides

suburban_rides_pct = suburban_rides_total/total_rides

rural_rides_pct = rural_rides_total/total_rides

In [None]:
# Pie chart: share of total rides by city type

# Parallel sequences -- position i of each describes the same wedge.
# Only the first wedge (Urban) is offset from the center.
city_type = ["Urban", "Rural", "Suburban"]
ride_pct = [urban_rides_pct, rural_rides_pct, suburban_rides_pct]
colors = ["#E28683", "#F9D649", "#96CDF6"]
explode = (0.1, 0, 0)

# Draw the wedges, labelling each with its percentage to one decimal place
plt.pie(
    ride_pct,
    explode=explode,
    labels=city_type,
    colors=colors,
    autopct="%1.1f%%",
    shadow=True,
    startangle=260,
)

# Title
plt.title("% of Total Rides by City Type")

# Write the chart to disk before showing it
plt.savefig("../Images/Pyber_Total_Rides_by_City_Type.png")

# Render the figure
plt.show()

## Total Drivers by City Type

In [None]:
# Drivers per city type, and each type's share of all drivers.
# Summed from the city table (city_df and its per-type slices) rather than the
# merged ride table, where driver_count is repeated on every ride row.
total_drivers = city_df['driver_count'].sum()

urban_driver_total = city_urban_df['driver_count'].sum()
suburban_driver_total = city_suburban_df['driver_count'].sum()
rural_driver_total = city_rural_df['driver_count'].sum()

# The *_pct values are fractions of the total (0-1), not values scaled to 100
urban_driver_pct = urban_driver_total / total_drivers
suburban_driver_pct = suburban_driver_total / total_drivers
rural_driver_pct = rural_driver_total / total_drivers

In [None]:
# Pie chart: share of total drivers by city type

# Parallel sequences -- position i of each describes the same wedge.
# Only the first wedge (Urban) is offset from the center.
city_type = ["Urban", "Rural", "Suburban"]
driver_pct = [urban_driver_pct, rural_driver_pct, suburban_driver_pct]
colors = ["#E28683", "#F9D649", "#96CDF6"]
explode = (0.1, 0, 0)

# Draw the wedges, labelling each with its percentage to one decimal place
plt.pie(
    driver_pct,
    explode=explode,
    labels=city_type,
    colors=colors,
    autopct="%1.1f%%",
    shadow=True,
    startangle=235,
)

# Title
plt.title("% of Total Drivers by City Type")

# Write the chart to disk before showing it
plt.savefig("../Images/Pyber_Total_Drivers_by_City_Type.png")

# Render the figure
plt.show()

Three Observable Trends:
1. Urban ride fares appear to stay under an average of $30 per trip across all cities.
2. It appears that nearly all Urban cities had more than 15 rides each, and that Urban cities account for about 80 percent of all Pyber drivers.
3. Though Rural and Suburban Pyber drivers make up less than 20 percent of the total drivers, these two categories combined account for 37.3 percent of the total income from fares.