In [None]:
#Description:
#   Pyber Matplotlib Homework using Pandas Library and Jupyter Notebook. 
#   Show multiple plots described by comments in each section.  
#
#Modification History:
#   DD-MM-YYYY  Author          Description
#   26-06-2019  Stacey Smith    INITIAL CREATION



#Written description of three observable trends based on the data:
#   1.  Urban areas account for a significant share of total fares, drivers, and rides.
#   2.  Rural areas have more drivers per rider than other areas.
#   3.  Rural and suburban riders pay more per ride than urban riders.

In [None]:
%matplotlib inline

# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Input CSV locations (edit these if the data lives somewhere else)
city_path = "Resources/city_data.csv"
ride_path = "Resources/ride_data.csv"

# Load the city table and the ride table; both files are read with the same encoding
city_df, ride_df = (
    pd.read_csv(csv_path, encoding="ISO-8859-1")
    for csv_path in (city_path, ride_path)
)

# Join the two tables on the shared "city" column (default inner join)
pyber_df = city_df.merge(ride_df, on="city")

# Preview the first rows of the merged table
pyber_df.head()

In [None]:
# Sanity check before plotting: number of rows in the merged table for each city type
pyber_df.loc[:, "type"].value_counts()

# (pyber_df.count() would show the non-null count per column if a fuller check is needed)

## Bubble Plot of Ride Sharing Data

In [None]:
#Looking at the total number of rides per city and average fare
#Obtain the x and y coordinates for each of the three city types
#
#For this plot, I need to know each city's type, the total number of rides per city, the average fare per city
#The x axis will show the total number of rides per city (xrides)
#The y axis will show the avg fare per city (xavg_fare)
#The circle size correlates with driver count per city (xdriver)

def _city_type_stats(rides, city_type):
    """Compute the per-city bubble-plot inputs for a single city type.

    Parameters
    ----------
    rides : pandas.DataFrame
        Merged ride table; the 'type', 'city', 'fare' and 'driver_count'
        columns are read.
    city_type : str
        Value of the 'type' column to keep, e.g. 'Urban'.

    Returns
    -------
    tuple
        (rows, grouped, ride_count, avg_fare, avg_driver_count):
        the filtered rows, those rows grouped by city, and three Series
        indexed by city (count of fares, mean fare, mean driver_count).
    """
    rows = rides[rides['type'] == city_type]
    # Group by the column name so the keys come from the filtered rows themselves,
    # not from a Series of the unfiltered frame that has to be re-aligned by index.
    grouped = rows.groupby('city')
    fares = grouped['fare']
    return rows, grouped, fares.count(), fares.mean(), grouped['driver_count'].mean()


# Same five names per city type as before (prefix u = Urban, s = Suburban, r = Rural)
utype, ucities, urides, uavg_fare, udriver = _city_type_stats(pyber_df, 'Urban')
stype, scities, srides, savg_fare, sdriver = _city_type_stats(pyber_df, 'Suburban')
rtype, rcities, rrides, ravg_fare, rdriver = _city_type_stats(pyber_df, 'Rural')

In [None]:
#Build the scatter plots for each city types
#One scatter call per city type; marker area is the per-city driver count times 5
plt.scatter(urides, uavg_fare, marker="o", facecolors="coral", edgecolors="black", alpha=0.75, s=udriver*5, label='Urban') 
plt.scatter(rrides, ravg_fare, marker="o", facecolors="gold", edgecolors="black", alpha=1, s=rdriver*5, label='Rural') 
plt.scatter(srides, savg_fare, marker="o", facecolors="skyblue", edgecolors="black", alpha=0.75, s=sdriver*5, label='Suburban') 

#Incorporate the other graph properties
plt.grid()

plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")
plt.title("Pyber Ride Sharing Data (2018)")

#Create a legend
legend = plt.legend(fontsize = 8, title= "City Types", loc="best") 

#Give every legend marker the same fixed size instead of the data-driven bubble size.
#Legend.legendHandles was renamed to legend_handles in Matplotlib 3.7 and the old
#name was later removed, so look up the new name first and fall back to the old one.
legend_handles = getattr(legend, "legend_handles", None)
if legend_handles is None:
    legend_handles = legend.legendHandles
for handle in legend_handles:
    #set_sizes is the public setter for what the private _sizes attribute held
    handle.set_sizes([35])

#Incorporate a text label regarding circle size
#The position (42, 35) is in data coordinates
plt.text(42,35,"Note: \nCircle size correlates with driver count per city.", fontsize = 10)

#Save Figure
plt.savefig("Images/PyberRideSharingData.png", bbox_inches="tight")

#Show plot
plt.show()

## Total Fares by City Type

In [None]:
#Calculate Type Percents
#Group the merged rides by city type, then total the fares inside each group.
#The pie chart turns these totals into percentages.
city_type = pyber_df.groupby(by=["type"])
city_sums = city_type["fare"].sum()

In [None]:
#Build Pie Chart

#Take the wedge labels from the data so each label is always attached to its own value
labels = list(city_sums.index)

#Colour for each city type (same colours as the bubble plot); an unexpected type raises KeyError
type_colors = {"Rural": "gold", "Suburban": "skyblue", "Urban": "coral"}
colors = [type_colors[city_kind] for city_kind in labels]

#Pull only the Urban wedge out of the pie
explode = [0.1 if city_kind == "Urban" else 0 for city_kind in labels]

plt.pie(city_sums, explode=explode, labels=labels, colors=colors, autopct="%1.1f%%", shadow=True, startangle=140)

plt.axis("equal")

plt.title("% of Total Fares by City Type")

# Save Figure
plt.savefig("Images/TotalFaresByCityType.png", bbox_inches="tight")

#Show plot
plt.show()

## Total Rides by City Type

In [None]:
#Calculate Ride Percents
#Number of non-null fare entries per city type, reusing the city_type grouping built earlier
city_rides = city_type["fare"].count()


In [None]:
#Build Pie Chart
#Take the wedge labels from the data so each label is always attached to its own value
labels = list(city_rides.index)

#Colour for each city type (same colours as the bubble plot); an unexpected type raises KeyError
type_colors = {"Rural": "gold", "Suburban": "skyblue", "Urban": "coral"}
colors = [type_colors[city_kind] for city_kind in labels]

#Pull only the Urban wedge out of the pie
explode = [0.1 if city_kind == "Urban" else 0 for city_kind in labels]

plt.pie(city_rides, explode=explode, labels=labels, colors=colors, autopct="%1.1f%%", shadow=True, startangle=140)

plt.axis("equal")

plt.title("% of Total Rides by City Type")

# Save Figure
plt.savefig("Images/TotalRidesByCityType.png", bbox_inches="tight")

#Show plot
plt.show()

## Total Drivers by City Type

In [None]:
# Calculate Driver Percents
# Sum driver_count per city type from city_df rather than the merged ride table
# (presumably city_df has one row per city, so drivers are not repeated per ride - verify)
city_driver = city_df.groupby(by=["type"])
driver_count = city_driver["driver_count"].sum()
driver_count


In [None]:
#Build Pie Chart
#Take the wedge labels from the data so each label is always attached to its own value
labels = list(driver_count.index)

#Colour for each city type (same colours as the bubble plot); an unexpected type raises KeyError
type_colors = {"Rural": "gold", "Suburban": "skyblue", "Urban": "coral"}
colors = [type_colors[city_kind] for city_kind in labels]

#Pull only the Urban wedge out of the pie
explode = [0.1 if city_kind == "Urban" else 0 for city_kind in labels]

plt.pie(driver_count, explode=explode, labels=labels, colors=colors, autopct="%1.1f%%", shadow=True, startangle=140)

plt.axis("equal")

plt.title("% of Total Drivers by City Type")

# Save Figure
plt.savefig("Images/TotalDriversByCityType.png", bbox_inches="tight")

#Show plot
plt.show()
