In [1]:
# Dependencies & Setup
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Grab data source files 
city_data = "../Pyber/data/city_data.csv"
ride_data = "../Pyber/data/ride_data.csv"

city_data_df = pd.read_csv(city_data)
ride_data_df = pd.read_csv(ride_data)

# Merge the city_data & ride_data
combined_df = pd.merge(city_data_df, ride_data_df,
                                 how='outer', on='city')
combined_df.head()

Unnamed: 0,city,driver_count,type,date,fare,ride_id
0,Richardfort,38,Urban,2018-02-24 08:40:38,13.93,5628545007794
1,Richardfort,38,Urban,2018-02-13 12:46:07,14.0,910050116494
2,Richardfort,38,Urban,2018-02-16 13:52:19,17.92,820639054416
3,Richardfort,38,Urban,2018-02-01 20:18:28,10.26,9554935945413
4,Richardfort,38,Urban,2018-04-17 02:26:37,23.0,720020655850


In [3]:
fare_data = combined_df.groupby(['type','city'])['fare'].mean()
fare_data_df = pd.DataFrame(fare_data).reset_index()
fare_data_df.head()

Unnamed: 0,type,city,fare
0,Rural,Bradshawfurt,40.064
1,Rural,Garzaport,24.123333
2,Rural,Harringtonfort,33.47
3,Rural,Jessicaport,36.013333
4,Rural,Lake Jamie,34.358333


In [4]:
rides_data = combined_df.groupby(['type','city'])['ride_id'].count()
rides_data_df = pd.DataFrame(rides_data).reset_index()
rides_data_df.head()

Unnamed: 0,type,city,ride_id
0,Rural,Bradshawfurt,10
1,Rural,Garzaport,3
2,Rural,Harringtonfort,6
3,Rural,Jessicaport,6
4,Rural,Lake Jamie,6


In [5]:
driver_data = combined_df.groupby(['type','city'])['driver_count'].sum()
driver_data_df = pd.DataFrame(driver_data).reset_index()
driver_data_df.head()

Unnamed: 0,type,city,driver_count
0,Rural,Bradshawfurt,70
1,Rural,Garzaport,21
2,Rural,Harringtonfort,24
3,Rural,Jessicaport,6
4,Rural,Lake Jamie,24


# Bubble Plot of Ride Sharing Data

In [6]:
# Obtain the x and y coordinates for each of the three city types 
Urban_cities = fare_data_df.loc[fare_data_df['type'] == "Urban"]

#Urban = plt.scatter(Urban_rides, Urban_fares, s=Urban_drivers, c="red", alpha=.25, edgecolors="grey", label="Urban")

Suburban_cities = rides_data_df.loc[rides_data_df['type'] == "Suburban"]

#Suburban = plt.scatter(Suburban_rides, Suburban_fares, s=Suburban_drivers, c="lightblue", alpha=.25, edgecolors="grey", label="Suburban")

Rural_cities = driver_data_df.loc[driver_data_df['type'] == "Rural"]

#Rural = plt.scatter(Rural_rides , Rural_fares, s=Rural_drivers, c="yellow", alpha=.25, edgecolors="grey", label="Rural")


In [8]:
Urban_cities.head()

Unnamed: 0,type,city,fare
54,Urban,Amandaburgh,24.641667
55,Urban,Barajasview,25.332273
56,Urban,Carriemouth,28.314444
57,Urban,Christopherfurt,24.501852
58,Urban,Deanville,25.842632


In [9]:
Suburban_cities.head()

Unnamed: 0,type,city,ride_id
18,Suburban,Barronchester,16
19,Suburban,Bethanyland,18
20,Suburban,Brandonfort,19
21,Suburban,Colemanland,22
22,Suburban,Davidfurt,17


In [10]:
Rural_cities.head()

Unnamed: 0,type,city,driver_count
0,Rural,Bradshawfurt,70
1,Rural,Garzaport,21
2,Rural,Harringtonfort,24
3,Rural,Jessicaport,6
4,Rural,Lake Jamie,24


In [None]:
# Build the scatter plots for each city types
plt.scatter(total_rides, avg_fare,
  s=total_drivers)
 plt.grid()

In [None]:
# Incorporate the other graph properties
plt.title("Pyber Ride Sharing Data (2016)")
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")

In [None]:
# Create a legend
plt.legend(loc="upper right", title="City Type")

In [None]:
# Incorporate a text label regarding circle size
# Note: Circle Size correlates with driver count per city 

In [None]:
# Save Figure
plt.savefig("../Pyber/RideShareAnalysis.png")
plt.show()

# Total Fares by City Type

In [None]:
# Pie Chart #1: % of Total Fares by City Type
# The values of each section of the pie chart
total_fares = combined_df.groupby(['type'])['fares'].sum()

# Labels for the sections of our pie chart
labels = ["Rural","Suburban","Urban" ]

# The colors of each section of the pie chart
colors = ["yellow","lightblue","red"]

# Tells matplotlib to seperate the "Python" section from the others
explode = (0, 0, 0.1)

In [None]:
plt.pie(total_fares, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)
# Tells matplotlib that we want a pie chart with equal axes
plt.axis("equal")

In [None]:
plt.savefig("../Pyber/TotalFares_CityType.png")
plt.show()

# Total Rides by City Type

In [None]:
# Pie Chart #2: % of Total Rides by City Type
# The values of each section of the pie chart
total_rides = combined_df.groupby(['type'])['ride_id'].count()

# Labels for the sections of our pie chart
labels = ["Rural","Suburban","Urban" ]

# The colors of each section of the pie chart
colors = ["yellow","lightblue","red"]

# Tells matplotlib to seperate the "Python" section from the others
explode = (0, 0, 0.1)

In [None]:
plt.pie(total_rides, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)
# Tells matplotlib that we want a pie chart with equal axes
plt.axis("equal")

In [None]:
plt.savefig("../Pyber/TotalRides_CityType.png")
plt.show()

# Total Drivers by City Type

In [None]:
# Pie Cahrt #3: % of Total Drivers by City Type
# The values of each section of the pie chart
total_drivers = combined_df.groupby(['type'])['driver_count'].sum()

# Labels for the sections of our pie chart
labels = ["Rural","Suburban","Urban" ]

# The colors of each section of the pie chart
colors = ["yellow","lightblue","red"]

# Tells matplotlib to seperate the "Python" section from the others
explode = (0, 0, 0.1)

In [None]:
plt.pie(total_drivers, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)
# Tells matplotlib that we want a pie chart with equal axes
plt.axis("equal")

In [None]:
plt.savefig("../Pyber/TotalDriveres_CityType.png")
plt.show()

# Check Yourself

In [None]:
# Average Fare ($) Per City
# avg_city_fares = city_group["fare"].mean()
# avg_city_fares

In [None]:
# Total Number of Rides Per City
# city_rides = city_group.ride_id.count()
# city_rides

In [None]:
# Total Number of Drivers Per City
# total_drivers_city = city_group.driver_count.sum()
# total_drivers_city

In [None]:
# City Type (Urban, Suburban, Rural)
# city_types = combined_df.groupby('type').count()
# city_types.head()