In [2]:
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Input CSV locations (adjust to wherever the data files live)
city_data_to_load = "data/city_data.csv"
ride_data_to_load = "data/ride_data.csv"

# Load both files, city data first and ride data second
city_df, ride_df = (
    pd.read_csv(csv_path)
    for csv_path in (city_data_to_load, ride_data_to_load)
)

# Attach the city-level columns to every ride record (left join on city name)
merged_df = ride_df.merge(city_df, how='left', on='city')

# Preview the first five rows of the combined table
merged_df.head()

FileNotFoundError: [Errno 2] File b'data/city_data.csv' does not exist: b'data/city_data.csv'

## Bubble Plot of Ride Sharing Data

In [None]:
# Group the merged ride records by city name
by_city = merged_df.groupby('city')

# Average fare per city. The column is selected *before* aggregating: calling
# .mean() on the whole group also tries to average the non-numeric columns
# (e.g. the string 'type' column), which raises TypeError on pandas >= 2.0.
avg_fare = by_city['fare'].mean()

# Number of rides per city
ride_count = by_city['ride_id'].count()

# Driver count per city, taken as the per-city mean of the merged column
# (presumably constant within a city -- verify against the city data)
driver_count = by_city['driver_count'].mean()

# City type, indexed by city name so it aligns with the series above
city_type = city_df.set_index('city')['type']

# One row per city, assembled from the series above (aligned on city name)
city_info = pd.DataFrame({
    "Number of Rides": ride_count,
    "Average Fare": avg_fare,
    "Number of Drivers": driver_count,
    "Type of City": city_type
})

# Separate frames for each city category
rural = city_info[city_info['Type of City'] == 'Rural']
suburban = city_info[city_info['Type of City'] == 'Suburban']
urban = city_info[city_info['Type of City'] == 'Urban']

# Colour dictionary: named hex colours, then the colour assigned to each city type
color_scheme = {'Gold':'#FFD700', 'Sky Blue':'#87CEFA', 'Light Coral':'#F08080'}
city_color = {'Urban': color_scheme['Light Coral'], 'Suburban': color_scheme['Sky Blue'], 'Rural': color_scheme['Gold']}

plt.suptitle('Pyber Ride Sharing Data')

# One scatter series per city type; marker area is ten times the driver count.
# Plotted in this order so the legend reads Urban, Suburban, Rural.
for type_label, type_df in (('Urban', urban), ('Suburban', suburban), ('Rural', rural)):
    plt.scatter(
        type_df['Number of Rides'],
        type_df['Average Fare'],
        s=type_df['Number of Drivers'] * 10,
        color=city_color[type_label],
        edgecolor='black',
        label=type_label,
        alpha=.75,
    )

# Titles and axis labels
plt.title('Note : Circle size correlates with Driver count per City')
plt.xlabel('Number of Rides per City')
plt.ylabel('Average Fare per City')

legend = plt.legend(frameon=True, edgecolor='black')

# Give every legend marker the same fixed size so the key stays readable
# regardless of the bubble sizes in the plot. Matplotlib 3.7 renamed
# Legend.legendHandles to legend_handles (the old name was later removed),
# so prefer the new attribute and fall back for older releases.
legend_markers = getattr(legend, 'legend_handles', None)
if legend_markers is None:
    legend_markers = legend.legendHandles
for legend_marker in legend_markers:
    # Public setter instead of writing to the private _sizes attribute
    legend_marker.set_sizes([75])

plt.grid()
plt.show()

## Total Fares by City Type

In [None]:
# Group rides by city type, keeping only the numeric columns of interest.
# Columns must be selected with a list: tuple-style selection such as
# gb['a', 'b'] is rejected by current pandas. The grouping key 'type' is not
# re-selected because it already becomes the index of every aggregate.
by_type = merged_df.groupby('type')[['fare', 'ride_id', 'driver_count']]

# Total fare collected per city type
sum_of_fare = by_type.sum()['fare']

# Build the pie chart: one wedge per city type, coloured via city_color
labels = sum_of_fare.index
colors = [city_color[n] for n in labels]
# Pull the Urban wedge out of the pie. Deriving the offsets from the labels
# keeps the list the same length as the data even if a city type is missing.
explode = [.2 if n == 'Urban' else 0 for n in labels]
plt.pie(sum_of_fare, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=160)

plt.suptitle('% of Total Fares by City Type')
plt.show()

## Total Rides by City Type

In [None]:
# Number of rides per city type, computed directly from merged_df so this
# cell does not rely on a groupby object left behind by another cell.
ride_sum = merged_df.groupby('type')['ride_id'].count()
labels = ride_sum.index

# Build the pie chart: one wedge per city type, coloured via city_color
colors = [city_color[n] for n in labels]
# Pull the Urban wedge out of the pie. Deriving the offsets from the labels
# keeps the list the same length as the data even if a city type is missing.
explode = [.2 if n == 'Urban' else 0 for n in labels]
plt.pie(ride_sum, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)

plt.suptitle('% of Total Rides by City Type')
plt.show()

## Total Drivers by City Type

In [None]:
# Total drivers per city type, taken from city_df rather than the merged frame.
# The column is selected before summing so the non-numeric columns
# (e.g. the city name) are never aggregated.
driver_sum = city_df.groupby('type')['driver_count'].sum()
labels = driver_sum.index

# Build the pie chart: one wedge per city type, coloured via city_color
colors = [city_color[n] for n in labels]
# Pull the Urban wedge out of the pie. Deriving the offsets from the labels
# keeps the list the same length as the data even if a city type is missing.
explode = [.2 if n == 'Urban' else 0 for n in labels]
plt.pie(driver_sum, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)

plt.suptitle('% of Total Drivers by City Type')
plt.show()

In [None]:
# Save the current pyplot figure to a PNG file, then display it.
# NOTE(review): every earlier plotting cell already ends with plt.show(), and
# this cell creates no plot of its own. Under `%matplotlib inline` that
# presumably leaves no drawn figure active here, so this likely writes an
# empty image -- confirm, and consider calling savefig() inside the plotting
# cell before its plt.show().
# NOTE(review): the output path is relative to the working directory and
# assumes ../Desktop/Homework already exists -- verify.
plt.savefig("../Desktop/Homework/Pyber.png")
plt.show()