In [1]:
# Add Matplotlib inline magic command.
%matplotlib inline

# Import dependencies.
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as sts

In [2]:
# Locations of the two input CSV files, relative to the notebook's working directory.
city_data_to_load, ride_data_to_load = (
    "Resources/city_data.csv",  # per-city table
    "Resources/ride_data.csv",  # per-ride table
)

In [5]:
# Load the city CSV into a DataFrame and preview its first five rows.
city_data_df = pd.read_csv(filepath_or_buffer=city_data_to_load)
city_data_df.head(5)

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban


In [4]:
# Load the ride CSV into a DataFrame and preview its first five rows.
# No date parsing is requested here, so the "date" column is left as read from the file.
ride_data_df = pd.read_csv(filepath_or_buffer=ride_data_to_load)
ride_data_df.head(5)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344


In [6]:
# Combine the data into a single ride-level dataset.
# A left join keeps every row of ride_data_df and attaches the matching city's
# remaining columns (driver_count, type). The join key is the single shared
# column "city", so it is named once rather than repeated in a list.
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on="city")
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


In [None]:
# Get the sum of all the fares across every ride in the merged dataset.
total_fares = pyber_data_df["fare"].sum()
total_fares

In [None]:
# Count rides per city type: group the merged frame by "type" and count the
# non-null ride_id values in each group.
total_rides_by_type = pyber_data_df.groupby("type")["ride_id"].count()
total_rides_by_type

In [None]:
# Get the total number of rides for all cities (count of non-null ride_id values).
total_rides = pyber_data_df["ride_id"].count()
total_rides

In [None]:
# Sum the driver counts per city type.
# This reads city_data_df, not pyber_data_df: the merged frame carries a city's
# driver_count on every one of its ride rows, so summing there would count the
# same drivers once per ride.
total_drivers_by_type = city_data_df.groupby("type")["driver_count"].sum()
total_drivers_by_type

In [None]:
# Get the total number of drivers across all cities.
# Summed from city_data_df rather than pyber_data_df: the merged frame carries
# driver_count on every ride row, so summing it there would overcount.
total_drivers = city_data_df["driver_count"].sum()
total_drivers

In [None]:
# Calculate the average fare per ride across all city types
# (overall fare total divided by overall ride count).
avg_ride_fare = total_fares / total_rides
avg_ride_fare

In [None]:
# Calculate the overall average fare per driver
# (overall fare total divided by the total driver count from the city table).
avg_ride_fare_per_driver = total_fares / total_drivers
avg_ride_fare_per_driver

In [None]:
# Calculate the average fare per driver for each city type.
# The per-type fare total is computed here because no earlier cell defines it;
# without this line the division below raises NameError on a fresh kernel.
sum_fares_by_type = pyber_data_df.groupby("type")["fare"].sum()

# Both Series are indexed by city type, so the division aligns type by type.
average_driver_fare_by_type = sum_fares_by_type / total_drivers_by_type
average_driver_fare_by_type

In [None]:
# Build a per-city-type summary table (one row per city type).
# Every column is a Series indexed by city type. Passing the notebook-wide
# scalars (total_fares, avg_ride_fare, avg_ride_fare_per_driver) here would
# broadcast the same overall number onto every row, so the per-type values
# are derived in this cell instead.
fares_by_type = pyber_data_df.groupby("type")["fare"].sum()

pyber_data_summary_df = pd.DataFrame({
    "Total Rides": total_rides_by_type,
    "Total Drivers": total_drivers_by_type,
    "Total Fares": fares_by_type,
    "Average Fare per Ride": fares_by_type / total_rides_by_type,
    "Average Fare per Driver": fares_by_type / total_drivers_by_type,
})
pyber_data_summary_df

In [None]:
# Relabel the merged frame's columns and index it by ride date.
# NOTE: this rebinds pyber_data_df, so the cells above (which use the original
# lowercase column names) cannot be re-run after this one without rebuilding
# the merged frame first.
new_column_names = {
    'city': 'City',
    'date': 'Date',
    'fare': 'Fare',
    'ride_id': 'Ride Id',
    'driver_count': 'No. Drivers',
    'type': 'City Type',
}
pyber_data_df = pyber_data_df.rename(columns=new_column_names).set_index('Date')
pyber_data_df

In [None]:

# Build a separate frame without the ride id, city name and driver count columns;
# the Date index and the remaining columns are kept. drop() returns a new
# DataFrame, so pyber_data_df itself is left untouched.
pyber_data_df_copy1 = pyber_data_df.drop(columns=['Ride Id', 'City', 'No. Drivers'])
pyber_data_df_copy1

In [None]:
# Total the fares for every (city type, date) pair, then turn the two grouping
# keys back into ordinary columns so the result can be pivoted.
pyber_data_df_FareSum = (
    pyber_data_df_copy1
    .groupby(["City Type", "Date"])
    .sum()
    .reset_index()
)
pyber_data_df_FareSum.head(20)

In [None]:
# Pivot the fare totals so that each Date is a row and each City Type is a column.
# - values is given explicitly (as a list, which keeps the same two-level
#   ("Fare", <city type>) column layout the implicit default produced) so an
#   extra column in the input cannot silently leak into the table.
# - aggfunc is the string "sum": passing the np.sum callable is deprecated in
#   recent pandas releases and emits a FutureWarning.
pyber_data_pivottable = pd.pivot_table(
    pyber_data_df_FareSum,
    values=["Fare"],
    index=["Date"],
    columns=["City Type"],
    aggfunc="sum",
)
pyber_data_pivottable

In [None]:
# Restrict the pivot table to rides dated 2019-01-01 through 2019-04-28.
# The Date index still holds strings at this point (it is only converted to
# datetimes in the resampling step), so the slice compares labels as text:
# the upper bound '2019-04-29' sorts after every '2019-04-28 hh:mm:ss' label and
# before every '2019-04-29 hh:mm:ss' label, which keeps all of April 28th and
# nothing from April 29th.
date_window = slice('2019-01-01', '2019-04-29')
pyber_data_dates = pyber_data_pivottable.loc[date_window]
pyber_data_dates

In [None]:
# Aggregate the fares into weekly totals.
# resample() needs a datetime-like index, so the Date labels are converted to
# datetime64[ns] first (this replaces the index on pyber_data_dates itself).
datetime_index = pyber_data_dates.index.astype('datetime64[ns]')
pyber_data_dates.index = datetime_index

# 'W' bins the rows by week; sum() totals each column within a bin.
pyber_data_dates_resample = pyber_data_dates.resample('W').sum()
pyber_data_dates_resample

In [None]:
# Plot the weekly fare totals, one line per city type, and save the figure.
plt.style.use('fivethirtyeight')

# Explicit figure/axes instead of implicit pyplot state, so every call below
# targets this plot unambiguously.
fig, ax = plt.subplots(figsize=(12, 4))
pyber_data_plot = pyber_data_dates_resample.plot(ax=ax)
ax.set_title('Total Fare by City Type')
ax.set_xlabel('Month')
ax.set_ylabel('Fare($USD)')

# Take the legend labels from the innermost column level (the city types)
# rather than hard-coding them, so labels always match the plotted columns.
# This works for both flat and two-level column indexes.
city_type_labels = [str(label) for label in pyber_data_dates_resample.columns.get_level_values(-1)]
line_handles, _ = ax.get_legend_handles_labels()
# mode="Expanded" and scatterpoints were dropped: "expand" is the only legend
# mode matplotlib recognises besides None, and scatterpoints only affects
# scatter-plot legend entries, so neither changed this line plot's legend.
lgnd = ax.legend(line_handles, city_type_labels, fontsize=10,
                 loc="best", title="City Types")
lgnd.get_title().set_fontsize(10)

# Create the output folder if needed; savefig does not create missing directories.
output_path = Path('analysis') / 'Fig7.png'
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(output_path)