In [59]:
# Add Matplotlib inline magic command
%matplotlib notebook
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np
import scipy.stats as sts

In [60]:
# Files to load
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"

In [61]:
# Read the city data file and store it in a pandas DataFrame.
city_data_df = pd.read_csv(city_data_to_load)
ride_data_df = pd.read_csv(ride_data_to_load)

In [62]:
# Total number of drivers in each area 
total_number_drivers_df = pd.DataFrame(city_data_df.groupby("type").sum()["driver_count"])
total_number_drivers_df
total_number_drivers_df.reset_index()

Unnamed: 0,type,driver_count
0,Rural,78
1,Suburban,490
2,Urban,2405


In [63]:
# Combine the data into a single dataset
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on=["city", "city"])

In [64]:
# Create the Urban, Suburban, rural DataFrame.
urban_cities_df = pyber_data_df[pyber_data_df["type"] == "Urban"]
suburban_cities_df = pyber_data_df[pyber_data_df["type"] == "Suburban"]
rural_cities_df = pyber_data_df[pyber_data_df["type"] == "Rural"]

In [65]:
# Get the number of total rides for urban, suburban, rural cities.
urban_ride_count = urban_cities_df.count()["ride_id"]
suburban_ride_count = suburban_cities_df.count()["ride_id"]
rural_ride_count = rural_cities_df.count()["ride_id"]
total_rides = {'Total Rides': [urban_ride_count, suburban_ride_count, rural_ride_count]}
total_rides_df = pd.DataFrame(total_rides, index=['Urban', 'Suburban', 'Rural'])
# total_rides_df

In [66]:
# Get total fare for each city in the urban, suburban, rural cities.
urban_total_fare = urban_cities_df.sum()["fare"]
suburban_total_fare = suburban_cities_df.sum()["fare"]
rural_total_fare = rural_cities_df.sum()["fare"]
total_fare = {'Total Fares': [urban_total_fare, suburban_total_fare, rural_total_fare]}
total_fare_df = pd.DataFrame(total_fare, index=['Urban', 'Suburban', 'Rural'])
# total_fare_df 

In [67]:
# Get average fare per ride for each city in the urban, suburban, rural cities.
urban_avg_fare = urban_cities_df.mean()["fare"]
suburban_avg_fare = suburban_cities_df.mean()["fare"]
rural_avg_farerural_avg_fare = rural_cities_df.mean()["fare"]
avg_fare_per_ride = {'Average Fare per Ride': [urban_avg_fare, suburban_avg_fare, rural_avg_farerural_avg_fare]}
avg_fare_per_ride_df = pd.DataFrame(avg_fare_per_ride, index=['Urban', 'Suburban', 'Rural'])
# avg_fare_per_ride_df

In [77]:
# Calculate Avg Fare Per Ride
average_fare_per_ride = pyber_data_df.groupby(["type"]).sum()["fare"] / pyber_data_df.groupby(["type"]).count()["ride_id"]
average_fare_per_ride_df = pd.DataFrame(avg_fare_per_ride, index=['Urban','Suburban', 'Rural'])
average_fare_per_ride_df.index.name = "type"
average_fare_per_ride_df
average_fare_per_ride_df.reset_index()

Unnamed: 0,type,Average Fare per Ride
0,Urban,24.525772
1,Suburban,30.970128
2,Rural,34.62344


In [78]:
# Combining data 1.0
combined_try_this = {'Total Fares': [urban_total_fare, suburban_total_fare, rural_total_fare],
                     'Average Fare per Ride': [urban_avg_fare, suburban_avg_fare, rural_avg_farerural_avg_fare],
                    'Total Rides': [urban_ride_count, suburban_ride_count, rural_ride_count]}
combined_try_this_df = pd.DataFrame(data=combined_try_this, index=['Urban','Suburban', 'Rural'])
combined_try_this_df.index.name = 'type'
combined_try_this_df
combined_try_this_df.reset_index()

Unnamed: 0,type,Total Fares,Average Fare per Ride,Total Rides
0,Urban,39854.38,24.525772,1625
1,Suburban,19356.33,30.970128,625
2,Rural,4327.93,34.62344,125


In [84]:
# Combining data 2.0
pyber_data_2df = pd.merge(combined_try_this_df, total_number_drivers_df, how="left", on=["type","type"])
pyber_data_3df = pd.DataFrame(pyber_data_2df)
pyber_data_3df.reset_index()


ValueError: cannot insert type, already exists

KeyError: "['type'] not found in axis"

In [35]:
# Rename columns {'city': 'City', 'date':'Date','fare':'Fare', 'ride_id': 'Ride Id','driver_count': 'No. Drivers', 'type':'City Type'}.
pyber_data_2_df = pyber_data_df.rename(columns={'city': 'City', 'date':'Date','fare':'Fare', 'ride_id': 'Ride Id','driver_count': 'No. Drivers', 'type':'City Type'})

In [36]:
# Create a new DataFrame for fares and include only the Date, City Type, and Fare columns using the copy() method on the merged DataFrame.
pyber_data_2_df

Unnamed: 0,City,Date,Fare,Ride Id,No. Drivers,City Type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban
...,...,...,...,...,...,...
2370,Michaelberg,2019-04-29 17:04:39,13.38,8550365057598,6,Rural
2371,Lake Latoyabury,2019-01-30 00:05:47,20.76,9018727594352,2,Rural
2372,North Jaime,2019-02-10 21:03:50,11.11,2781339863778,1,Rural
2373,West Heather,2019-05-07 19:22:15,44.94,4256853490277,4,Rural


In [79]:
# Drop the extra Date column.

In [218]:
# Set the index to the datetime data type.
pyber_data_2_df.loc['Date']

KeyError: 'Date'

In [214]:
# Check the DataFrame using the info() method to make sure the index is a datetime data type.
pyber_data_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2375 entries, 0 to 2374
Data columns (total 6 columns):
city            2375 non-null object
date            2375 non-null object
fare            2375 non-null float64
ride_id         2375 non-null int64
driver_count    2375 non-null int64
type            2375 non-null object
dtypes: float64(1), int64(2), object(3)
memory usage: 129.9+ KB


In [196]:
# Calculate the sum() of fares by the type of city and date using groupby() to create a Series.
# Convert the groupby() Series into a DataFrame.
sum_of_fares = pd.DataFrame(pyber_data_df.groupby("type").sum()["fare"])
sum_of_fares

Unnamed: 0_level_0,fare
type,Unnamed: 1_level_1
Rural,4327.93
Suburban,19356.33
Urban,39854.38


In [220]:
# Reset the index, which is needed for for cell below.
sum_of_fares.reset_index()

Unnamed: 0,type,fare
0,Rural,4327.93
1,Suburban,19356.33
2,Urban,39854.38


In [None]:
# Create a pivot table DataFrame with the Date as the index and columns = 'City Type' with the Fare for each Date in each row. Note: There will be NaNs in some rows, which will be taken care of when you sum based on the date.


In [None]:
# Create a new DataFrame from the pivot table DataFrame on the given dates '2019-01-01':'2019-04-28' using loc .


In [None]:
# Create a new DataFrame by setting the DataFrame you created in Step 11 with resample() in weekly bins, and calculate the sum() of the fares for each week.


In [None]:
# Using the object-oriented interface method, plot the DataFrame you created in Step 12 using the df.plot() function.
    # Things to consider with your plotting:
    # Import the style from Matplotlib.
    # Use the graph style fivethirtyeight.
    # Add a title.
    # Add x- and y-axes labels according to the final figure.
    # Save the figure to the “analysis” folder.
    # Make the figure size large enough so it’s not too small.