In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Data files
ride_data_orig = "Resources/Metro_Bike_Share_Trip_Data.csv"
ride_data_17_Q2 = "Resources/la_metro_gbfs_trips_Q2_2017.csv"
ride_data_17_Q3 = "Resources/metro-bike-share-trips-2017-q3.csv"
ride_data_17_Q4 = "Resources/metro-bike-share-trips-2017-q4-v2.csv"
ride_data_18_Q1 = "Resources/metro-bike-share-trips-2018-q1.csv"
weather_data = "Resources/la_weather_2016_2018.csv"

ride_orig_df = pd.read_csv(ride_data_orig, low_memory=False)
ride_17_Q2_df = pd.read_csv(ride_data_17_Q2, low_memory=False)
ride_17_Q3_df = pd.read_csv(ride_data_17_Q3, low_memory=False)
ride_17_Q4_df = pd.read_csv(ride_data_17_Q4, low_memory=False)
ride_18_Q1_df = pd.read_csv(ride_data_18_Q1, low_memory=False)
weather_data_df = pd.read_csv(weather_data, low_memory=False)

# Make the start time column headers the same
ride_orig_df.rename(columns={'Start Time':'start_time'}, inplace=True)

# Convert to datetime objects (first dataframe contains data from 7/16 to 3/17)
ride_orig_df['start_time'] = pd.to_datetime(ride_orig_df['start_time'], format='%m/%d/%Y %I:%M:%S %p')
ride_17_Q2_df['start_time'] = pd.to_datetime(ride_17_Q2_df['start_time'], format='%m/%d/%y %H:%M')
ride_17_Q3_df['start_time'] = pd.to_datetime(ride_17_Q3_df['start_time'], format='%m/%d/%y %H:%M')
ride_17_Q4_df['start_time'] = pd.to_datetime(ride_17_Q4_df['start_time'], format='%m/%d/%y %H:%M')
ride_18_Q1_df['start_time'] = pd.to_datetime(ride_18_Q1_df['start_time'], format='%m/%d/%y %H:%M')


In [2]:
### Create a column in the dataframes to enable binning.
### Convert the time to total minutes (seconds are ignored)
### and combine dataframes into one

# Get total minutes
total_minutes_orig = ride_orig_df['start_time'].dt.minute + (ride_orig_df['start_time'].dt.hour * 60)
total_minutes_17_Q2 = ride_17_Q2_df['start_time'].dt.minute + (ride_17_Q2_df['start_time'].dt.hour * 60)
total_minutes_17_Q3 = ride_17_Q3_df['start_time'].dt.minute + (ride_17_Q3_df['start_time'].dt.hour * 60)
total_minutes_17_Q4 = ride_17_Q4_df['start_time'].dt.minute + (ride_17_Q4_df['start_time'].dt.hour * 60)
total_minutes_18_Q1 = ride_18_Q1_df['start_time'].dt.minute + (ride_18_Q1_df['start_time'].dt.hour * 60)

# Create dataframes
total_minutes_orig_df = pd.DataFrame(total_minutes_orig)
total_minutes_17_Q2_df = pd.DataFrame(total_minutes_17_Q2)
total_minutes_17_Q3_df = pd.DataFrame(total_minutes_17_Q3)
total_minutes_17_Q4_df = pd.DataFrame(total_minutes_17_Q4)
total_minutes_18_Q1_df = pd.DataFrame(total_minutes_18_Q1)

frames = [total_minutes_orig_df, total_minutes_17_Q2_df, total_minutes_17_Q3_df, total_minutes_17_Q4_df, total_minutes_18_Q1_df]
all_time_of_day_df = pd.concat(frames, ignore_index=True)

# Renaming to make sure things are clear
all_time_of_day_df.rename(columns={'start_time':'time_in_minutes'}, inplace=True)


In [9]:
### Now... the Binning ###
### Cut 1 with 2 hr am rush ###
### ALL DATA ###

# 12am-7:30, 7:30-9:30, 9:30-3:30, 3:30-7:30, 7:30-11:00, 11:00-12am
bins = [0, 450, 570, 930, 1170, 1380, 1440]

# Create the names for the bins
group_names = ['am_night', 'am_rush', 'midday', 'pm_rush', 'evening', 'pm_night']

all_time_of_day_df_cut1 = all_time_of_day_df.copy()
all_time_of_day_df_cut1["time_of_day"] = pd.cut(all_time_of_day_df_cut1["time_in_minutes"], bins, labels=group_names)

# Now group up the times of day
groupedby_time_of_day_cut1 = all_time_of_day_df_cut1.groupby('time_of_day')
groupedby_time_of_day_cut1.count()


Unnamed: 0_level_0,time_in_minutes
time_of_day,Unnamed: 1_level_1
am_night,25257
am_rush,39245
midday,147936
pm_rush,121920
evening,51817
pm_night,7108


In [10]:
### Now... the Binning ###
### Cut 2 with 3 hr am rush ###
### ALL DATA ###

# 12am-7:30, 6:30-9:30, 9:30-3:30, 3:30-7:30, 7:30-11:00, 11:00-12am
bins = [0, 390, 570, 930, 1170, 1380, 1440]

# Create the names for the bins
group_names = ['am_night', 'am_rush', 'midday', 'pm_rush', 'evening', 'pm_night']

all_time_of_day_df_cut2 = all_time_of_day_df.copy()
all_time_of_day_df_cut2["time_of_day"] = pd.cut(all_time_of_day_df_cut2["time_in_minutes"], bins, labels=group_names)

# Now group up the times of day
groupedby_time_of_day_cut2 = all_time_of_day_df_cut2.groupby('time_of_day')
groupedby_time_of_day_cut2.count()


Unnamed: 0_level_0,time_in_minutes
time_of_day,Unnamed: 1_level_1
am_night,15970
am_rush,48532
midday,147936
pm_rush,121920
evening,51817
pm_night,7108


In [None]:
### Calculate the bins for each quarter (2016 data approx 2 quarters) ###
