In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Data files
ride_data_orig = "Resources/Metro_Bike_Share_Trip_Data.csv"
ride_data_17_Q2 = "Resources/la_metro_gbfs_trips_Q2_2017.csv"
ride_data_17_Q3 = "Resources/metro-bike-share-trips-2017-q3.csv"
ride_data_17_Q4 = "Resources/metro-bike-share-trips-2017-q4-v2.csv"
ride_data_18_Q1 = "Resources/metro-bike-share-trips-2018-q1.csv"
weather_data = "Resources/la_weather_2016_2018.csv"

ride_orig_df = pd.read_csv(ride_data_orig, low_memory=False)
ride_17_Q2_df = pd.read_csv(ride_data_17_Q2, low_memory=False)
ride_17_Q3_df = pd.read_csv(ride_data_17_Q3, low_memory=False)
ride_17_Q4_df = pd.read_csv(ride_data_17_Q4, low_memory=False)
ride_18_Q1_df = pd.read_csv(ride_data_18_Q1, low_memory=False)
weather_data_df = pd.read_csv(weather_data, low_memory=False)

# Make the start time column headers the same
ride_orig_df.rename(columns={'Start Time':'start_time'}, inplace=True)

# Convert to datetime objects (first dataframe contains data from 7/16 to 3/17)
ride_orig_df['start_time'] = pd.to_datetime(ride_orig_df['start_time'], format='%m/%d/%Y %I:%M:%S %p')
ride_17_Q2_df['start_time'] = pd.to_datetime(ride_17_Q2_df['start_time'], format='%m/%d/%y %H:%M')
ride_17_Q3_df['start_time'] = pd.to_datetime(ride_17_Q3_df['start_time'], format='%m/%d/%y %H:%M')
ride_17_Q4_df['start_time'] = pd.to_datetime(ride_17_Q4_df['start_time'], format='%m/%d/%y %H:%M')
ride_18_Q1_df['start_time'] = pd.to_datetime(ride_18_Q1_df['start_time'], format='%m/%d/%y %H:%M')

# Add columns to original data so can be split out by year and month
ride_orig_df['Month'] = ride_orig_df['start_time'].dt.month
ride_orig_df['Year'] = ride_orig_df['start_time'].dt.year


In [2]:
# Bin defines

# 12am-6:30, 6:30-11:00, 11:00-2:00, 2:00-5:30, 5:30-7:30, 7:30-11:00, 11:00-12am
bins = [0, 390, 660, 840, 1050, 1170, 1380, 1440]

# Create the names for the bins
group_names = ['am_night (12am-6:30am)', 'morning (6:30am-11am)', 'lunch (11am-2pm)',
               'afternoon (2pm-5:30pm)', 'pm rush (5:30pm-7:30pm)', 'evening (7:30pm-11pm)', 'pm_night (11pm-12am)']


In [3]:
### Create a column in the dataframes to enable binning.
### Convert the time to total minutes (seconds are ignored)
### Create dataframs for each quarter

# Splitup original data file into 3 quarters
# Get 2016 data
ride_2106_df = ride_orig_df.loc[ride_orig_df['Year'] == 2016]

# Get Months
ride_2106_16_7_df = ride_2106_df.loc[ride_2106_df['Month'] == 7]
ride_2106_16_8_df = ride_2106_df.loc[ride_2106_df['Month'] == 8]
ride_2106_16_9_df = ride_2106_df.loc[ride_2106_df['Month'] == 9]
ride_2106_16_10_df = ride_2106_df.loc[ride_2106_df['Month'] == 10]
ride_2106_16_11_df = ride_2106_df.loc[ride_2106_df['Month'] == 11]
ride_2106_16_12_df = ride_2106_df.loc[ride_2106_df['Month'] == 12]

# Create 2016 quarters total minutes dataframes
frames = [ride_2106_16_7_df, ride_2106_16_7_df, ride_2106_16_7_df]
ride_16_Q3_df = pd.concat(frames, ignore_index=True)
frames = [ride_2106_16_10_df, ride_2106_16_11_df, ride_2106_16_12_df]
ride_16_Q4_df = pd.concat(frames, ignore_index=True)

# Get 2017 data
ride_17_Q1_df = ride_orig_df.loc[ride_orig_df['Year'] == 2017]

# Get total minutes
total_minutes_16_Q3 = ride_16_Q3_df['start_time'].dt.minute + (ride_16_Q3_df['start_time'].dt.hour * 60)
total_minutes_16_Q4 = ride_16_Q4_df['start_time'].dt.minute + (ride_16_Q4_df['start_time'].dt.hour * 60)
total_minutes_17_Q1 = ride_17_Q1_df['start_time'].dt.minute + (ride_17_Q1_df['start_time'].dt.hour * 60)
total_minutes_17_Q2 = ride_17_Q2_df['start_time'].dt.minute + (ride_17_Q2_df['start_time'].dt.hour * 60)
total_minutes_17_Q3 = ride_17_Q3_df['start_time'].dt.minute + (ride_17_Q3_df['start_time'].dt.hour * 60)
total_minutes_17_Q4 = ride_17_Q4_df['start_time'].dt.minute + (ride_17_Q4_df['start_time'].dt.hour * 60)
total_minutes_18_Q1 = ride_18_Q1_df['start_time'].dt.minute + (ride_18_Q1_df['start_time'].dt.hour * 60)

# Create dataframes
total_minutes_16_Q3_df = pd.DataFrame(total_minutes_16_Q3)
total_minutes_16_Q4_df = pd.DataFrame(total_minutes_16_Q4)
total_minutes_17_Q1_df = pd.DataFrame(total_minutes_17_Q1)
total_minutes_17_Q2_df = pd.DataFrame(total_minutes_17_Q2)
total_minutes_17_Q3_df = pd.DataFrame(total_minutes_17_Q3)
total_minutes_17_Q4_df = pd.DataFrame(total_minutes_17_Q4)
total_minutes_18_Q1_df = pd.DataFrame(total_minutes_18_Q1)


In [4]:
### Binning
### 2016 Q3

# Renaming to make sure things are clear
total_minutes_16_Q3_df.rename(columns={'start_time':'time_in_minutes'}, inplace=True)

total_minutes_16_Q3_df_cut = total_minutes_16_Q3_df.copy()
total_minutes_16_Q3_df_cut["time_of_day"] = pd.cut(total_minutes_16_Q3_df_cut["time_in_minutes"], bins, labels=group_names)

# Now group up the times of day
groupedby_time_of_day_cut = total_minutes_16_Q3_df_cut.groupby('time_of_day')
groupedby_time_of_day_cut.count()


Unnamed: 0_level_0,time_in_minutes
time_of_day,Unnamed: 1_level_1
am_night (12am-6:30am),1422
morning (6:30am-11am),5688
lunch (11am-2pm),7716
afternoon (2pm-5:30pm),7716
pm rush (5:30pm-7:30pm),5388
evening (7:30pm-11pm),5643
pm_night (11pm-12am),687


In [5]:
### Now... the Binning
### With lunch hour
### ALL DATA

# Create one dataframe for all data
frames = [total_minutes_16_Q3_df, total_minutes_16_Q4_df, total_minutes_17_Q2_df, total_minutes_17_Q3_df,
          total_minutes_17_Q4_df, total_minutes_18_Q1_df]
all_time_of_day_df = pd.concat(frames, ignore_index=True, sort=False)

# Renaming to make sure things are clear
all_time_of_day_df.rename(columns={'start_time':'time_in_minutes'}, inplace=True)

all_time_of_day_df_cut = all_time_of_day_df.copy()
all_time_of_day_df_cut["time_of_day"] = pd.cut(all_time_of_day_df_cut["time_in_minutes"], bins, labels=group_names)

# Now group up the times of day
groupedby_time_of_day_cut = all_time_of_day_df_cut.groupby('time_of_day')
groupedby_time_of_day_cut.count()


ValueError: Input array must be 1 dimensional