In [2]:
# Import Dependencies
from matplotlib import pyplot as plt
from scipy.stats import linregress
import numpy as np
from sklearn import datasets
import pandas as pd
import requests
import gmaps
import os

# Import API key
from api_keys import g_key


In [3]:
# Import data file
divvy_df = pd.read_csv('Divvy_Trips_2019_Q4.csv')
divvy_df.head()

Unnamed: 0,trip_id,start_time,end_time,bikeid,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear
0,25223640,2019-10-01 00:01:39,2019-10-01 00:17:20,2215,940.0,20,Sheffield Ave & Kingsbury St,309,Leavitt St & Armitage Ave,Subscriber,Male,1987.0
1,25223641,2019-10-01 00:02:16,2019-10-01 00:06:34,6328,258.0,19,Throop (Loomis) St & Taylor St,241,Morgan St & Polk St,Subscriber,Male,1998.0
2,25223642,2019-10-01 00:04:32,2019-10-01 00:18:43,3003,850.0,84,Milwaukee Ave & Grand Ave,199,Wabash Ave & Grand Ave,Subscriber,Female,1991.0
3,25223643,2019-10-01 00:04:32,2019-10-01 00:43:43,3275,2350.0,313,Lakeview Ave & Fullerton Pkwy,290,Kedzie Ave & Palmer Ct,Subscriber,Male,1990.0
4,25223644,2019-10-01 00:04:34,2019-10-01 00:35:42,5294,1867.0,210,Ashland Ave & Division St,382,Western Ave & Congress Pkwy,Subscriber,Male,1987.0


In [4]:
divvy_df.shape

(704054, 12)

In [5]:
# Remove null rows (if needed)
divvy_df.dropna()
divvy_df.shape

(704054, 12)

In [6]:
# Convert birthyear to age
divvy_df['age'] = (2021 - divvy_df['birthyear'])
divvy_df.head()

Unnamed: 0,trip_id,start_time,end_time,bikeid,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear,age
0,25223640,2019-10-01 00:01:39,2019-10-01 00:17:20,2215,940.0,20,Sheffield Ave & Kingsbury St,309,Leavitt St & Armitage Ave,Subscriber,Male,1987.0,34.0
1,25223641,2019-10-01 00:02:16,2019-10-01 00:06:34,6328,258.0,19,Throop (Loomis) St & Taylor St,241,Morgan St & Polk St,Subscriber,Male,1998.0,23.0
2,25223642,2019-10-01 00:04:32,2019-10-01 00:18:43,3003,850.0,84,Milwaukee Ave & Grand Ave,199,Wabash Ave & Grand Ave,Subscriber,Female,1991.0,30.0
3,25223643,2019-10-01 00:04:32,2019-10-01 00:43:43,3275,2350.0,313,Lakeview Ave & Fullerton Pkwy,290,Kedzie Ave & Palmer Ct,Subscriber,Male,1990.0,31.0
4,25223644,2019-10-01 00:04:34,2019-10-01 00:35:42,5294,1867.0,210,Ashland Ave & Division St,382,Western Ave & Congress Pkwy,Subscriber,Male,1987.0,34.0


In [7]:
# Create bins for age
bins = [10, 20, 30, 40, 50, 60, 70, 80, 90]

# Create the names for the five bins
group_names = ["11-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89"]
divvy_df["Age Group"] = pd.cut(divvy_df["age"], bins, labels=group_names, include_lowest=True)
divvy_df.head()

Unnamed: 0,trip_id,start_time,end_time,bikeid,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear,age,Age Group
0,25223640,2019-10-01 00:01:39,2019-10-01 00:17:20,2215,940.0,20,Sheffield Ave & Kingsbury St,309,Leavitt St & Armitage Ave,Subscriber,Male,1987.0,34.0,30-39
1,25223641,2019-10-01 00:02:16,2019-10-01 00:06:34,6328,258.0,19,Throop (Loomis) St & Taylor St,241,Morgan St & Polk St,Subscriber,Male,1998.0,23.0,20-29
2,25223642,2019-10-01 00:04:32,2019-10-01 00:18:43,3003,850.0,84,Milwaukee Ave & Grand Ave,199,Wabash Ave & Grand Ave,Subscriber,Female,1991.0,30.0,20-29
3,25223643,2019-10-01 00:04:32,2019-10-01 00:43:43,3275,2350.0,313,Lakeview Ave & Fullerton Pkwy,290,Kedzie Ave & Palmer Ct,Subscriber,Male,1990.0,31.0,30-39
4,25223644,2019-10-01 00:04:34,2019-10-01 00:35:42,5294,1867.0,210,Ashland Ave & Division St,382,Western Ave & Congress Pkwy,Subscriber,Male,1987.0,34.0,30-39


In [8]:
#Review bins
divvy_df['Age Group'].value_counts()

30-39    243138
20-29    207020
40-49     93019
50-59     65825
60-69     26771
11-19      3945
70-79      2279
80-89        82
Name: Age Group, dtype: int64

In [9]:
# Plot bins as bar graph

In [10]:
# Convert date columns to datetime format
divvy_df['start_time'] = pd.to_datetime(divvy_df['start_time'])
divvy_df['end_time'] = pd.to_datetime(divvy_df['end_time'])

In [11]:
# Split date from time and create columns
divvy_df['start_date'] = divvy_df['start_time'].dt.date
divvy_df['start_time'] = divvy_df['start_time'].dt.time
divvy_df['end_date'] = divvy_df['end_time'].dt.date
divvy_df['end_time'] = divvy_df['end_time'].dt.time
divvy_df.head()

Unnamed: 0,trip_id,start_time,end_time,bikeid,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear,age,Age Group,start_date,end_date
0,25223640,00:01:39,00:17:20,2215,940.0,20,Sheffield Ave & Kingsbury St,309,Leavitt St & Armitage Ave,Subscriber,Male,1987.0,34.0,30-39,2019-10-01,2019-10-01
1,25223641,00:02:16,00:06:34,6328,258.0,19,Throop (Loomis) St & Taylor St,241,Morgan St & Polk St,Subscriber,Male,1998.0,23.0,20-29,2019-10-01,2019-10-01
2,25223642,00:04:32,00:18:43,3003,850.0,84,Milwaukee Ave & Grand Ave,199,Wabash Ave & Grand Ave,Subscriber,Female,1991.0,30.0,20-29,2019-10-01,2019-10-01
3,25223643,00:04:32,00:43:43,3275,2350.0,313,Lakeview Ave & Fullerton Pkwy,290,Kedzie Ave & Palmer Ct,Subscriber,Male,1990.0,31.0,30-39,2019-10-01,2019-10-01
4,25223644,00:04:34,00:35:42,5294,1867.0,210,Ashland Ave & Division St,382,Western Ave & Congress Pkwy,Subscriber,Male,1987.0,34.0,30-39,2019-10-01,2019-10-01


In [12]:
# Organize columns in dataframe
divvy_df = divvy_df[["trip_id", "bikeid", "start_date", "start_time", "end_date", "end_time", "tripduration", "from_station_id", "from_station_name", "to_station_id", "to_station_name", "usertype", "gender", "birthyear", "age", "Age Group"]]
divvy_df.head()

Unnamed: 0,trip_id,bikeid,start_date,start_time,end_date,end_time,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear,age,Age Group
0,25223640,2215,2019-10-01,00:01:39,2019-10-01,00:17:20,940.0,20,Sheffield Ave & Kingsbury St,309,Leavitt St & Armitage Ave,Subscriber,Male,1987.0,34.0,30-39
1,25223641,6328,2019-10-01,00:02:16,2019-10-01,00:06:34,258.0,19,Throop (Loomis) St & Taylor St,241,Morgan St & Polk St,Subscriber,Male,1998.0,23.0,20-29
2,25223642,3003,2019-10-01,00:04:32,2019-10-01,00:18:43,850.0,84,Milwaukee Ave & Grand Ave,199,Wabash Ave & Grand Ave,Subscriber,Female,1991.0,30.0,20-29
3,25223643,3275,2019-10-01,00:04:32,2019-10-01,00:43:43,2350.0,313,Lakeview Ave & Fullerton Pkwy,290,Kedzie Ave & Palmer Ct,Subscriber,Male,1990.0,31.0,30-39
4,25223644,5294,2019-10-01,00:04:34,2019-10-01,00:35:42,1867.0,210,Ashland Ave & Division St,382,Western Ave & Congress Pkwy,Subscriber,Male,1987.0,34.0,30-39


In [13]:
# Number of unique bikes in circulation
divvy_df['bikeid'].nunique()

5670

In [14]:
# Bike IDs with most activity

divvy_df['bikeid'].value_counts()

5886    283
4832    273
1889    270
4848    267
1100    266
       ... 
805       1
3909      1
2985      1
6710      1
786       1
Name: bikeid, Length: 5670, dtype: int64

In [15]:
# Most popular starting stations
divvy_df['from_station_name'].value_counts()


Canal St & Adams St                     12937
Clinton St & Madison St                 10580
Clinton St & Washington Blvd             9834
Columbus Dr & Randolph St                7723
Kingsbury St & Kinzie St                 7326
                                        ...  
Stony Island Ave & South Chicago Ave        2
Seeley Ave & Garfield Blvd                  1
Carpenter St & 63rd St                      1
Elizabeth St & 59th St                      1
Ashland Ave & 66th St                       1
Name: from_station_name, Length: 610, dtype: int64

In [37]:
# Most popular ending stations

divvy_df['to_station_name'].value_counts()

Canal St & Adams St               12812
Clinton St & Washington Blvd      11051
Clinton St & Madison St           10360
Streeter Dr & Grand Ave            8820
Kingsbury St & Kinzie St           7172
                                  ...  
Halsted St & 51st St                  3
Seeley Ave & Garfield Blvd            2
South Chicago Ave & 83rd St           2
Elizabeth St & 59th St                2
South Chicago Ave & Elliot Ave        1
Name: to_station_name, Length: 608, dtype: int64

In [38]:
# Most popular trips *still in-progress*
popular_stations = divvy_df.groupby(['from_station_name','to_station_name'])
popular_stations.head()


Unnamed: 0,trip_id,bikeid,start_date,start_time,end_date,end_time,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear,age,Age Group
0,25223640,2215,2019-10-01,00:01:39,2019-10-01,00:17:20,940.0,20,Sheffield Ave & Kingsbury St,309,Leavitt St & Armitage Ave,Subscriber,Male,1987.0,34.0,30-39
1,25223641,6328,2019-10-01,00:02:16,2019-10-01,00:06:34,258.0,19,Throop (Loomis) St & Taylor St,241,Morgan St & Polk St,Subscriber,Male,1998.0,23.0,20-29
2,25223642,3003,2019-10-01,00:04:32,2019-10-01,00:18:43,850.0,84,Milwaukee Ave & Grand Ave,199,Wabash Ave & Grand Ave,Subscriber,Female,1991.0,30.0,20-29
3,25223643,3275,2019-10-01,00:04:32,2019-10-01,00:43:43,2350.0,313,Lakeview Ave & Fullerton Pkwy,290,Kedzie Ave & Palmer Ct,Subscriber,Male,1990.0,31.0,30-39
4,25223644,5294,2019-10-01,00:04:34,2019-10-01,00:35:42,1867.0,210,Ashland Ave & Division St,382,Western Ave & Congress Pkwy,Subscriber,Male,1987.0,34.0,30-39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
704013,25962861,6013,2019-12-31,23:19:28,2019-12-31,23:32:05,757.0,169,Canal St & Harrison St,132,Wentworth Ave & 24th St (Temp),Subscriber,Male,1982.0,39.0,30-39
704014,25962862,5474,2019-12-31,23:21:40,2019-12-31,23:37:53,973.0,146,Loomis St & Jackson Blvd,382,Western Ave & Congress Pkwy,Subscriber,,1999.0,22.0,20-29
704015,25962863,4455,2019-12-31,23:23:54,2019-12-31,23:52:32,1718.0,112,Green St & Randolph St,207,Emerald Ave & 28th St,Subscriber,Male,1980.0,41.0,40-49
704020,25962868,3939,2019-12-31,23:27:24,2020-01-01,17:25:25,64681.0,127,Lincoln Ave & Fullerton Ave,671,HUBBARD ST BIKE CHECKING (LBS-WH-TEST),Customer,Male,1995.0,26.0,20-29


In [49]:
popular_from_stations = divvy_df['from_station_name'].value_counts()

popular_from_stations.head(10)

Canal St & Adams St             12937
Clinton St & Madison St         10580
Clinton St & Washington Blvd     9834
Columbus Dr & Randolph St        7723
Kingsbury St & Kinzie St         7326
Franklin St & Monroe St          6838
Streeter Dr & Grand Ave          6774
Daley Center Plaza               6166
Michigan Ave & Washington St     6077
Lake Shore Dr & Monroe St        5960
Name: from_station_name, dtype: int64

In [41]:
station_list = divvy_df.loc[(divvy_df['from_station_id'] == 192) \
                            | (divvy_df['from_station_id'] == 77) \
                            | (divvy_df['from_station_id'] == 91)]

station_list




Unnamed: 0,trip_id,bikeid,start_date,start_time,end_date,end_time,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear,age,Age Group
10,25223650,2564,2019-10-01,00:05:30,2019-10-01,00:37:36,1925.0,77,Clinton St & Madison St,506,Spaulding Ave & Armitage Ave,Subscriber,Male,1977.0,44.0,40-49
221,25223866,3559,2019-10-01,05:16:38,2019-10-01,05:22:41,362.0,192,Canal St & Adams St,284,Michigan Ave & Jackson Blvd,Subscriber,Male,1969.0,52.0,50-59
225,25223872,6255,2019-10-01,05:18:35,2019-10-01,05:31:34,779.0,192,Canal St & Adams St,142,McClurg Ct & Erie St,Subscriber,Male,1967.0,54.0,50-59
252,25223901,944,2019-10-01,05:27:32,2019-10-01,05:42:42,910.0,91,Clinton St & Washington Blvd,211,St. Clair St & Erie St,Subscriber,Male,1960.0,61.0,60-69
425,25224079,2117,2019-10-01,06:01:31,2019-10-01,06:07:41,370.0,77,Clinton St & Madison St,89,Financial Pl & Ida B Wells Dr,Subscriber,Male,1948.0,73.0,70-79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
703857,25962694,5307,2019-12-31,19:37:36,2019-12-31,19:50:12,755.0,77,Clinton St & Madison St,337,Clark St & Chicago Ave,Subscriber,Male,1988.0,33.0,30-39
703960,25962806,1511,2019-12-31,21:47:02,2019-12-31,22:14:39,1656.0,192,Canal St & Adams St,6,Dusable Harbor,Customer,Male,2002.0,19.0,11-19
703961,25962807,1589,2019-12-31,21:47:05,2019-12-31,22:14:44,1659.0,192,Canal St & Adams St,6,Dusable Harbor,Customer,Male,2002.0,19.0,11-19
703962,25962808,1736,2019-12-31,21:47:09,2019-12-31,22:14:44,1655.0,192,Canal St & Adams St,6,Dusable Harbor,Customer,Male,2002.0,19.0,11-19


In [30]:
# Groupby usertype for ridership breakdown

usertype_df = divvy_df.groupby('usertype')
usertype_df.count()

Unnamed: 0_level_0,trip_id,start_time,end_time,bikeid,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,gender,birthyear
usertype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Customer,106194,106194,106194,106194,106194,106194,106194,106194,106194,44658,45265
Subscriber,597860,597860,597860,597860,597860,597860,597860,597860,597860,592805,597108


In [None]:
# Create calculation for average trip duration



In [None]:
# Summary statistics by each user type:
# - Gender %
# - Age groups and bins for histogram
# - Average trip duration 