# Capital Bikeshare Project:

Use this notebook to determine:
1. Most frequent start and stop location
2. Group by member type
3. Create heat map for stop locations?
4. Determine route using Google Maps API

In [1]:
#Import dependencies
import path
import glob
import pandas as pd
import gmaps
import gmaps.datasets
from numpy import radians, cos, sin, arcsin, sqrt
import datetime 


from config import gkey
gmaps.configure(api_key=gkey)

In [2]:
#Loop thru all CSV files, and add to DataFrame
list_ = []

path = '../capitalbikeshare/CSV Files'
allFiles = glob.glob(path + "/*.csv")

for file_ in allFiles:
    df = pd.read_csv(file_,index_col=None, header=0)
    list_.append(df)

frame = pd.concat(list_, axis = 0, ignore_index = True)

frame.head()

Unnamed: 0,Duration,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type
0,679,2018-05-01 00:00:00,2018-05-01 00:11:19,31302,Wisconsin Ave & Newark St NW,31307,3000 Connecticut Ave NW / National Zoo,W22771,Member
1,578,2018-05-01 00:00:20,2018-05-01 00:09:59,31232,7th & F St NW / National Portrait Gallery,31609,Maine Ave & 7th St SW,W21320,Casual
2,580,2018-05-01 00:00:28,2018-05-01 00:10:09,31232,7th & F St NW / National Portrait Gallery,31609,Maine Ave & 7th St SW,W20863,Casual
3,606,2018-05-01 00:01:22,2018-05-01 00:11:29,31104,Adams Mill & Columbia Rd NW,31509,New Jersey Ave & R St NW,W00822,Member
4,582,2018-05-01 00:04:52,2018-05-01 00:14:34,31129,15th St & Pennsylvania Ave NW/Pershing Park,31118,3rd & Elm St NW,W21846,Member


In [3]:
#frame_groups = frame['End station'].value_counts()

frame_groups_size = frame.groupby(['Start station', 'End station',"Member type"]).size()

frame_groups_size = pd.DataFrame(frame_groups_size)
frame_groups_size = frame_groups_size.reset_index()
frame_groups_size = frame_groups_size.rename(columns={0:"Frequency"})
frame_groups_size.head()

Unnamed: 0,Start station,End station,Member type,Frequency
0,10th & E St NW,10th & E St NW,Casual,426
1,10th & E St NW,10th & E St NW,Member,140
2,10th & E St NW,10th & Florida Ave NW,Casual,12
3,10th & E St NW,10th & Florida Ave NW,Member,6
4,10th & E St NW,10th & G St NW,Casual,39


In [4]:
csv_file = "Capital_Bike_Share_Locations.csv"
bikeshare_location = pd.read_csv(csv_file)

bikeshare_locations = bikeshare_location[["ADDRESS","LATITUDE","LONGITUDE"]].copy()
bikeshare_locations_start = bikeshare_locations.rename(columns={"ADDRESS": "Start station", 
                                                                "LATITUDE": "Start Lat","LONGITUDE": "Start Long"})
bikeshare_locations_end = bikeshare_locations.rename(columns={"ADDRESS": "End station", 
                                                                "LATITUDE": "End Lat","LONGITUDE": "End Long"})

routes = pd.merge(frame_groups_size, bikeshare_locations_start, on=["Start station"])
routes = pd.merge(routes, bikeshare_locations_end, on=["End station"])
routes.head()

Unnamed: 0,Start station,End station,Member type,Frequency,Start Lat,Start Long,End Lat,End Long
0,10th & E St NW,10th & E St NW,Casual,426,38.895914,-77.026064,38.895914,-77.026064
1,10th & E St NW,10th & E St NW,Member,140,38.895914,-77.026064,38.895914,-77.026064
2,10th & Florida Ave NW,10th & E St NW,Casual,13,38.920387,-77.025672,38.895914,-77.026064
3,10th & Florida Ave NW,10th & E St NW,Member,16,38.920387,-77.025672,38.895914,-77.026064
4,10th & G St NW,10th & E St NW,Casual,21,38.898243,-77.026235,38.895914,-77.026064


In [6]:
routes["Distance"] = 0.0
routes = routes.sort_values(["Frequency"], ascending=False).reset_index()

#routes = routes.columns = ["Start station", "Start Lat", "Start Long", "End station", "End Lat", "End Long", "0"]
routes.head()

Unnamed: 0,index,Start station,End station,Member type,Frequency,Start Lat,Start Long,End Lat,End Long,Distance
0,92584,Smithsonian-National Mall / Jefferson Dr & 12t...,Smithsonian-National Mall / Jefferson Dr & 12t...,Casual,5506,38.888767,-77.02858,38.888767,-77.02858,0.0
1,72677,Jefferson Dr & 14th St SW,Jefferson Dr & 14th St SW,Casual,4796,38.888553,-77.032429,38.888553,-77.032429,0.0
2,49427,4th St & Madison Dr NW,4th St & Madison Dr NW,Casual,3955,38.890493,-77.017253,38.890493,-77.017253,0.0
3,73275,Lincoln Memorial,Jefferson Memorial,Casual,3871,38.888251,-77.049426,38.879819,-77.037413,0.0
4,51597,Columbus Circle / Union Station,6th & H St NE,Member,3776,38.89696,-77.00493,38.899972,-76.998347,0.0


In [7]:
#calculates distances between starts and stops per route
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * arcsin(sqrt(a)) 
    # Radius of earth in kilometers is 6371
    mi = 3959 * c
    return mi

In [8]:
#adds distance to the routes dataframe

#for row in routes.iterrows():
routes["Distance"] = haversine(routes["Start Long"],routes["Start Lat"],routes["End Long"], routes["End Lat"])
#routes = routes.sort_values(["Distance"], ascending=False).reset_index()
routes.head()

Unnamed: 0,index,Start station,End station,Member type,Frequency,Start Lat,Start Long,End Lat,End Long,Distance
0,92584,Smithsonian-National Mall / Jefferson Dr & 12t...,Smithsonian-National Mall / Jefferson Dr & 12t...,Casual,5506,38.888767,-77.02858,38.888767,-77.02858,0.0
1,72677,Jefferson Dr & 14th St SW,Jefferson Dr & 14th St SW,Casual,4796,38.888553,-77.032429,38.888553,-77.032429,0.0
2,49427,4th St & Madison Dr NW,4th St & Madison Dr NW,Casual,3955,38.890493,-77.017253,38.890493,-77.017253,0.0
3,73275,Lincoln Memorial,Jefferson Memorial,Casual,3871,38.888251,-77.049426,38.879819,-77.037413,0.870033
4,51597,Columbus Circle / Union Station,6th & H St NE,Member,3776,38.89696,-77.00493,38.899972,-76.998347,0.410652


In [9]:
routes_casual = routes[(routes["Member type"] == "Casual")]


marker_start_routes_casual = gmaps.symbol_layer(routes_casual[['Start Lat', 'Start Long']].head(25), fill_color='blue', stroke_color='blue', scale=5)
marker_end_routes_casual = gmaps.symbol_layer(routes_casual[['End Lat', 'End Long']].head(25), fill_color='red', stroke_color='red', scale=3)

fig = gmaps.figure()
fig.add_layer(gmaps.transit_layer())
fig.add_layer(marker_start_routes_casual)
fig.add_layer(marker_end_routes_casual)

fig

Figure(layout=FigureLayout(height='420px'))

In [10]:
routes_casual["Frequency"].describe()

count    47853.000000
mean        15.210290
std         82.434379
min          1.000000
25%          2.000000
50%          4.000000
75%         11.000000
max       5506.000000
Name: Frequency, dtype: float64

In [11]:
routes_casual["Distance"].describe()

count    47853.000000
mean         2.013055
std          1.398822
min          0.000000
25%          1.063026
50%          1.736895
75%          2.616548
max         18.154241
Name: Distance, dtype: float64

In [12]:
routes_member = routes[(routes["Member type"] == "Member")]


marker_start_routes_member = gmaps.symbol_layer(routes_member[['Start Lat', 'Start Long']].head(25), fill_color='blue', stroke_color='blue', scale=4)
marker_end_routes_member = gmaps.symbol_layer(routes_member[['End Lat', 'End Long']].head(25), fill_color='red', stroke_color='red', scale=3)

fig = gmaps.figure()
fig.add_layer(marker_start_routes_member)
fig.add_layer(marker_end_routes_member)
fig.add_layer(gmaps.transit_layer())
fig

Figure(layout=FigureLayout(height='420px'))

In [13]:
routes_member["Frequency"].describe()

count    67561.000000
mean        39.534450
std         95.687731
min          1.000000
25%          2.000000
50%          9.000000
75%         37.000000
max       3776.000000
Name: Frequency, dtype: float64

In [15]:
routes_member["Distance"].describe()

count    67561.000000
mean         2.195852
std          1.327552
min          0.000000
25%          1.245084
50%          2.028797
75%          2.904770
max         19.000371
Name: Distance, dtype: float64

In [16]:
starts_df = frame.groupby(['Start station']).size()
starts_df = pd.DataFrame(starts_df).reset_index()
starts_df = starts_df.rename(columns={"Start station": "Station", 0: "Frequency"})

ends_df = frame.groupby(['End station']).size()
ends_df = pd.DataFrame(ends_df).reset_index()
ends_df = ends_df.rename(columns={"End station": "Station", 0: "Frequency"})


most_popular = pd.merge(starts_df, ends_df, on="Station")
most_popular["Total"] = most_popular["Frequency_x"] + most_popular["Frequency_y"]
most_popular = most_popular.rename(columns={"Frequency_x":"Start count", "Frequency_y":"End count"})

bikeshare_locations_pop = bikeshare_locations.rename(columns={"ADDRESS": "Station", 
                                                                "LATITUDE": "Lat","LONGITUDE": "Long"})
most_popular = pd.merge(most_popular, bikeshare_locations_pop, on=["Station"])
most_popular = most_popular.sort_values(["Total"], ascending=False).reset_index()
most_popular.head()


Unnamed: 0,index,Station,Start count,End count,Total,Lat,Long
0,219,Columbus Circle / Union Station,63493,68100,131593,38.89696,-77.00493
1,332,Lincoln Memorial,53869,54397,108266,38.888251,-77.049426
2,302,Jefferson Dr & 14th St SW,43993,45473,89466,38.888553,-77.032429
3,144,4th St & Madison Dr NW,41888,42119,84007,38.890493,-77.017253
4,298,Henry Bacon Dr & Lincoln Memorial Circle NW,41020,41565,82585,38.890544,-77.049379


In [17]:
#displays heatmap of most popular locations irregardless of stop or start
marker_layer = gmaps.heatmap_layer(most_popular[['Lat', 'Long']], weights=most_popular['Total'], max_intensity=50, point_radius=6.0)

fig = gmaps.figure()
fig.add_layer(marker_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [19]:
#head_len = 10
#distance_layer_start = gmaps.symbol_layer(routes_distance[['Start Lat', 'Start Long']].head(head_len), fill_color='blue', stroke_color='blue', scale=3)
#distance_layer_end = gmaps.symbol_layer(routes_distance[['End Lat', 'End Long']].head(head_len), fill_color='red', stroke_color='red', scale=3)

#fig = gmaps.figure()
#fig.add_layer(distance_layer_start)
#fig.add_layer(distance_layer_end)

#fig

#start_loc = routes_distance[['Start Lat','Start Long']].values.tolist()
#start_loc_one = start_loc[0]
#end_loc = routes_distance[['End Lat','End Long']].values.tolist()
#end_loc_one = end_loc[0]

#fig = gmaps.figure()
#long_distance = gmaps.directions_layer(start_loc_one, end_loc_one)


#fig.add_layer(long_distance)
#fig.add_layer(gmaps.transit_layer())

#fig