In [8]:
import pandas as pd
import numpy as np
import googlemaps
import config

# Create the Google Maps API client used for the directions requests below.
# The API key is read from the local `config` module rather than written into the notebook.
gmaps = googlemaps.Client(key=config.gmaps_api_key)

In [2]:
# Read the raw text export. By observation, every trip occupies five consecutive lines
# (start time, start station, end time, end station, duration) and there is no header line.
# The file is read line by line instead of with pd.read_csv(sep="\n"): recent pandas versions
# reject "\n" as a separator with a ValueError because it is the line terminator.
LINES_PER_TRIP = 5
with open('trips.txt', encoding='utf-8') as trips_file:
    # Remove the line endings and skip blank lines (read_csv skipped blank lines by default too).
    trip_lines = [line.rstrip('\r\n') for line in trips_file if line.strip()]

# A line count that is not a multiple of five cannot be reshaped into trips; fail with a clear message.
if len(trip_lines) % LINES_PER_TRIP:
    raise ValueError(
        "trips.txt has {} non-blank lines, which is not a multiple of {}".format(
            len(trip_lines), LINES_PER_TRIP
        )
    )

# Convert the flat number_of_trips*5 list into a number_of_trips by 5 table.
trips_df = pd.DataFrame(
    np.reshape(np.array(trip_lines, dtype=object), (-1, LINES_PER_TRIP)),
    columns=['start_time', 'start_loc', 'end_time', 'end_loc', 'duration'],
)

# Convert strings of times to datetimes.
trips_df[['start_time', 'end_time']] = trips_df[['start_time', 'end_time']].apply(pd.to_datetime)

# Replace the imported duration text with a timedelta calculated from the start and end times.
trips_df['duration'] = trips_df['end_time'] - trips_df['start_time']

trips_df.head()

Unnamed: 0,start_time,start_loc,end_time,end_loc,duration
0,2018-04-28 14:24:58,Bond St & Fulton St,2018-04-28 15:12:36,11 St & 35 Ave,00:47:38
1,2018-04-24 08:09:43,1 Ave & E 68 St,2018-04-24 08:29:39,E 19 St & 3 Ave,00:19:56
2,2018-04-23 08:20:09,Bond St & Fulton St,2018-04-23 08:46:25,E 19 St & 3 Ave,00:26:16
3,2018-04-18 17:55:20,E 19 St & 3 Ave,2018-04-18 18:25:50,Bond St & Fulton St,00:30:30
4,2018-04-14 18:35:31,Central Park West & W 85 St,2018-04-14 18:47:31,E 71 St & 1 Ave,00:12:00


In [3]:
# Trips that start and end at the same station are assumed to involve no meaningful travel.
# Record how many there are and their combined duration for later reporting.
same_station = trips_df['start_loc'] == trips_df['end_loc']
non_trips = int(same_station.sum())
non_trips_time = trips_df.loc[same_station, 'duration'].sum()

# Keep only the remaining trips and renumber the rows from zero.
trips_df = trips_df.loc[~same_station].reset_index(drop=True)

# Append city and state to both location columns to avoid ambiguity.
# NOTE(review): every station receives the same suffix; confirm this resolves correctly
# for any stations that are not in the same borough.
for loc_col in ('start_loc', 'end_loc'):
    trips_df[loc_col] = trips_df[loc_col].astype(str) + ", New York, NY"

In [4]:
# Preview the first rows to check that both location columns now carry the city/state suffix.
trips_df.head()

Unnamed: 0,start_time,start_loc,end_time,end_loc,duration
0,2018-04-28 14:24:58,"Bond St & Fulton St, New York, NY",2018-04-28 15:12:36,"11 St & 35 Ave, New York, NY",00:47:38
1,2018-04-24 08:09:43,"1 Ave & E 68 St, New York, NY",2018-04-24 08:29:39,"E 19 St & 3 Ave, New York, NY",00:19:56
2,2018-04-23 08:20:09,"Bond St & Fulton St, New York, NY",2018-04-23 08:46:25,"E 19 St & 3 Ave, New York, NY",00:26:16
3,2018-04-18 17:55:20,"E 19 St & 3 Ave, New York, NY",2018-04-18 18:25:50,"Bond St & Fulton St, New York, NY",00:30:30
4,2018-04-14 18:35:31,"Central Park West & W 85 St, New York, NY",2018-04-14 18:47:31,"E 71 St & 1 Ave, New York, NY",00:12:00


In [5]:
# Count the trips for every distinct (start_loc, end_loc) pair: one row per pair,
# with the number of trips on that pair stored in the 'count' column.
route_counts = trips_df.groupby(['start_loc', 'end_loc']).size()
unique = route_counts.reset_index(name='count')
unique.head()

Unnamed: 0,start_loc,end_loc,count
0,"1 Ave & E 16 St, New York, NY","Duffield St & Willoughby St, New York, NY",1
1,"1 Ave & E 68 St, New York, NY","E 17 St & Broadway, New York, NY",1
2,"1 Ave & E 68 St, New York, NY","E 19 St & 3 Ave, New York, NY",14
3,"1 Ave & E 68 St, New York, NY","E 20 St & Park Ave, New York, NY",2
4,"1 Ave & E 68 St, New York, NY","E 44 St & 2 Ave, New York, NY",1


In [6]:
def get_bike_directions(row):
    """Request bicycling directions for one start/end location pair.

    Parameters
    ----------
    row : mapping-like
        Must provide 'start_loc' (origin) and 'end_loc' (destination) entries.

    Returns
    -------
    Whatever the module-level `gmaps` client's `directions` call returns for the
    pair, requested with the "bicycling" travel mode.
    """
    return gmaps.directions(row['start_loc'], row['end_loc'], "bicycling")

# Call get_bike_directions once per row of `unique` (axis=1 hands each row to the function),
# i.e. once per distinct start/end pair rather than once per trip.
directions = unique.apply(get_bike_directions, axis = 1)

In [9]:
# Store each pair's directions result next to the pair it was requested for.
unique['directions'] = directions

In [10]:
def _first_leg_text(routes, field):
    """Return the display text of `field` for the first leg of the first route.

    `routes` is one directions result (a list of routes) and `field` is
    'distance' or 'duration'. Returns None when the list is empty, so a pair
    for which no route was found does not raise IndexError.
    """
    if not routes:
        return None
    return routes[0]["legs"][0][field]["text"]

# Pull the human-readable distance and travel time out of each directions result.
unique['dist'] = unique['directions'].apply(lambda routes: _first_leg_text(routes, 'distance'))
unique['google_time'] = unique['directions'].apply(lambda routes: _first_leg_text(routes, 'duration'))

In [11]:
# Preview the per-pair table with the added directions, dist and google_time columns.
unique.head()

Unnamed: 0,start_loc,end_loc,count,directions,dist,google_time
0,"1 Ave & E 16 St, New York, NY","Duffield St & Willoughby St, New York, NY",1,"[{'bounds': {'northeast': {'lat': 40.7333362, ...",3.6 mi,22 mins
1,"1 Ave & E 68 St, New York, NY","E 17 St & Broadway, New York, NY",1,"[{'bounds': {'northeast': {'lat': 40.7652381, ...",3.5 mi,20 mins
2,"1 Ave & E 68 St, New York, NY","E 19 St & 3 Ave, New York, NY",14,"[{'bounds': {'northeast': {'lat': 40.7652381, ...",3.1 mi,17 mins
3,"1 Ave & E 68 St, New York, NY","E 20 St & Park Ave, New York, NY",2,"[{'bounds': {'northeast': {'lat': 40.7652381, ...",3.5 mi,20 mins
4,"1 Ave & E 68 St, New York, NY","E 44 St & 2 Ave, New York, NY",1,"[{'bounds': {'northeast': {'lat': 40.7652381, ...",1.8 mi,10 mins


In [12]:
# Attach each trip's directions, distance and Google travel time by matching on the
# start/end pair. The per-pair trip tally ('count') is not carried over to the trips table.
route_info = unique.drop(columns='count')
trips_df = trips_df.merge(route_info, how='left', on=['start_loc', 'end_loc'])

In [43]:
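# TODO(review): unfinished step, presumably meant to convert the 'dist' strings (e.g. '2.9 mi') to numbers. The original attempt was left incomplete: trips_df['dist'].apply(lambda x : x = )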

'2.9 mi'

['7.5 mi',
 '3.1 mi',
 '4.2 mi',
 '4.0 mi',
 '3.1 mi',
 '7.6 mi',
 '1.5 mi',
 '2.5 mi',
 '0.5 mi',
 '1.3 mi',
 '2.7 mi',
 '2.2 mi',
 '4.2 mi',
 '4.0 mi',
 '4.0 mi',
 '4.2 mi',
 '1.0 mi',
 '3.6 mi',
 '0.5 mi',
 '4.0 mi',
 '3.6 mi',
 '0.4 mi',
 '0.4 mi',
 '2.6 mi',
 '0.6 mi',
 '1.8 mi',
 '0.3 mi',
 '0.6 mi',
 '0.6 mi',
 '0.4 mi',
 '4.2 mi',
 '3.8 mi',
 '4.2 mi',
 '5.0 mi',
 '3.8 mi',
 '2.7 mi',
 '3.8 mi',
 '4.2 mi',
 '2.7 mi',
 '1.3 mi',
 '3.7 mi',
 '4.0 mi',
 '2.7 mi',
 '3.8 mi',
 '4.0 mi',
 '4.2 mi',
 '3.6 mi',
 '4.3 mi',
 '4.2 mi',
 '4.0 mi',
 '3.1 mi',
 '3.8 mi',
 '3.1 mi',
 '2.7 mi',
 '4.2 mi',
 '3.5 mi',
 '3.4 mi',
 '3.9 mi',
 '4.2 mi',
 '0.4 mi',
 '3.9 mi',
 '3.4 mi',
 '6.4 mi',
 '3.1 mi',
 '4.2 mi',
 '3.1 mi',
 '3.8 mi',
 '3.1 mi',
 '2.7 mi',
 '4.2 mi',
 '2.7 mi',
 '5.6 mi',
 '1.9 mi',
 '3.1 mi',
 '1.5 mi',
 '4.2 mi',
 '6.1 mi',
 '3.2 mi',
 '3.1 mi',
 '2.2 mi',
 '4.8 mi',
 '2.5 mi',
 '4.2 mi',
 '4.0 mi',
 '3.8 mi',
 '3.8 mi',
 '4.2 mi',
 '1.5 mi',
 '4.2 mi',
 '4.4 mi',
 '3.1 mi',