In [281]:
from datetime import datetime
import googlemaps
import config
import json
import psycopg2
from directions import Directions
%load_ext autoreload
%autoreload 2

## 0.1 Connecting to the Docker Container

In [274]:
# Open a session against the Postgres server listening on localhost.
conn = psycopg2.connect(
    host='127.0.0.1',
    dbname='google_maps',
    user='google_user',
    password='googleuser',
)

# Autocommit: each statement is committed as soon as it executes.
conn.set_session(autocommit=True)

# Cursor used to run statements on this connection.
cur = conn.cursor()

In [278]:
# Release the cursor first, then the connection that owns it.
cur.close()
conn.close()

## Flow of Information

1. Calling gmaps.directions() gives back a JSON array with the directions data `directions`.
2. From `directions` we find the `legs`, which tells us information such as `arrival_time`, `departure_time`, `duration`, and the `steps` of the trip.
3. Parsing all of this will give a JSON for that particular trip.
4. Save the JSON file to the directory for that direction of travel (`data/A/` or `data/B/`).
5. Do the same for the opposite direction

## 1.1 Directions Class

In [275]:
from datetime import datetime
import googlemaps
import config

class Directions:
    """
    Connects to the Google Maps API to retrieve directions for a transit trip
    between the two given addresses, in both directions of travel.

    Parameters
    -----------
    location_1: [str] First address as you would input into Google Maps.
    location_2: [str] Second address as you would input into Google Maps.

    Instance Variables
    ------------------
    gmaps: the Google Maps client used for every API call.
    coords: [list] both locations as {'lat': ..., 'lng': ...} dicts, in the
        order [location_1, location_2].
    full_directions_A: [dict] raw API route for location_1 -> location_2.
    full_directions_B: [dict] raw API route for location_2 -> location_1.
    directions_A: [dict] parsed version of `full_directions_A`.
    directions_B: [dict] parsed version of `full_directions_B`.
    """

    def __init__(self, location_1, location_2):

        # Connect to Google Maps API
        self.gmaps = self.gmaps_client(config.api_key)

        # Convert locations to coordinates
        self.coords = self.locations_to_coords(location_1, location_2)

        # Get full directions for trips between both locations.
        # Trip B is the same pair of coordinates in reverse order.
        self.full_directions_A = self.get_full_directions(self.coords)
        self.full_directions_B = self.get_full_directions(self.coords[::-1])

        # Get parsed directions for trips between both locations
        self.directions_A = self.get_directions(self.full_directions_A)
        self.directions_B = self.get_directions(self.full_directions_B)

    def gmaps_client(self, api_key):
        """
        Returns
        -------
        A gmaps client authenticated with `api_key`.

        Parameters
        ----------
        api_key: [str] A Google API key.
        """
        # Use the key that was passed in rather than reaching back into config.
        return googlemaps.Client(api_key)

    def locations_to_coords(self, location_1, location_2):
        """
        Returns
        -------
        List of two original location addresses as coordinates.

        Parameters
        -----------
        location_1: [str] First address as you would input into Google Maps.
        location_2: [str] Second address as you would input into Google Maps.

        Examples
        --------
        >>> self.locations_to_coords(
                'One World Trade Center',
                '476 5th Ave, New York, NY 10018'
            )
        [
            {'lat': 40.7127431, 'lng': -74.0133795},
            {'lat': 40.75318230000001, 'lng': -73.9822534}
        ]
        """
        # Geocode through this instance's client (not a notebook-level global)
        # and keep only the first match for each address.
        coords = [
            self.gmaps.geocode(location)[0]['geometry']['location']
            for location in [location_1, location_2]
        ]
        return coords

    def get_full_directions(self, coords):
        """
        Returns
        --------
        A dict with full trip direction information straight from Google Maps API.
        Only the first route in the API response is kept.

        Parameters
        ----------
        coords: List of two original location addresses as coordinates;
            coords[0] is the origin and coords[1] the destination.
        """
        # Call API through this instance's client, departing right now
        directions = self.gmaps.directions(
            origin=coords[0],
            destination=coords[1],
            mode='transit',
            departure_time=datetime.now()
        )[0]
        return directions

    def parse_steps(self, steps):
        """
        Returns
        -------
        A list of dictionaries with parsed information on transit steps of the trip.
        Each entry has the keys 'step', 'distance', 'html_instructions' and
        'line_name'. 'step' numbers the transit steps only, starting at 1.

        Parameters
        ----------
        steps: A list of dictionaries with the full information on each individual step of the trip.
        """
        step_directions = []
        step_num = 1
        for step in steps:

            # We'll only consider steps that involve bus or train.
            # Entries for walking don't have key 'transit_details'
            if step.get('transit_details'):
                line = step['transit_details']['line']
                step_directions.append({
                    'step': step_num,
                    'distance': step['distance']['value'],  # Meters
                    'html_instructions': step['html_instructions'],
                    # Fall back to the full line name when the line has no
                    # short name, instead of raising KeyError.
                    'line_name': line.get('short_name') or line.get('name')
                })
                step_num += 1

        return step_directions

    def get_directions(self, full_directions):
        """
        Returns
        -------
        A parsed `full_directions` dictionary with the keys 'start_location',
        'end_location', 'departure_time', 'arrival_time', 'duration' and 'steps'.

        Parameters
        ----------
        full_directions: dict with full trip direction information straight from Google Maps API.
        """
        # Abbreviate the path; only the first leg of the route is used
        legs = full_directions['legs'][0]

        trip_directions = {
            'start_location': legs['start_location'],            # Coordinates
            'end_location': legs['end_location'],                # Coordinates
            'departure_time': legs['departure_time']['value'],   # Timestamp
            'arrival_time': legs['arrival_time']['value'],       # Timestamp
            'duration': int(legs['duration']['value']/60),       # Whole minutes
            'steps': self.parse_steps(legs['steps'])
        }

        return trip_directions

    def to_json(self, trip_directions, which_trip):
        """
        Saves `trip_directions` as JSON in a local folder denoted by `which_trip`. File path is
        data/A/ or data/B/, and the file is named after the trip's departure time.
        The folder is created if it does not exist yet.

        Parameters
        ----------
        trip_directions: the parsed directions dictionary derived from get_directions() method.

        which_trip: [str] Either 'A' or 'B' for trip A or trip B (location 1 -> location 2
        or location 2 -> location1) to help designate where to save this data.
        """
        import os  # local import: only needed to create the output folder

        # Convert departure time timestamp to string for JSON naming
        date = datetime.fromtimestamp(trip_directions['departure_time'])
        date_str = date.strftime('%Y-%m-%d_%H-%M-%S')

        # Make sure the target folder exists so open() cannot fail on it
        out_path = f'data/{which_trip}/{date_str}.json'
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

        # Export to a JSON
        with open(out_path, 'w') as f:
            json.dump(trip_directions, f)

## 1.2 Testing Directions Class

In [276]:
location_A = '186 Scholes St, Brooklyn NY'
location_B = '110-48 72nd Ave, Flushing NY'

In [288]:
directions = Directions(location_A, location_B)

In [289]:
directions.coords

[{'lat': 40.708711, 'lng': -73.941571}, {'lat': 40.721251, 'lng': -73.839142}]

In [290]:
# `which_trip` is only the trip label; to_json() builds the data/<label>/ path itself.
directions.to_json(directions.directions_A, 'A')
directions.to_json(directions.directions_B, 'B')

In [311]:
import os
import glob

In [309]:
def process_data(filepath):
    """
    Counts the JSON files stored anywhere under `filepath` and prints the total.

    Parameters
    ----------
    filepath: string containing the path of the directory to search. Every
        sub-directory is searched as well.

    Returns
    -------
    None. The result is only printed.
    """
    # Walk the tree and collect the absolute path of each *.json file
    json_paths = [
        os.path.abspath(match)
        for root, _subdirs, _names in os.walk(filepath)
        for match in glob.glob(os.path.join(root,'*.json'))
    ]

    # Report how many files were found
    print('{} files found in {}'.format(len(json_paths), filepath))

#     # iterate over files and perform ETL
#     for i, datafile in enumerate(all_files, 1):
#         func(cur, datafile)
#         conn.commit()
#         print('{}/{} files processed.'.format(i, num_files))

In [312]:
process_data('data')

6 files found in data


In [314]:
dirs = directions.directions_A

In [315]:
dirs.keys()

dict_keys(['start_location', 'end_location', 'departure_time', 'arrival_time', 'duration', 'steps'])

In [321]:
dirs['steps'][0]

{'step': 1,
 'distance': 1490,
 'html_instructions': 'Subway towards 8 Av',
 'line_name': 'L'}

In [319]:
directions.coords

[{'lat': 40.708711, 'lng': -73.941571}, {'lat': 40.721251, 'lng': -73.839142}]

Tables could be:

`locations`
location_id CHAR(1) NOT NULL, 
latitude NUMERIC NOT NULL, 
longitude NUMERIC NOT NULL

`time`
trip_id INT REFERENCES duration(trip_id),
departure_time TIMESTAMP NOT NULL,
minute INT,
hour INT,
day INT,
week_of_year INT,
month INT,
year INT,
is_weekday BOOLEAN,
PRIMARY KEY(departure_time, trip_id)

`duration`
trip_id SERIAL PRIMARY KEY,
starting_loc_id CHAR(1) NOT NULL

`steps`
trip_id INT REFERENCES duration(trip_id),
step_num SMALLINT,
distance BIGINT,
line_name VARCHAR(5),
duration INT (might be interesting - time on trains vs time walking or waiting)
