# Parsing GTFS format transit data in real time

In [1]:
from google.transit import gtfs_realtime_pb2
import requests
import pandas as pd

## Building a barebones data feed

1. Initialize the FeedMessage parser from Google
2. Get the response from the API
3. Pass the response to the parser

## Initialize an instance of FeedMessage
Google defines a FeedMessage class in its library. We’ll add data to an instance of this class later, but for now we just need to initialize it.

In [2]:
# start with an empty FeedMessage; nothing has been fetched or parsed yet
feed = gtfs_realtime_pb2.FeedMessage()

## Get the response from the API

In [3]:
# Fetch the raw protobuf payload. The timeout keeps the cell from hanging
# indefinitely if the server never answers, and raise_for_status() stops the
# notebook here on an HTTP error instead of handing an error page to the parser.
response = requests.get(
    'http://files.transport.act.gov.au/feeds/lightrail.pb',
    allow_redirects=True,
    timeout=30,
)
response.raise_for_status()

## Pass the response to the parser
The FeedMessage class has a ParseFromString() method to read in the data.

In [4]:
# parse the raw response body into `feed`; the integer echoed below is the call's return value
feed.ParseFromString(response.content)

7510

The parsed data is now available in the entity attribute:

In [7]:
# show only the first few entities -- printing the whole repeated field floods the output
feed.entity[:3]

[id: "1608712"
vehicle {
  trip {
  }
  position {
    latitude: -35.27810287475586
    longitude: 149.12933349609375
    odometer: 15684498.0
  }
  current_status: STOPPED_AT
  timestamp: 1558438035
  congestion_level: RUNNING_SMOOTHLY
  vehicle {
    id: "12"
    label: "LRV12"
    license_plate: "LRV12"
  }
}
, id: "1608713"
vehicle {
  trip {
    trip_id: "252"
  }
  position {
    latitude: -35.231201171875
    longitude: 149.14474487304688
    odometer: 8074200.0
    speed: 16.024999618530273
  }
  current_stop_sequence: 7
  timestamp: 1558438035
  congestion_level: RUNNING_SMOOTHLY
  vehicle {
    id: "14"
    label: "LRV14"
    license_plate: "LRV14"
  }
}
, id: "1608714"
vehicle {
  trip {
    trip_id: "122"
  }
  position {
    latitude: -35.23720932006836
    longitude: 149.141845703125
    odometer: 13502866.0
    speed: 18.177778244018555
  }
  current_stop_sequence: 7
  timestamp: 1558438035
  congestion_level: RUNNING_SMOOTHLY
  vehicle {
    id: "6"
    label: "LRV6"
  

In [8]:
# check the container type: entity is a protobuf repeated-field container rather than a plain list
type(feed.entity)

google.protobuf.pyext._message.RepeatedCompositeContainer

## Use `trip_update`
Not every entity in the feed will have a real-time update of transit status. Destinations and departure locations might also be included.

If you want to focus only on data that updates a currently ongoing revenue trip, filter for `trip_update` using each entity’s `HasField` method.

In [9]:
# total number of entities in the feed, whether or not they carry a trip_update
len(feed.entity)

94

In [1]:
# len(feed.entity) counts every entity in the feed, including those without a
# trip_update, so report it as an entity count rather than a trip count.
# NOTE(review): the NameError recorded below appears to come from running this
# cell before `feed` was created -- re-run the notebook top to bottom to confirm.
print('There are {} entities in the light rail feed'.format(len(feed.entity)))

NameError: name 'feed' is not defined

In [11]:
# count the entities that carry a trip_update (HasField yields a bool, and True sums as 1)
sum(entity.HasField('trip_update') for entity in feed.entity)

90

In [12]:
# count the entities that do NOT carry a trip_update (each True adds 1 to the sum)
sum(not entity.HasField('trip_update') for entity in feed.entity)

4

---

In [15]:
# look at the last entity in the feed (index -1)
light_rail = feed.entity[-1]

In [16]:
# display the selected entity; the one shown below carries a trip_update block
light_rail

id: "1608805"
trip_update {
  trip {
    trip_id: "113"
  }
  stop_time_update {
    stop_sequence: 13
    arrival {
      delay: 187
      time: 1558430827
    }
    departure {
      delay: 187
      time: 1558430827
    }
    stop_id: "8100"
  }
  timestamp: 1558438035
}

In [45]:
# use dot notation to read an individual field of the entity
# NOTE(review): the ID printed below differs from the one displayed in In [16],
# so this output appears to come from a different run -- re-run to confirm
print('light rail ID:', light_rail.id)

light rail ID: 917296


In [46]:
# nested fields just need additional dots
# NOTE(review): the entity displayed in In [16] has a trip_update and no vehicle
# block, so the position shown below presumably comes from a different
# entity/run -- TODO confirm
light_rail.vehicle.position

latitude: -35.185585021972656
longitude: 149.13560485839844
odometer: 12032020.0

In [47]:
# the position shown above has no speed entry, so this 0.0 is presumably the
# default for an unset field rather than a measured speed -- TODO confirm
light_rail.vehicle.position.speed

0.0

## Transform Feed to a Dictionary

In [49]:
from protobuf_to_dict import protobuf_to_dict

In [50]:
# convert our original protobuf feed into a plain Python dict
lr_dict = protobuf_to_dict(feed)

In [63]:
# get the entity at index 1, i.e. the second one in the feed (index 0 is the first)
lr_first = lr_dict['entity'][1]

In [64]:
# display the converted entity as nested dicts
lr_first

{'id': '917297',
 'vehicle': {'trip': {'trip_id': '166'},
  'position': {'latitude': -35.208065032958984,
   'longitude': 149.14755249023438,
   'odometer': 16665751.0},
  'current_stop_sequence': 5,
  'timestamp': 1558394940,
  'congestion_level': 1,
  'vehicle': {'id': '11', 'label': 'LRV11', 'license_plate': 'LRV11'}}}

In [65]:
# top-level keys of the entity dict
lr_first.keys()

dict_keys(['id', 'vehicle'])

In [66]:
# keys available one level down, under 'vehicle'
lr_first['vehicle'].keys()

dict_keys(['trip', 'position', 'current_stop_sequence', 'timestamp', 'congestion_level', 'vehicle'])

In [67]:
# keys of the nested 'trip' dict
lr_first['vehicle']['trip'].keys()

dict_keys(['trip_id'])

In [68]:
# keys of the nested 'position' dict
lr_first['vehicle']['position'].keys()

dict_keys(['latitude', 'longitude', 'odometer'])

In [69]:
# keys of the inner 'vehicle' dict (nested inside the outer 'vehicle' entry)
lr_first['vehicle']['vehicle'].keys()

dict_keys(['id', 'label', 'license_plate'])

In [81]:
# wrap the position dict in a pandas Series for a compact, labelled display
# (the recorded output below is a Series, which a bare dict expression cannot produce)
pd.Series(lr_first['vehicle']['position'])

latitude    -3.520807e+01
longitude    1.491476e+02
odometer     1.666575e+07
dtype: float64