# 15. Union, Identity
#### By Kwan Yin Andrew Chau

In [1]:
import nbfinder
from loader import g
from loader import __
from loader import P
from loader import T
from loader import sydneyVertexId
from loader import hkVertexId

## union()

__Combine the Hong Kong airport vertex with the number of destinations__

The output of the previous step is available to every traversal inside the union: here, `out()` starts from the vertex that was found immediately before the union step.

In [2]:
# Label the Hong Kong vertex 'a' so the union can return it next to its out() count.
(
    g.V(hkVertexId)
    .as_('a')
    .union(
        __.select('a'),
        __.out().count()
    )
    .fold()
    .next()
)

[v[10113032], 151]

Equivalent to

In [3]:
# Same result without a labelled step: each union branch looks up HKG itself.
(
    g.V()
    .union(
        __.has('airport', 'code', 'HKG'),
        __.has('airport', 'code', 'HKG').out().count()
    )
    .fold()
    .next()
)

[v[32776], 151]

__This can also be done by grouping into a map__

In [4]:
# group(): the first by() keys on the vertex itself, the second by() supplies out().count() as the value.
(
    g.V()
    .has('airport', 'code', 'HKG')
    .group()
    .by()
    .by(__.out().count())
    .next()
)

{v[32776]: 151}

__We can output the desc and code instead of the actual vertex__

In [5]:
# Project the labelled vertex to its 'desc' and 'code' values instead of returning the vertex.
(
    g.V(hkVertexId)
    .as_('a')
    .union(
        __.select('a').by('desc'),
        __.select('a').by('code'),
        __.out().count()
    )
    .fold()
    .next()
)

['Hong Kong - Chek Lap Kok International Airport', 'HKG', 151]

## identity()

__Using identity returns the Hong Kong airport vertex passed into the current step (the union)__

In [6]:
# identity() hands back the traverser that entered the union, i.e. the Hong Kong vertex.
(
    g.V(hkVertexId)
    .union(
        __.identity(),
        __.identity().values('desc'),
        __.out().count()
    )
    .fold()
    .next()
)

[v[32776], 'Hong Kong - Chek Lap Kok International Airport', 151]

__We do not need identity to retrieve values in this case, because every traversal inside the union already starts from the output of the previous step__

Equivalent to

In [7]:
# values('desc') is applied directly to the incoming vertex; no identity() prefix is required.
(
    g.V(hkVertexId)
    .union(
        __.identity(),
        __.values('desc'),
        __.out().count()
    )
    .fold()
    .next()
)

[v[32776], 'Hong Kong - Chek Lap Kok International Airport', 151]

__A constant can also be used__

In [8]:
# constant() emits a fixed value, used here as a label in front of the 'desc' value.
(
    g.V(hkVertexId)
    .union(
        __.constant('Description'),
        __.values('desc')
    )
    .fold()
    .next()
)

['Description', 'Hong Kong - Chek Lap Kok International Airport']

__Sample 10 airports and, inside a local step, fold each airport's union results (code, description and country) into its own list__

In [9]:
# The inner fold() runs inside local(), so each sampled airport gets its own list;
# the outer fold() then gathers those lists into one result.
(
    g.V()
    .hasLabel('airport')
    .sample(10)
    .local(
        __.union(
            __.values('code'),
            __.values('desc', 'country')
        ).fold()
    )
    .fold()
    .next()
)

[['SJI', 'San Jose Airport', 'PH'],
 ['GZO', 'Nusatupe Airport', 'SB'],
 ['CLE', 'Cleveland, Hopkins International Airport', 'US'],
 ['CUK', 'Caye Caulker Airport', 'BZ'],
 ['ERN', 'Eirunepé Airport', 'BR'],
 ['SFJ', 'Kangerlussuaq Airport', 'GL'],
 ['SLA', 'Martin Miguel De Guemes International Airport', 'AR'],
 ['YIH', 'Yichang Airport', 'CN'],
 ['GLV', 'Golovin Airport', 'US'],
 ['YXC', 'Cranbrook Airport', 'CA']]

Things inside the union do not have to be directly related

__Union of airports from the UK that can directly reach HK or airports in the US that HK can reach directly__

In [10]:
# One branch walks incoming edges (UK airports), the other outgoing edges (US airports).
(
    g.V(hkVertexId)
    .union(
        __.in_().has('country', 'UK'),
        __.out().has('country', 'US')
    )
    .path()
    .by('desc')
    .fold()
    .next()
)

[['Hong Kong - Chek Lap Kok International Airport', 'London Heathrow'],
 ['Hong Kong - Chek Lap Kok International Airport', 'London Gatwick'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Manchester Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Los Angeles International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'New York John F. Kennedy International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Dallas/Fort Worth International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Seattle-Tacoma'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Boston Logan'],
 ['Hong Kong - Chek Lap Kok International Airport', 'San Francisco International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', "Chicago O'Hare International Airport"],
 ['Hong Kong - Chek Lap Kok International Airport', 'Newark, Liberty']]

__Union of airports in the US or Russia that HK can directly reach__

In [11]:
# Both branches follow outgoing edges and differ only in the country filter.
(
    g.V(hkVertexId)
    .union(
        __.out().has('country', 'US'),
        __.out().has('country', 'RU')
    )
    .path()
    .by('desc')
    .fold()
    .next()
)

[['Hong Kong - Chek Lap Kok International Airport', 'Los Angeles International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'New York John F. Kennedy International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Dallas/Fort Worth International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Seattle-Tacoma'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Boston Logan'],
 ['Hong Kong - Chek Lap Kok International Airport', 'San Francisco International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', "Chicago O'Hare International Airport"],
 ['Hong Kong - Chek Lap Kok International Airport', 'Newark, Liberty'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Moscow, Domodedovo International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Moscow, Sheremetyevo International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Vladivostok International Airport'],
 ['Hong Kong - Chek Lap Kok 

Equivalent to

In [12]:
# A single has() with P.within replaces the two-branch union.
(
    g.V(hkVertexId)
    .out('route')
    .has('country', P.within('US', 'RU'))
    .path()
    .by('desc')
    .fold()
    .next()
)

[['Hong Kong - Chek Lap Kok International Airport', 'Los Angeles International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'New York John F. Kennedy International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Dallas/Fort Worth International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Seattle-Tacoma'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Boston Logan'],
 ['Hong Kong - Chek Lap Kok International Airport', 'San Francisco International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', "Chicago O'Hare International Airport"],
 ['Hong Kong - Chek Lap Kok International Airport', 'Newark, Liberty'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Moscow, Domodedovo International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Moscow, Sheremetyevo International Airport'],
 ['Hong Kong - Chek Lap Kok International Airport', 'Vladivostok International Airport'],
 ['Hong Kong - Chek Lap Kok 

__For each airport in NSW, its description followed by the airports in the US or China that it can reach directly__

In [13]:
# Per NSW airport: its own 'desc', then the 'desc' of each US and CN destination,
# folded into one list per airport by the fold() inside local().
(
    g.V()
    .has('region', 'AU-NSW')
    .local(
        __.union(
            __.values('desc'),
            __.out('route').has('country', 'US').values('desc'),
            __.out('route').has('country', 'CN').values('desc')
        ).fold()
    )
    .fold()
    .next()
)

[['Armidale Airport'],
 ['Moree Airport'],
 ['Parkes Airport'],
 ['Merimbula Airport'],
 ['Coffs Harbour Airport'],
 ['Narrabri Airport'],
 ['Sydney Bankstown Airport'],
 ['Sydney Kingsford Smith',
  'Los Angeles International Airport',
  'Dallas/Fort Worth International Airport',
  'Honolulu International Airport',
  'San Francisco International Airport',
  'Shanghai - Pudong International Airport',
  'Changsha Huanghua International Airport',
  'Beijing Capital International Airport',
  'Wuhan Tianhe International Airport',
  'Hangzhou Xiaoshan International Airport',
  'Kunming Wujiaba International Airport',
  "Xi'an Xianyang International Airport",
  'Chengdu Shuangliu International Airport',
  'Guangzhou Baiyun International Airport',
  'Nanjing Lukou Airport',
  'Chongqing Jiangbei International Airport'],
 ['Bathurst Airport'],
 ['Lismore Airport'],
 ['Gold Coast Airport', 'Wuhan Tianhe International Airport'],
 ['Wagga Wagga City Airport'],
 ['Grafton Airport'],
 ['Ballina Byr

__Paths of airports, with the distance of each route along the way, for:__
- Any airport in London to any airport in Berlin to any airport in Lisbon
- Any airport in London to any airport in Paris to any airport in Barcelona
- Any airport in London to any airport in Edinburgh to any airport in Rome

Note that we use outE().inV() instead of out() so that the route edges are part of the path and we can read the distance of each route.

In [14]:
# Each branch is a two-hop itinerary out of London. The by() modulators alternate
# between airport 'code' (vertices) and route 'dist' (edges) along the path.
(
    g.V()
    .has('city', 'London')
    .union(
        (
            __.outE().inV().has('city', 'Berlin')
            .outE('route').inV().has('city', 'Lisbon')
            .path().by('code').by('dist').by('code').by('dist')
        ),
        (
            __.outE().inV().has('city', 'Paris')
            .outE('route').inV().has('city', 'Barcelona')
            .path().by('code').by('dist').by('code').by('dist')
        ),
        (
            __.outE().inV().has('city', 'Edinburgh')
            .outE('route').inV().has('city', 'Rome')
            .path().by('code').by('dist').by('code').by('dist')
        )
    )
    .fold()
    .next()
)

[['STN', 563, 'SXF', 1432, 'LIS'],
 ['LCY', 204, 'CDG', 533, 'BCN'],
 ['LCY', 217, 'ORY', 513, 'BCN'],
 ['LHR', 216, 'CDG', 533, 'BCN'],
 ['LHR', 227, 'ORY', 513, 'BCN'],
 ['LTN', 589, 'SXF', 1432, 'LIS'],
 ['LTN', 236, 'CDG', 533, 'BCN'],
 ['LGW', 591, 'SXF', 1432, 'LIS'],
 ['LGW', 191, 'CDG', 533, 'BCN']]

__Get the sum of the distance of routes out of Hong Kong along with the sum when excluding the route to Sydney__

In [15]:
# First branch: sum 'dist' over every outgoing route edge.
# Second branch: the same sum, keeping only edges whose inV is not the Sydney vertex.
(
    g.V(hkVertexId)
    .outE('route')
    .union(
        __.values('dist').sum(),
        __.filter(
            __.inV().has(T.id, P.neq(sydneyVertexId))
        ).values('dist').sum()
    )
    .fold()
    .next()
)

[367122, 362531]

__Not every traversal inside a union needs to return something__

In [16]:
# One branch per destination code; a branch that matches nothing adds nothing to the result.
(
    g.V(hkVertexId)
    .union(
        __.out().has('code', 'SYD'),
        __.out().has('code', 'LPL'),
        __.out().has('code', 'MAN')
    )
    .values('desc')
    .fold()
    .next()
)

['Sydney Kingsford Smith', 'Manchester Airport']

Equivalent to

In [17]:
# The three union branches collapse into one has() using P.within.
(
    g.V(hkVertexId)
    .out()
    .has('code', P.within(['SYD', 'LPL', 'MAN']))
    .values('desc')
    .fold()
    .next()
)

['Sydney Kingsford Smith', 'Manchester Airport']