In [4]:
from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.driver.driver_remote_connection import\
    DriverRemoteConnection
from gremlin_python.process.traversal import T
from gremlin_python.process.traversal import P
from gremlin_python.process.graph_traversal import __
import json


# TinkerPop 3.5.0 
## Tornado replaced 

Since its first release, the Python client has used Tornado for its websocket and other transport needs. While this generally worked well, in some cases, such as when working within Jupyter notebooks, the Gremlin client encountered conflicts when trying to create its event loop. Starting with the 3.5.0 release, the Python client no longer uses Tornado. Instead, AIOHTTP is used. For most users this will have zero impact on their code. However, any code using features provided by Tornado may have to be slightly revised to use AIOHTTP instead. The move to AIOHTTP removes the event loop issues experienced by users of the prior clients and opens the door for additional future improvements.

## max_content_length 

The Gremlin Java clients provide a setting that can be used to increase, or decrease, the maximum size of the result that can be returned to a client. The Tornado default is 10,485,760 bytes (10 * 1024 * 1024) but the prior Python clients did not expose a way to set this value. Starting with the 3.5.0 Python client, transport-specific arguments can now be provided when a remote connection is created. These are passed as kwargs to allow for easy expansion in the future without having to add additional arguments to the API each time a new setting is needed. If a value is not provided for max_content_length, the same default 10,485,760 byte value is used. The code below shows an example of how this works.


In [5]:
# Websocket endpoint of the Gremlin Server used by every cell below.
server = '172.17.0.2'
port = 8182
endpoint = f'ws://{server}:{port}/gremlin'

# Transport-specific keyword arguments are forwarded to the connection;
# max_content_length bounds the size of a result the client will accept.
transport_args = dict(max_content_length=200000)
connection = DriverRemoteConnection(endpoint, 'g', **transport_args)

# `g` is the traversal source shared by the rest of the notebook.
g = traversal().withRemote(connection)
# Optional smoke test:
# results = g.V('1','2','3').valueMap().toList()
# print(results)

In [45]:
# Create two 'person' vertices and join them with a 'knows' edge in a single
# traversal; the step labels 'm' and 'v' name the edge's two endpoints.
(
    g.addV('person').property('name', 'serg').as_('m')
     .addV('person').property('name', 'alex').as_('v')
     .addE('knows').from_('m').to('v')
     .iterate()
)

[['addV', 'person'], ['property', 'name', 'serg'], ['as', 'm'], ['addV', 'person'], ['property', 'name', 'alex'], ['as', 'v'], ['addE', 'knows'], ['from', 'm'], ['to', 'v'], ['none'], ['values', '_ipython_canary_method_should_not_exist_'], ['values', '_ipython_canary_method_should_not_exist_']]

In [46]:
# Property map(s) of the person named 'serg', folded into a single list.
(
    g.V()
     .has('person', 'name', 'serg')
     .valueMap()
     .fold()
     .next()
)

[{'name': ['serg']}]

In [47]:
    # Look up the person named 'serg' and print the property map as JSON.
    # The printed label names the vertex that is actually queried.
    serg_properties = g.V().has('person', 'name', 'serg').valueMap().next()
    print("serg: " + json.dumps(serg_properties))

marko: {"name": ["serg"]}


In [48]:
# Follow the outgoing 'knows' edge from the person named 'serg' and print the
# first acquaintance's property map as JSON. The printed label names the
# vertex that is actually queried.
serg_knows = (
    g.V()
     .has('person', 'name', 'serg')
     .out('knows')
     .valueMap()
     .next()
)
print("who serg knows: " + json.dumps(serg_knows))

who marko knows: {"name": ["alex"]}


In [49]:
# g.V().drop().iterate()  # left disabled: it would drop every vertex
# Property map of the first vertex labelled 'person'.
(
    g.V()
     .hasLabel('person')
     .valueMap()
     .next()
)

{'name': ['serg']}

## Load a dataset

In [97]:
# Empty the graph, then import the air-routes GraphML file.
# NOTE(review): with a remote connection the path is presumably resolved on
# the Gremlin Server host rather than on the notebook machine — confirm.
g.V().drop().iterate()
(
    g.io('/home/serg/projects/Bigdata/dataset/air-routes.graph.xml')
     .read()
     .iterate()
)

[['io', '/home/serg/projects/Bigdata/dataset/air-routes.graph.xml'], ['read'], ['none'], ['values', '_ipython_canary_method_should_not_exist_'], ['values', '_ipython_canary_method_should_not_exist_']]

## has(), hasLabel(), hasId()

In [98]:
# Keep a reference to the SYD airport vertex; later cells start from it.
findId = g.V().hasLabel('airport').has('code', 'SYD').next()

# Three spellings of the same lookup: hasLabel()+has(), the three-argument
# has(), and hasId(). The chained comparison is True only if all agree.
(
    g.V().hasLabel('airport').has('code', 'SYD').next()
    == g.V().has('airport', 'code', 'SYD').next()
    == g.V().hasId(findId).next()
)

True

In [99]:
 # Inspect the vertex held in findId: first the reference itself, then its
 # property map fetched through an id lookup.
 print(findId)
 req = (
     g.V()
      .hasId(findId)
      .valueMap()
      .next()
 )
 print(req)

v[9080968]
{'code': ['SYD'], 'type': ['airport'], 'desc': ['Sydney Kingsford Smith'], 'country': ['AU'], 'longest': [12999], 'city': ['Sydney'], 'elev': [21], 'icao': ['YSSY'], 'lon': [151.177001953125], 'region': ['AU-NSW'], 'runways': [3], 'lat': [-33.9460983276367]}


In [92]:
# Delete data by id.
# Left commented out on purpose: running it drops the vertex held in findId.
# req = g.V().hasId(findId).drop().iterate()
# print(req)

[['V'], ['hasId', v[7192816]], ['drop'], ['none']]


## Count

In [63]:
# Total number of vertices, then the count for each label of interest.
print(g.V().count().next())
for vertex_label in ('country', 'airport'):
    print(g.V().hasLabel(vertex_label).count().next())

3618
237
3373


## valueMap()


In [100]:
# Every property of the SYD airport as a map of key -> list of values.
(
    g.V()
     .has('airport', 'code', 'SYD')
     .valueMap()
     .next()
)

{'code': ['SYD'],
 'type': ['airport'],
 'desc': ['Sydney Kingsford Smith'],
 'country': ['AU'],
 'longest': [12999],
 'city': ['Sydney'],
 'elev': [21],
 'icao': ['YSSY'],
 'lon': [151.177001953125],
 'region': ['AU-NSW'],
 'runways': [3],
 'lat': [-33.9460983276367]}

In [11]:
# Print every property value of the vertex held in findId.
# The traversal is materialised once with toList(), so no separate count()
# round trip is needed and the loop cannot call next() past the last result.
results = g.V(findId).values().toList()
count = len(results)
for value in results:
  print(value)

SYD
airport
Sydney Kingsford Smith
AU
12999
Sydney
21
YSSY
151.177001953125
AU-NSW
3
-33.9460983276367


In [12]:
# Fetch the two properties by key, so the sentence does not depend on the
# order in which a values('desc', 'runways') traversal would emit them.
# valueMap() wraps each value in a list, hence the [0].
req = g.V(findId).valueMap('desc', 'runways').next()
print(f"{req['desc'][0]} airport has {req['runways'][0]} runways...")


Sydney Kingsford Smith airport has 3 runways...


## unfold() fold()

In [13]:

# Unfold the property map into one single-entry map per property and print
# each entry with its position. Enumerating the fetched list replaces a
# second values().count() query, whose result is not guaranteed to equal the
# number of unfolded entries.
req = g.V(findId).valueMap().unfold().toList()
count = len(req)
for i, entry in enumerate(req):
  print(f'{i}, {entry}')


0, {'code': ['SYD']}
1, {'type': ['airport']}
2, {'desc': ['Sydney Kingsford Smith']}
3, {'country': ['AU']}
4, {'longest': [12999]}
5, {'city': ['Sydney']}
6, {'elev': [21]}
7, {'icao': ['YSSY']}
8, {'lon': [151.177001953125]}
9, {'region': ['AU-NSW']}
10, {'runways': [3]}
11, {'lat': [-33.9460983276367]}


In [14]:
# unfold() followed by fold() gathers the single-entry maps into one list.
req = (
    g.V(findId)
     .valueMap()
     .unfold()
     .fold()
     .next()
)
req

[{'code': ['SYD']},
 {'type': ['airport']},
 {'desc': ['Sydney Kingsford Smith']},
 {'country': ['AU']},
 {'longest': [12999]},
 {'city': ['Sydney']},
 {'elev': [21]},
 {'icao': ['YSSY']},
 {'lon': [151.177001953125]},
 {'region': ['AU-NSW']},
 {'runways': [3]},
 {'lat': [-33.9460983276367]}]

## hasNot(), not()

In [16]:
# Total number of vertices that have no 'region' property.
count = g.V().hasNot('region').count().next()

# Preview at most ten of them. limit() caps the result inside the traversal,
# replacing a client-side loop that called next() and broke out at ten.
req = g.V().hasNot('region').valueMap().limit(10).toList()
for properties in req:
    print(properties)

{'code': ['AL'], 'type': ['country'], 'desc': ['Albania']}
{'code': ['BB'], 'type': ['country'], 'desc': ['Barbados']}
{'code': ['BO'], 'type': ['country'], 'desc': ['Bolivia']}
{'code': ['BQ'], 'type': ['country'], 'desc': ['Bonaire - Sint Eustatius and Saba']}
{'code': ['BG'], 'type': ['country'], 'desc': ['Bulgaria']}
{'code': ['BI'], 'type': ['country'], 'desc': ['Burundi']}
{'code': ['KY'], 'type': ['country'], 'desc': ['Cayman Islands']}
{'code': ['CF'], 'type': ['country'], 'desc': ['Central African Republic']}
{'code': ['CC'], 'type': ['country'], 'desc': ['Cocos (Keeling) Islands']}
{'code': ['CD'], 'type': ['country'], 'desc': ['Democratic Republic of the Congo']}


In [17]:
# Count vertices without a 'region' property using not_(has(...)).
same_count = (
    g.V()
     .not_(__.has('region'))
     .count()
     .next()
)

In [18]:
# next(3) returns the first three results as a list.
(
    g.V()
     .hasLabel('airport')
     .next(3)
)

[v[5849112], v[5853208], v[5857304]]

In [19]:
# Number of edges labelled 'route'.
(
    g.E()
     .hasLabel('route')
     .count()
     .next()
)

43400

In [20]:
# Print the description of every airport that has a 'route' edge into the
# vertex held in findId. One toList() call replaces a separate count() query
# that was only used to decide how many times to call next().
results = g.V(findId).inE('route').outV().values('desc').toList()
count = len(results)
for desc in results:
    print(desc)

Melbourne International Airport
Wellington International Airport
Manila, Ninoy Aquino International Airport
Santiago, Comodoro Arturo Merino Benitez International Airport
Cairns International Airport
Darwin International Airport
Indira Gandhi International Airport
Auckland International Airport
Christchurch International Airport
Tokyo Haneda International Airport
Doha, Hamad International Airport
Alice Springs Airport
Dallas/Fort Worth International Airport
Soekarno-Hatta International Airport
Honolulu International Airport
Vancouver
Guangzhou Baiyun International Airport
Perth International Airport
Canberra International Airport
Seoul, Incheon International Airport
Sunshine Coast Airport
Dubai International Airport
Hong Kong - Chek Lap Kok International Airport
San Francisco International Airport
Singapore, Changi International Airport
Kuala Lumpur International Airport
Ho Chi Minh City, Tan Son Nhat International Airport
Ayers Rock Connellan Airport
Gold Coast Airport
Abu Dhabi Inter

## Get types of vertices

In [21]:
# Number of vertices per vertex label.
a = (
    g.V()
     .groupCount()
     .by(T.label)
     .next()
)
print(a)

{'continent': 7, 'country': 237, 'version': 1, 'airport': 3374}


In [22]:
# Count airports per country, then keep only three keys of the resulting map.
(
    g.V()
     .hasLabel('airport')
     .groupCount().by('country')
     .select('FR', 'GR', 'BE')
     .next()
)

{'FR': 58, 'GR': 39, 'BE': 5}

In [23]:
# For every continent: its code mapped to the number of outgoing neighbours.
(
    g.V()
     .hasLabel('continent')
     .group()
     .by('code')
     .by(__.out().count())
     .next()
)

{'EU': 583, 'AS': 932, 'NA': 978, 'OC': 284, 'AF': 294, 'AN': 0, 'SA': 303}

In [24]:
# Take five airports and map each airport code to its 'runways' values.
(
    g.V()
     .hasLabel('airport')
     .limit(5)
     .group()
     .by('code')
     .by('runways')
     .next()
)

{'ORD': [8], 'BNA': [4], 'BWI': [3], 'IAD': [4], 'FLL': [2]}

In [4]:
# All outgoing 'route' edges of vertices in region AU-NSW, as one list.
(
    g.V()
     .has('region', 'AU-NSW')
     .outE('route')
     .fold()
     .next()
)

value': {'relationId': 'goyrn-3yhi0-fth-3iiag'}}][6647832-route->5902360],
 e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relationId': 'g5gqs-3tnog-fth-1pge8'}}][6422560-route->2867264],
 e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relationId': 'g5h50-3tnog-fth-20oxs'}}][6422560-route->3391552],
 e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relationId': 'g5j44-3tu00-fth-1pge8'}}][6430752-route->2867264],
 e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relationId': 'g5jic-3tu00-fth-3iiag'}}][6430752-route->5902360],
 e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relationId': 'g5jwk-3tx5s-fth-1pge8'}}][6434848-route->2867264],
 e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relationId': 'g5kas-3tx5s-fth-1pjk0'}}][6434848-route->2871360],
 e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relationId': 'g5kp0-3tx5s-fth-1wtw8'}}][6434848-route->3211352],
 e[{'@type': 'janusgraph:RelationIdentifier', '@value': {'relati

In [9]:
# Number of vertices with a 'route' edge pointing at the findId vertex.
(
    g.V(findId)
     .in_('route')
     .count()
     .next()
)

94

In [11]:
# Descriptions of all vertices whose region is AU-NSW or AU-VIC.
(
    g.V()
     .has('region', P.within('AU-NSW', 'AU-VIC'))
     .values('desc')
     .fold()
     .next()
)

['Albury Airport',
 'Moree Airport',
 'Coffs Harbour Airport',
 'Portland Airport',
 'Bathurst Airport',
 'Tamworth Airport',
 'Wagga Wagga City Airport',
 'Taree Airport',
 'Melbourne Essendon Airport',
 'Sydney Kingsford Smith',
 'Melbourne International Airport',
 'Parkes Airport',
 'Armidale Airport',
 'Broken Hill Airport',
 'Lord Howe Island Airport',
 'Merimbula Airport',
 'Narrandera Airport',
 'Gold Coast Airport',
 'Griffith Airport',
 'Sydney Bankstown Airport',
 'Avalon Airport',
 'Orange Airport',
 'Lismore Airport',
 'Moruya Airport',
 'Grafton Airport',
 'Narrabri Airport',
 'Warrnambool Airport',
 'Ballina Byron Gateway Airport',
 'Mildura Airport',
 'Port Macquarie Airport',
 'Dubbo City Regional Airport',
 'Newcastle Airport']

In [12]:
# Mean 'runways' value over vertices whose region is AU-NSW or AU-VIC.
(
    g.V()
     .has('region', P.within('AU-NSW', 'AU-VIC'))
     .values('runways')
     .mean()
     .next()
)

1.875

In [17]:
 # Go out along 'route' and back in along 'route', then count the distinct
 # vertices reached that are not the labelled start vertex.
 (
     g.V(findId).as_('hk')
      .out('route')
      .in_('route')
      .where(P.neq('hk'))
      .dedup()
      .count()
      .next()
 )

1199

In [18]:
# tail(2) keeps only the last two vertices reached over outgoing 'route' edges.
(
    g.V(findId)
     .out('route')
     .tail(2)
     .fold()
     .next()
)

[v[6643736], v[6647832]]

In [22]:
# Three outgoing hops from findId that end on a vertex with code 'SYD';
# show the first ten paths, rendered through each vertex's 'desc'.
(
    g.V(findId)
     .out().out().out()
     .has('code', 'SYD')
     .limit(10)
     .path().by('desc')
     .fold()
     .next()
)

[path[Sydney Kingsford Smith, Melbourne International Airport, Wellington International Airport, Sydney Kingsford Smith],
 path[Sydney Kingsford Smith, Melbourne International Airport, Manila, Ninoy Aquino International Airport, Sydney Kingsford Smith],
 path[Sydney Kingsford Smith, Melbourne International Airport, Cairns International Airport, Sydney Kingsford Smith],
 path[Sydney Kingsford Smith, Melbourne International Airport, Darwin International Airport, Sydney Kingsford Smith],
 path[Sydney Kingsford Smith, Melbourne International Airport, Indira Gandhi International Airport, Sydney Kingsford Smith],
 path[Sydney Kingsford Smith, Melbourne International Airport, Auckland International Airport, Sydney Kingsford Smith],
 path[Sydney Kingsford Smith, Melbourne International Airport, Christchurch International Airport, Sydney Kingsford Smith],
 path[Sydney Kingsford Smith, Melbourne International Airport, Doha, Hamad International Airport, Sydney Kingsford Smith],
 path[Sydney Kings

In [25]:
 # Label three consecutive vertices 'a', 'b', 'c' and report only the
 # 'b' -> 'c' segment of each path, rendered by airport code (first ten).
 (
     g.V(findId).as_('a')
      .out().as_('b')
      .out().as_('c')
      .path().by('code').from_('b').to('c')
      .limit(10)
      .fold()
      .next()
 )

[path[DFW, DCA],
 path[DFW, PBI],
 path[DFW, ANC],
 path[DFW, FLL],
 path[DFW, BWI],
 path[DFW, TPA],
 path[DFW, JFK],
 path[DFW, BNA],
 path[DFW, SAT],
 path[DFW, SEA]]

## Select

In [25]:
# Walk three outgoing hops from the findId vertex, labelling every stop, and
# select the four labelled vertices of the first ten traversers.
(
    g.V(findId).as_('a')
     .out().as_('b')
     .out().as_('c')
     .out().as_('d')
     .select('a', 'b', 'c', 'd')
     .limit(10)
     .fold()
     .next()
)

[{'a': v[2867264], 'b': v[2871360], 'c': v[2867264], 'd': v[2871360]},
 {'a': v[2867264], 'b': v[2871360], 'c': v[2867264], 'd': v[2875456]},
 {'a': v[2867264], 'b': v[2871360], 'c': v[2867264], 'd': v[2891840]},
 {'a': v[2867264], 'b': v[2871360], 'c': v[2867264], 'd': v[2936896]},
 {'a': v[2867264], 'b': v[2871360], 'c': v[2867264], 'd': v[2945088]},
 {'a': v[2867264], 'b': v[2871360], 'c': v[2867264], 'd': v[2961472]},
 {'a': v[2867264], 'b': v[2871360], 'c': v[2867264], 'd': v[2994352]},
 {'a': v[2867264], 'b': v[2871360], 'c': v[2867264], 'd': v[3002544]},
 {'a': v[2867264], 'b': v[2871360], 'c': v[2867264], 'd': v[3010736]},
 {'a': v[2867264], 'b': v[2871360], 'c': v[2867264], 'd': v[3018928]}]

## Mean

In [27]:
# Mean number of outgoing 'route' neighbours per airport: local() counts per
# airport, mean() then averages those counts.
(
    g.V()
     .hasLabel('airport')
     .local(__.out('route').count())
     .mean()
     .next()
)

12.863070539419088

In [127]:
# AU-NSW vertices ordered by code; values() emits 'code' and 'city' values as
# one flat stream, and limit(10) is applied to that stream of values.
(
    g.V()
     .has('region', 'AU-NSW')
     .order().by('code')
     .values('code', 'city')
     .limit(10)
     .fold()
     .next()
)

['ABX',
 'Albury',
 'ARM',
 'Armidale',
 'BHQ',
 'Broken Hill',
 'BHS',
 'Bathurst',
 'BNK',
 'Ballina']

## Sum()

In [126]:
# Sum of the 'runways' property over all airports.
(
    g.V()
     .hasLabel('airport')
     .values('runways')
     .sum()
     .next()
)

4828

## Max()

In [7]:
# Largest 'longest' property value among all airports.
(
    g.V()
     .hasLabel('airport')
     .values('longest')
     .max()
     .next()
)

18045

# TASK, Select vertex to delete
Пример схемы с богами.
Написать метод рекурсивного обхода дерева с циклическими связями и пометить узлы для удаления, например:


In [96]:
# Repeating traversal
# The bare string below is only a block of notes; it has no effect at runtime.
'''
 * dedup() - Remove duplicates
 * where(neq("u861")) -  Exclude u861
 * repeat() - avoid combinatorial explosion!

 * emit() - modulator to “output” all the traversed vertices
 If emit() is placed after repeat(), it will “output” all vertices leaving the repeat-traversal. If emit() is placed before repeat(), it will “output” the vertices prior to entering the repeat-traversal.

 Controlling the recursion
 repeat(out("route").simplePath())) helps reducing the combinatory explosion due to cyclic loops but we still have a rapid graph expansion.
 '''

# Codes of the distinct vertices reached within two outgoing 'route' hops of
# the SYD airport, with the labelled start vertex itself filtered out.
list_code = (
    g.V()
     .has('airport', 'code', 'SYD')
     .as_("SYD")
     .repeat(__.out("route")).times(2)
     .emit()
     .dedup()
     .where(P.neq("SYD"))
     .values("code")
     .fold()
     .next()
)
print(len(list_code))

0


In [107]:
def _two_hops_from_syd():
    """Return a new traversal over the candidates for deletion.

    The traversal starts at the airport with code 'SYD', repeats an outgoing
    'route' hop twice while emitting the vertices it passes, removes
    duplicates, and filters out the labelled start vertex. A new traversal is
    built on every call because a traversal can only be iterated once.
    """
    return (
        g.V()
         .has('airport', 'code', 'SYD')
         .as_("SYD")
         .repeat(__.out("route")).times(2)
         .emit()
         .dedup()
         .where(P.neq("SYD"))
    )


counts = _two_hops_from_syd().count().next()
print('Total counts', counts)

# Preview at most 11 candidates. limit() bounds the result inside the
# traversal instead of a count-driven loop with a manual break.
id_for_delete = _two_hops_from_syd().limit(11)
for vertex in id_for_delete.toList():
    print(vertex)



Total counts 0


In [106]:
# Delete data
# WARNING: destructive. Drops every vertex emitted within two outgoing
# 'route' hops of the SYD airport; the labelled start vertex is filtered out
# by where() and therefore kept.
(
    g.V()
     .has('airport', 'code', 'SYD')
     .as_("SYD")
     .repeat(__.out("route")).times(2)
     .emit()
     .where(P.neq("SYD"))
     .drop()
     .iterate()
)

[['V'], ['has', 'airport', 'code', 'SYD'], ['as', 'SYD'], ['repeat', [['out', 'route']]], ['times', 2], ['emit'], ['where', neq(SYD)], ['drop'], ['none'], ['values', '_ipython_canary_method_should_not_exist_'], ['values', '_ipython_canary_method_should_not_exist_']]

In [15]:
# g.E().has('airport', 'code', 'SYD').drop()
# NOTE(review): on g.E() the three-argument has() filters edges by label
# 'airport' and property code == 'SYD'; elsewhere in this notebook 'airport'
# is a vertex label, so an empty list may be the only possible result —
# confirm what this check is meant to show.
(
    g.E()
     .has('airport', 'code', 'SYD')
     .fold()
     .next()
)

[]

In [132]:
# Repeatedly step out along non-repeating (simple) paths from findId, emitting
# only traversers that sit on a vertex with code 'TPE'; show the first fifty
# paths rendered by airport code.
(
    g.V(findId)
     .repeat(__.out().simplePath())
     .emit(__.has('code', 'TPE'))
     .path()
     .by('code')
     .limit(50)
     .fold()
     .next()
)

[path[SYD, TPE],
 path[SYD, SIN, TPE],
 path[SYD, MEL, TPE],
 path[SYD, NRT, TPE],
 path[SYD, DXB, TPE],
 path[SYD, KUL, TPE],
 path[SYD, BKK, TPE],
 path[SYD, ICN, TPE],
 path[SYD, CAN, TPE],
 path[SYD, DPS, TPE],
 path[SYD, NKG, TPE],
 path[SYD, HGH, TPE],
 path[SYD, KMG, TPE],
 path[SYD, CSX, TPE],
 path[SYD, DEL, TPE],
 path[SYD, LAX, TPE],
 path[SYD, HKG, TPE],
 path[SYD, BNE, TPE],
 path[SYD, HNL, TPE],
 path[SYD, PEK, TPE],
 path[SYD, MNL, TPE],
 path[SYD, SGN, TPE],
 path[SYD, CTU, TPE],
 path[SYD, XIY, TPE],
 path[SYD, HAN, TPE],
 path[SYD, WUH, TPE],
 path[SYD, CKG, TPE],
 path[SYD, SFO, TPE],
 path[SYD, YVR, TPE],
 path[SYD, PVG, TPE],
 path[SYD, HND, TPE],
 path[SYD, CGK, TPE],
 path[SYD, SIN, CDG, TPE],
 path[SYD, SIN, MEL, TPE],
 path[SYD, SIN, NRT, TPE],
 path[SYD, SIN, FCO, TPE],
 path[SYD, SIN, DXB, TPE],
 path[SYD, SIN, KUL, TPE],
 path[SYD, SIN, BKK, TPE],
 path[SYD, SIN, ICN, TPE],
 path[SYD, SIN, PNH, TPE],
 path[SYD, SIN, FUK, TPE],
 path[SYD, SIN, CAN, TPE],
 pat