### Write sample data files to disk

In [1]:
%%writefile person.csv
name,gender,age,state
Tom,male,40,ca
Dan,male,34,ny
Jenny,female,25,tx
Kevin,male,28,az
Amily,female,22,ca
Nancy,female,20,ky
Jack,male,26,fl

Overwriting person.csv


In [2]:
%%writefile friendship.csv
person1,person2,date
Tom,Dan,2017-06-03
Tom,Jenny,2015-01-01
Dan,Jenny,2016-08-03
Jenny,Amily,2015-06-08
Dan,Nancy,2016-01-03
Nancy,Jack,2017-03-02
Dan,Kevin,2015-12-30

Overwriting friendship.csv


### Write the graph schema to disk

In [3]:
%%writefile schema.gsql
# Reset the TigerGraph instance before redefining the schema; this drops
# everything, so run it only against a scratch/tutorial instance.
drop all

# Vertex type `person`: keyed by `name` (primary_id) and also storing `name`
# as an ordinary attribute. Attribute order here is name, age, gender, state.
create vertex person (primary_id name string, name string, age int, 
                      gender string, state string)
# Undirected person-to-person edge carrying one datetime attribute, connect_day.
create undirected edge friendship (from person, to person, 
                                   connect_day datetime)

# Graph `social` is made of the vertex and edge types declared above.
create graph social (person, friendship)


Overwriting schema.gsql


### Load the schema in gsql

In [4]:
!gsql schema.gsql

Dropping all, about 1 minute ...
Abort all active loading jobs
Try to abort all loading jobs on graph social, it may take a while ...
[ABORT_SUCCESS] No active Loading Job to abort.

Shutdown restpp gse gpe ...
Graph store /home/tigergraph/tigergraph/gstore/0/ has been cleared!
Everything is dropped.
The vertex type person is created.
The edge type friendship is created.

Restarting gse gpe restpp ...

Finish restarting services in 29.769 seconds!
The graph social is created.


### Write a data loading job to disk and run it

In [5]:
%%writefile loadsocial.gsql

# Select the graph that the statements below operate on.
use graph social 

# Loading job with two inputs, both comma-separated with a header row so that
# columns can be referenced by name via $"column":
#   file1 (person.csv)     -> person vertices. The values list is positional:
#                             primary_id, name, age, gender, state. The "name"
#                             column is therefore supplied twice (id + attribute),
#                             and age/gender are swapped relative to the CSV
#                             column order (name,gender,age,state).
#   file2 (friendship.csv) -> friendship edges: person1, person2, then the
#                             "date" column as the edge's datetime attribute.
create loading job load_social for graph social {
    define filename file1="person.csv";
    define filename file2="friendship.csv";
    
    load file1 to vertex person values ($"name", $"name", $"age", $"gender", $"state") using header="true", separator=",";
    
    load file2 to edge friendship values ($"person1", $"person2", $"date") using header="true", separator=",";
    
}

# Execute the job right after creating it.
run loading job load_social

Overwriting loadsocial.gsql


In [6]:
!gsql loadsocial.gsql

Using graph 'social'
The job load_social is created.
[Tip: Use "CTRL + C" to stop displaying the loading status update, then use "SHOW LOADING STATUS jobid" to track the loading progress again]
[Tip: Manage loading jobs with "ABORT/RESUME LOADING JOB jobid"]
Starting the following job, i.e.
  JobName: load_social, jobid: social.load_social.file.m1.1592237888429
  Loading log: '/home/tigergraph/tigergraph/logs/restpp/restpp_loader_logs/social/social.load_social.file.m1.1592237888429.log'

Job "social.load_social.file.m1.1592237888429" loading status
[RUNNING] m1 ( Finished: 0 / Total: 2 )
[2A[2KJob "social.load_social.file.m1.1592237888429" loading status
[2K[RUNNING] m1 ( Finished: 2 / Total: 2 )
  [LOADED]
  +---------------------------------------------------------------------------+
  |                       FILENAME |   LOADED LINES |   AVG SPEED |   DURATION|
  |    /home/tigergraph/person.csv |              8 |       7 l/s |     1.00 s|
  |/home/tigergraph/friendship.csv |    

### Define a function to capture and process gsql queries

In [7]:
import subprocess, json

def gsql(text, graph='social'):
    """Run one GSQL command through the ``gsql`` command-line client.

    Parameters
    ----------
    text : str
        The GSQL statement to execute, e.g. ``'select count() from person'``.
    graph : str, optional
        Graph name handed to the client's ``-g`` option (default ``'social'``).

    Returns
    -------
    list | dict | str
        The decoded JSON value when the client's stdout is valid JSON.
        Otherwise the raw stdout text, with literal backslash-n sequences
        turned into real newlines so messages display cleanly.

    Notes
    -----
    Only stdout is inspected; stderr and the exit status are ignored.
    """
    # Pass the option and its value as separate argv entries, which is what
    # the shell produces for `gsql -g social '...'`.
    comp = subprocess.run(['gsql', '-g', graph, text], text=True, capture_output=True)

    try:
        return json.loads(comp.stdout)
    except json.JSONDecodeError:
        # Not JSON (status/info messages): fall back to plain text. Only the
        # decode failure is caught so unrelated errors are not hidden.
        return comp.stdout.replace('\\n', '\n')

### Select queries return a list of dicts

In [8]:
gsql('select count(*) from person')

[{'count': 7, 'v_type': 'person'}]

In [9]:
gsql('select count() from person')

[{'count': 7, 'v_type': 'person'}]

### The `from` argument can be a pattern 

In [10]:
gsql('select count() from person-(friendship)-person')

[{'count': 7, 'e_type': 'friendship'}]

### The `where` argument is a filter on the `from` pattern

In [11]:
gsql('select * from person where primary_id=="Tom"')

[{'v_id': 'Tom',
  'attributes': {'gender': 'male', 'name': 'Tom', 'state': 'ca', 'age': 40},
  'v_type': 'person'}]

In [13]:
gsql('select * from person where gender=="male"')

[{'v_id': 'Jack',
  'attributes': {'gender': 'male', 'name': 'Jack', 'state': 'fl', 'age': 26},
  'v_type': 'person'},
 {'v_id': 'Kevin',
  'attributes': {'gender': 'male', 'name': 'Kevin', 'state': 'az', 'age': 28},
  'v_type': 'person'},
 {'v_id': 'Dan',
  'attributes': {'gender': 'male', 'name': 'Dan', 'state': 'ny', 'age': 34},
  'v_type': 'person'},
 {'v_id': 'Tom',
  'attributes': {'gender': 'male', 'name': 'Tom', 'state': 'ca', 'age': 40},
  'v_type': 'person'}]

In [14]:
[v["attributes"]["age"] for v in 
 gsql('select * from person where gender=="male"')]

[26, 28, 34, 40]

In [15]:
[v["attributes"]["age"] for v in 
 gsql('select age from person where gender=="male" and age < 30')]

[26, 28]

### Queries can be saved and compiled

In [16]:
%%writefile hello.gsql

# hello(p): print the person vertices directly connected to p by a
# friendship edge. The result is printed under the key "tgt".
create query hello(vertex<person> p) for graph social{
    
    # Seed vertex set containing only the input vertex.
    start = {p};
    # One hop over friendship edges; keep the vertices at the far end (t).
    tgt = select t from start:s-(friendship:e)-person:t ;
    print tgt;
}

install query hello

Overwriting hello.gsql


In [17]:
!gsql -g social 'drop query hello'

Query hello could not be found.


In [18]:
!gsql -g social hello.gsql

The query hello has been added!
Start installing queries, about 1 minute ...
hello query: curl -X GET 'http://127.0.0.1:9000/query/social/hello?p=VALUE'. Add -H "Authorization: Bearer TOKEN" if authentication is enabled.



In [19]:
gsql('run query hello("Tom")')

{'error': False,
 'message': '',
 'version': {'schema': 0, 'edition': 'developer', 'api': 'v2'},
 'results': [{'tgt': [{'v_id': 'Dan',
     'attributes': {'gender': 'male', 'name': 'Dan', 'state': 'ny', 'age': 34},
     'v_type': 'person'},
    {'v_id': 'Jenny',
     'attributes': {'gender': 'female',
      'name': 'Jenny',
      'state': 'tx',
      'age': 25},
     'v_type': 'person'}]}]}

In [20]:
[v['attributes']['age'] for v in 
 gsql('run query hello("Tom")')['results'][0]['tgt']]

[34, 25]

### Accums store information while traversing the graph

In [21]:
%%writefile hello2.gsql

# hello2(p): find persons two friendship hops away from p that were not
# already reached in the first hop, and average their ages.
# Prints the vertex set under "secondHop" and the average under "@@aveAge".
create query hello2(vertex<person> p) for graph social {
    
    # @visited is attached to each vertex; @@aveAge is a single global average.
    OrAccum @visited = false;
    AvgAccum @@aveAge;
    
    # Seed vertex set containing only the input vertex.
    start = {p};
    
    # Hop 1: collect p's friends and flag both ends of every traversed edge,
    # so p itself and all first-hop friends count as visited.
    firstHop = select t from start:s-(friendship:e)-person:t
               accum t.@visited += true, s.@visited += true;
    
    # Hop 2: from the first-hop friends, keep only unflagged neighbours and
    # feed each selected vertex's age into the global average.
    secondHop = select t from firstHop:s-(friendship:e)-person:t
                where t.@visited == false
                post_accum @@aveAge += t.age;
    
    print secondHop;
    print @@aveAge;
            
}

install query hello2

Overwriting hello2.gsql


In [22]:
!gsql -g social hello2.gsql

The query hello2 has been added!
Start installing queries, about 1 minute ...
hello2 query: curl -X GET 'http://127.0.0.1:9000/query/social/hello2?p=VALUE'. Add -H "Authorization: Bearer TOKEN" if authentication is enabled.



In [23]:
gsql('run query hello2("Tom")')

{'error': False,
 'message': '',
 'version': {'schema': 0, 'edition': 'developer', 'api': 'v2'},
 'results': [{'secondHop': [{'v_id': 'Amily',
     'attributes': {'gender': 'female',
      '@visited': False,
      'name': 'Amily',
      'state': 'ca',
      'age': 22},
     'v_type': 'person'},
    {'v_id': 'Kevin',
     'attributes': {'gender': 'male',
      '@visited': False,
      'name': 'Kevin',
      'state': 'az',
      'age': 28},
     'v_type': 'person'},
    {'v_id': 'Nancy',
     'attributes': {'gender': 'female',
      '@visited': False,
      'name': 'Nancy',
      'state': 'ky',
      'age': 20},
     'v_type': 'person'}]},
  {'@@aveAge': 23.33333}]}

In [24]:
gsql('run query hello2("Tom")')['results'][1]['@@aveAge']

23.33333

In [25]:
ages = [v['attributes']['age'] for v in 
        gsql('run query hello2("Tom")')['results'][0]['secondHop']]
ages

[22, 28, 20]

In [26]:
sum(ages)/len(ages)

23.333333333333332