# Script to load wordnet, conceptnet data as separate graphs with single edge.

## Install pyTigerGraph

In [93]:
# Setup
!pip install pyTigerGraph

[0m

## Add Imports and Establish Initial Connection

In [94]:
# Imports
import pyTigerGraph as tg
import json
import pandas as pd

# Connection parameters
hostName = "https://language.i.tgcloud.io/"
userName = "tigergraph"
password = "tigergraph"

conn = tg.TigerGraphConnection(host=hostName, username=userName, password=password)

print("Connected")

Connected


## Clone the Data

In [95]:
# from google.colab import drive
# drive.mount('/content/drive')

In [96]:
# cd '/content/drive/MyDrive'

In [97]:
#To-Do: Comment-out and clone the graph data(*.csv) once.

# !git clone https://github.com/sudha-vijayakumar/LanguageModel.git

## Peek into the data 

### ConceptNet - Word

In [98]:
# root='/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/''
root='/Users/sudhavijayakumar/Documents/GitHub/LanguageModel/1_Curated_Data/csv_imports/'
nodes = pd.read_csv(root+'words.csv')
nodes.head(1)

Unnamed: 0,id:ID,name,pos,conceptUri,:LABEL
0,able.a,able,a,/c/en/able/a,Lemma;Concept


### ConceptNet - Synset

In [99]:
synsets = pd.read_csv(root+'synsets.csv')
synsets.head(1)

Unnamed: 0,id:ID,pos:string,definition:string,:LABEL
0,able.a.01,a,(usually followed by `to') having the necessar...,Synset


### ConceptNet - Edges

In [100]:
relationships = pd.read_csv(root+'relationships.csv',index_col=[0])
relationships.head(5)

Unnamed: 0_level_0,:END_ID,dataset:string,weight:double,:TYPE
:START_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
able.a.01,ability.n.01,/d/wordnet/3.1,2.0,Attribute
able.a.01,ability.n.02,/d/wordnet/3.1,2.0,Attribute
able.a,able.a.01,/d/wordnet/3.1,2.0,InSynset
unable.a.01,ability.n.01,/d/wordnet/3.1,2.0,Attribute
unable.a,unable.a.01,/d/wordnet/3.1,2.0,InSynset


### ConceptNet - Part of Speech

In [101]:
relationships = pd.read_csv(root+'encoded/pos_wn.csv',index_col=[0])
relationships.head()

Unnamed: 0,type,id
0,adjective,0
1,noun,3
2,adverb,2
3,verb,4
4,adjective_satellite,1


## Define and Publish the Schema

In [102]:
# DEFINE / CREATE ALL EDGES AND VERTICES 
results = conn.gsql('''
  USE GLOBAL
  CREATE VERTEX words(PRIMARY_ID id STRING, uri STRING, word STRING, pos STRING,definition STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE DIRECTED EDGE type(FROM words, TO words, typeOf STRING) WITH REVERSE_EDGE="reverse_type"
  CREATE VERTEX word(PRIMARY_ID id STRING, name STRING, pos STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE VERTEX synset(PRIMARY_ID id STRING, definition STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE DIRECTED EDGE is_a(FROM synset, TO word|FROM word, TO synset, type STRING) WITH REVERSE_EDGE="reverse_is_a"
''')
print(results)

Successfully created vertex types: [words].
Successfully created edge types: [type].
Successfully created reverse edge types: [reverse_type].
Successfully created vertex types: [word].
Successfully created vertex types: [synset].
Successfully created edge types: [is_a].
Successfully created reverse edge types: [reverse_is_a].


## WordNET

### Create WordNet Graph

In [103]:
results = conn.gsql('CREATE GRAPH WordNet(words,type,reverse_type)')
print(results)

Stopping GPE GSE RESTPP
Successfully stopped GPE GSE RESTPP in 1.462 seconds
Starting GPE GSE RESTPP
Successfully started GPE GSE RESTPP in 0.077 seconds
The graph WordNet is created.


In [104]:
conn.graphname="WordNet"
secret = conn.createSecret()
print(secret)
authToken = conn.getToken(secret)
authToken = authToken[0]
print(authToken)
# authToken = 'rc7reopbis1667ksgcppq5v5fb99p6s1'
conn = tg.TigerGraphConnection(host=hostName, graphname="WordNet", username=userName, password=password, apiToken=authToken)

def pprint(string):
  print(json.dumps(string, indent=2))

741jvihniq5o05ga3o1caedar0n1p82d
1bec08177onlqk0h4g8hmqdoabensfr2


### Create Loading Jobs

#### Word

Let's take a look at what one of our files looks like so we can write a loading job.

- Here it's important to note that the `$0`, `$1` values line up with the columns of your data.
  In this example:
  - `$0` is the `uri` column,
  - `$1` is `id`,
  - `$2` is `word`
  - and so on

In [105]:
results = conn.gsql('''
  USE GRAPH WordNet
  BEGIN
  CREATE LOADING JOB load_job_WN FOR GRAPH WordNet {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE type VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }
  END
  ''')
print(results)

Using graph 'WordNet'
Successfully created loading jobs: [load_job_WN].


#### Word Edges

In [106]:
results = conn.gsql('''
  USE GRAPH WordNet
  BEGIN
  CREATE LOADING JOB load_job_WN_nodes FOR GRAPH WordNet {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX words VALUES($1, $0, $2, $3, $4) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }
  END
  ''')
print(results)

Using graph 'WordNet'
Successfully created loading jobs: [load_job_WN_nodes].


### Load Data

#### Words

In [107]:
# Load the posts file wiht the 'load_words' job
load_words = root+'WN-nodes.csv'
results = conn.uploadFile(load_words, fileTag='MyDataSource', jobName='load_job_WN_nodes')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 117792,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [
        {
          "typeName": "words",
          "validObject": 117792,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "edge": [],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


#### Edges

In [108]:
# Load the posts file wiht the 'load_edges' job
load_edges = root+'WN-edges.csv'
results = conn.uploadFile(load_edges, fileTag='MyDataSource', jobName='load_job_WN')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 293652,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [],
      "edge": [
        {
          "typeName": "type",
          "validObject": 293652,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


### Exploring the Graph

#### Get Vertex and Edge Schema

In [109]:
results = conn.getVertexTypes()
print(f"Verticies: {results}")
vertices = results

results = conn.getEdgeTypes()
print(f"Edges: {results}")
edges = results

Verticies: ['words']
Edges: ['type']


In [110]:

print(f"Results for Post vertex")
pprint(conn.getVertexType("words"))

print("-----------------")
print(f"Results for liked edge")
pprint(conn.getEdgeType("type"))


Results for Post vertex
{
  "Config": {
    "TAGGABLE": false,
    "STATS": "OUTDEGREE_BY_EDGETYPE",
    "PRIMARY_ID_AS_ATTRIBUTE": true
  },
  "Attributes": [
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "uri",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    },
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "word",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    },
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "pos",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    },
    {
      "AttributeType": {
        "Name": "ST

### Counting Data

In [111]:
print("Vertex Counts")
for vertex in vertices:
  print(f"There are {conn.getVertexCount(vertex)} {vertex} vertices in the graph")

print("--------------")
print("Edge Counts")
for edge in edges:
  print(f"There are {conn.getEdgeCount(edge)} {edge} edges in the graph")

Vertex Counts
There are 117792 words vertices in the graph
--------------
Edge Counts
There are 0 type edges in the graph


### Extracting Data

#### Vertex/Edge Set Format

##### Getting a Vertex

In [112]:
results = conn.getVerticesById("words", "15314760-n")
pprint(results)

[
  {
    "v_id": "15314760-n",
    "v_type": "words",
    "attributes": {
      "id": "15314760-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/15314760-n",
      "word": "lead_time",
      "pos": "noun",
      "definition": "the time interval between the initiation and the completion of a production process; &quot;the lead times for many publications can vary tremendously&quot;; &quot;planning is an area where lead time can be reduced&quot;@en"
    }
  }
]


##### Or Multiple Vertices

In [113]:
tdf1 = conn.getVerticesById("words", ["15137796-n","15192825-n"])
pprint(tdf1)

[
  {
    "v_id": "15137796-n",
    "v_type": "words",
    "attributes": {
      "id": "15137796-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/15137796-n",
      "word": "period",
      "pos": "noun",
      "definition": "an amount of time; &quot;a time period of 30 years&quot;; &quot;hastened the period of time of his recovery&quot;; &quot;Picasso's blue period&quot;@en"
    }
  },
  {
    "v_id": "15192825-n",
    "v_type": "words",
    "attributes": {
      "id": "15192825-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/15192825-n",
      "word": "eve",
      "pos": "noun",
      "definition": "the period immediately before something; &quot;on the eve of the French Revolution&quot;@en"
    }
  }
]


##### Count Edges Connected to a Vertex

In [114]:
results = conn.getEdgeCountFrom("words", "15192825-n")
pprint(results)

{
  "type": 1,
  "reverse_type": 1
}


##### Show all Edges Connected to a Vertex

In [115]:
results = conn.getEdges("words", "15192825-n")
pprint(results)

[
  {
    "e_type": "type",
    "directed": true,
    "from_id": "15192825-n",
    "from_type": "words",
    "to_id": "15137796-n",
    "to_type": "words",
    "attributes": {
      "typeOf": "hypernym"
    }
  },
  {
    "e_type": "reverse_type",
    "directed": true,
    "from_id": "15192825-n",
    "from_type": "words",
    "to_id": "15137796-n",
    "to_type": "words",
    "attributes": {
      "typeOf": "hyponym"
    }
  }
]


#### As Pandas Dataframe
Supports all of the above in native Pandas Dataframe format.

##### All Vertices of one Type

In [116]:
df1 = conn.getVertexDataframe("words")
print(df1)

              v_id          id  \
0       15314760-n  15314760-n   
1       15290975-n  15290975-n   
2       15289702-n  15289702-n   
3       15287435-n  15287435-n   
4       15264089-n  15264089-n   
...            ...         ...   
117789  04743169-n  04743169-n   
117790  04311353-n  04311353-n   
117791  04748435-n  04748435-n   
117792  02754634-v  02754634-v   
117793  07771065-n  07771065-n   

                                                   uri               word  \
0       http://wordnet-rdf.princeton.edu/id/15314760-n          lead_time   
1       http://wordnet-rdf.princeton.edu/id/15290975-n     starting_point   
2       http://wordnet-rdf.princeton.edu/id/15289702-n         allegretto   
3       http://wordnet-rdf.princeton.edu/id/15287435-n          split_run   
4       http://wordnet-rdf.princeton.edu/id/15264089-n             season   
...                                                ...                ...   
117789  http://wordnet-rdf.princeton.edu/id/04743169

##### One or More Vertex

In [117]:
df2 = conn.getVertexDataframeById("words", ["15192825-n"])
print(df2)

         v_id          id                                             uri  \
0  15192825-n  15192825-n  http://wordnet-rdf.princeton.edu/id/15192825-n   

  word   pos                                         definition  
0  eve  noun  the period immediately before something; &quot...  


##### Convert Vertex/Edge Set to Dataframe
We'll use the results from the 'Or Multiple Vertices' cell. 

In [118]:
df3 = conn.vertexSetToDataFrame(tdf1)
print(df3)

         v_id          id                                             uri  \
0  15137796-n  15137796-n  http://wordnet-rdf.princeton.edu/id/15137796-n   
1  15192825-n  15192825-n  http://wordnet-rdf.princeton.edu/id/15192825-n   

     word   pos                                         definition  
0  period  noun  an amount of time; &quot;a time period of 30 y...  
1     eve  noun  the period immediately before something; &quot...  


##### Get Edges

In [119]:
df4 = conn.getEdgesDataframe("words", "15192825-n", limit=3)
print(df4)

  from_type     from_id to_type       to_id    typeOf
0     words  15192825-n   words  15137796-n  hypernym
1     words  15192825-n   words  15137796-n   hyponym


### Path Finding
Find paths between vertices.

Supported are:
- shortestPath - one shortest path between vertices
- allPaths - all paths within the specified edge limit

In [120]:
results = conn.shortestPath([("words", "15192825-n")], [("words", "15161294-n")])
pprint(results)

[{'type': 'words', 'id': '15192825-n'}]
[{'type': 'words', 'id': '15161294-n'}]
[
  {
    "vertices": [
      {
        "v_id": "15192825-n",
        "v_type": "words",
        "attributes": {
          "id": "15192825-n",
          "uri": "http://wordnet-rdf.princeton.edu/id/15192825-n",
          "word": "eve",
          "pos": "noun",
          "definition": "the period immediately before something; &quot;on the eve of the French Revolution&quot;@en"
        }
      },
      {
        "v_id": "15137796-n",
        "v_type": "words",
        "attributes": {
          "id": "15137796-n",
          "uri": "http://wordnet-rdf.princeton.edu/id/15137796-n",
          "word": "period",
          "pos": "noun",
          "definition": "an amount of time; &quot;a time period of 30 years&quot;; &quot;hastened the period of time of his recovery&quot;; &quot;Picasso's blue period&quot;@en"
        }
      },
      {
        "v_id": "15161294-n",
        "v_type": "words",
        "attributes": 

## ConceptNET

### Create WordNet Graph

In [121]:
results = conn.gsql('CREATE GRAPH ConceptNET(word, synset, is_a, reverse_is_a)')
print(results)

The graph ConceptNET is created.


In [122]:
conn.graphname="ConceptNET"
secret = conn.createSecret()
print(secret)
authToken = conn.getToken(secret)
authToken = authToken[0]
print(authToken)
conn = tg.TigerGraphConnection(host=hostName, graphname="ConceptNET", username=userName, password=password, apiToken=authToken)

def pprint(string):
  print(json.dumps(string, indent=2))

q8vnu3asej9pg8qgeari7ek3f8a6qaju
sol9jp1gtktj9lp5b9f79ea23sa3u66l


#### Create Loading Jobs

#### Concept/ Words Edges

Let's take a look at what one of our files looks like so we can write a loading job. 

- has_synset(FROM rootWord, TO synset)
is_a_synset_of(FROM synset, TO rootWord)


In [123]:
# !head -n 2 '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/words.csv'

Here it's important to note that the `$0`, `$1` values line up with the columns of your data.
In this example:
- `$0` is the `uri` column,
- `$1` is `id`,
- `$2` is `word`
- and so on

In [124]:
results = conn.gsql('''
  USE GRAPH ConceptNET
  BEGIN
  CREATE LOADING JOB load_job_words FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX word VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }

    CREATE LOADING JOB load_job_synsets FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX synset VALUES($0, $2) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }


    CREATE LOADING JOB load_job_relationships FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE is_a VALUES($0 word, $1 synset, $4) USING SEPARATOR=",", HEADER="true", EOL="";
      LOAD MyDataSource TO EDGE is_a VALUES($0 synset, $1 word, $4) USING SEPARATOR=",", HEADER="true", EOL="";
    }

  END
  ''')
print(results)

Using graph 'ConceptNET'
Successfully created loading jobs: [load_job_words].
Successfully created loading jobs: [load_job_synsets].
Successfully created loading jobs: [load_job_relationships].


#### Concept/ Word Root


In [144]:
# !head -n 2 '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/synsets.csv'

In [145]:
results = conn.gsql('''
  USE GRAPH ConceptNET
  BEGIN
  CREATE LOADING JOB load_job_words_csv_1644515929297 FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX word VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }

  CREATE LOADING JOB load_job_relationships_csv_1644515956497 FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE is_a VALUES($0 synset, $1 word, $4) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE is_a VALUES($0 word, $1 synset, $4) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }

  CREATE LOADING JOB load_job_synsets_csv_1644515968121 FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX synset VALUES($0, $2) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }
  END
  ''')
print(results)

Using graph 'ConceptNET'
Semantic Check Fails: The job name load_job_words_csv_1644515929297 already exists in other objects!
Failed to create loading jobs: [load_job_words_csv_1644515929297].


#### Concept/ Synset 
- synset(PRIMARY_ID id STRING, pos STRING, definition STRING, label STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"


In [146]:
results = conn.gsql('''
  USE GRAPH ConceptNET
  BEGIN
  CREATE LOADING JOB load_job_relationships FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE is_a VALUES($0 word, $1 synset, $4) USING SEPARATOR=",", HEADER="true", EOL="\n";
      LOAD MyDataSource TO EDGE is_a VALUES($0 synset, $1 word, $4) USING SEPARATOR=",", HEADER="true", EOL="\n";
    }
  END
  ''')
print(results)

Using graph 'ConceptNET'
Semantic Check Fails: The job name load_job_relationships already exists in other objects!
Failed to create loading jobs: [load_job_relationships].


### Load Data

#### Words

In [147]:
# Load the posts file wiht the 'load_words' job
load_words = root+'words.csv'
results = conn.uploadFile(load_words, timeout='100000',fileTag='MyDataSource', jobName='load_job_words_csv_1644515929297')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 1530137,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [
        {
          "typeName": "word",
          "validObject": 1530137,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "edge": [],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


#### Synsets

In [148]:
# Load the posts file wiht the 'load_synsets' job
load_synsets = root+'synsets.csv'
results = conn.uploadFile(load_synsets, timeout='100000', fileTag='MyDataSource', jobName='load_job_synsets_csv_1644515968121')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 117660,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [
        {
          "typeName": "synset",
          "validObject": 117660,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "edge": [],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


#### Edges

In [149]:
# Load the posts file wiht the 'load_edges' job
load_edges = root+'relationships.csv'
results = conn.uploadFile(load_edges, timeout='100000', fileTag='MyDataSource', jobName='load_job_relationships_csv_1644515956497')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 3406450,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [],
      "edge": [
        {
          "typeName": "is_a",
          "validObject": 3406450,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        },
        {
          "typeName": "is_a",
          "validObject": 3406450,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


### Exploring the Graph

#### Get Vertex and Edge Schema

In [150]:
results = conn.getVertexTypes()
print(f"Verticies: {results}")
vertices = results

results = conn.getEdgeTypes()
print(f"Edges: {results}")
edges = results

Verticies: ['word', 'synset']
Edges: ['is_a']


In [151]:

# print(f"Results for Post vertex")
# pprint(conn.getVertexType("word"))

print("-----------------")
print(f"Results for liked edge")
pprint(conn.getEdgeType("is_a"))


-----------------
Results for liked edge
{
  "IsDirected": true,
  "ToVertexTypeName": "*",
  "Config": {
    "REVERSE_EDGE": "reverse_is_a"
  },
  "Attributes": [
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "type",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    }
  ],
  "FromVertexTypeName": "*",
  "EdgePairs": [
    {
      "From": "synset",
      "To": "word"
    },
    {
      "From": "word",
      "To": "synset"
    }
  ],
  "Name": "is_a"
}


### Counting Data

In [152]:
print("Vertex Counts")
for vertex in vertices:
  print(f"There are {conn.getVertexCount(vertex)} {vertex} vertices in the graph")

print("--------------")
print("Edge Counts")
for edge in edges:
  print(f"There are {conn.getEdgeCount(edge)} {edge} edges in the graph")

Vertex Counts
There are 1647798 word vertices in the graph
There are 1647798 synset vertices in the graph
--------------
Edge Counts
There are 6592698 is_a edges in the graph


### Extracting Data

#### Vertex/Edge Set Format

##### Getting a Vertex

In [153]:
results = conn.getVerticesById("synset", "judicially.r.01")
pprint(results)

[
  {
    "v_id": "judicially.r.01",
    "v_type": "synset",
    "attributes": {
      "id": "judicially.r.01",
      "definition": "as ordered by a court"
    }
  }
]


##### Or Multiple Vertices

In [154]:
tdf1 = conn.getVerticesById("word", ["conflict.v","clash.v.02"])
pprint(tdf1)

[
  {
    "v_id": "conflict.v",
    "v_type": "word",
    "attributes": {
      "id": "conflict.v",
      "name": "conflict",
      "pos": "v"
    }
  },
  {
    "v_id": "clash.v.02",
    "v_type": "word",
    "attributes": {
      "id": "",
      "name": "",
      "pos": ""
    }
  }
]


##### Count Edges Connected to a Vertex

In [155]:
results = conn.getEdgeCountFrom("word", "conflict.v")
pprint(results)

{
  "is_a": 9,
  "reverse_is_a": 7
}


##### Show all Edges Connected to a Vertex

In [156]:
results = conn.getEdges("word", "conflict.v")
pprint(results)

[
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "contrast.v",
    "to_type": "synset",
    "attributes": {
      "type": "MannerOf"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "disagree",
    "to_type": "synset",
    "attributes": {
      "type": "Synonym"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "overlap",
    "to_type": "synset",
    "attributes": {
      "type": "RelatedTo"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "at_odds",
    "to_type": "synset",
    "attributes": {
      "type": "RelatedTo"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "conflict.v.02",
    "to_type": "synset",
    "att

#### As Pandas Dataframe
Supports all of the above in native Pandas Dataframe format.

##### All Vertices of one Type

In [157]:
df1 = conn.getVertexDataframe("word")
print(df1)

                                 v_id                            id  \
0                    friend_to_friend              friend_to_friend   
1        making_objects_appear_closer  making_objects_appear_closer   
2                   conveying_message             conveying_message   
3                   enhance_orchestra             enhance_orchestra   
4                 playing_flute_music           playing_flute_music   
...                               ...                           ...   
1647793        intradepartmental.a.01                                 
1647794                cybernate.v.01                                 
1647795              hystricidae.n.01                                 
1647796                 sedition.n.01                                 
1647797                     dull.s.05                                 

                                 name pos  
0                    friend_to_friend      
1        making_objects_appear_closer      
2              

##### One or More Vertex

In [158]:
df2 = conn.getVertexDataframeById("word", "conflict.v")
print(df2)

         v_id          id      name pos
0  conflict.v  conflict.v  conflict   v


##### Convert Vertex/Edge Set to Dataframe
We'll use the results from the 'Or Multiple Vertices' cell. 

In [159]:
df3 = conn.vertexSetToDataFrame(tdf1)
print(df3)

         v_id          id      name pos
0  conflict.v  conflict.v  conflict   v
1  clash.v.02                          


##### Get Edges

In [160]:
df4 = conn.getEdgesDataframe("word", "conflict.v",limit=3)
print(df4)

  from_type     from_id to_type       to_id       type
0      word  conflict.v  synset  contrast.v   MannerOf
1      word  conflict.v  synset    disagree    Synonym
2      word  conflict.v  synset     overlap  RelatedTo


### Path Finding
Find paths between vertices.

Supported are:
- shortestPath - one shortest path between vertices
- allPaths - all paths within the specified edge limit

In [161]:
results = conn.shortestPath([("word", "in_due_time.r")], [("word", "in_due_season.r")])
pprint(results)

[{'type': 'word', 'id': 'in_due_time.r'}]
[{'type': 'word', 'id': 'in_due_season.r'}]
[
  {
    "vertices": [
      {
        "v_id": "when_time_comes.r",
        "v_type": "synset",
        "attributes": {
          "id": "",
          "definition": ""
        }
      },
      {
        "v_id": "in_due_time.r",
        "v_type": "word",
        "attributes": {
          "id": "in_due_time.r",
          "name": "in_due_time",
          "pos": "r"
        }
      },
      {
        "v_id": "in_due_season.r",
        "v_type": "word",
        "attributes": {
          "id": "in_due_season.r",
          "name": "in_due_season",
          "pos": "r"
        }
      }
    ],
    "edges": [
      {
        "e_type": "reverse_is_a",
        "from_id": "when_time_comes.r",
        "from_type": "synset",
        "to_id": "in_due_season.r",
        "to_type": "word",
        "directed": true,
        "attributes": {
          "type": "Synonym"
        }
      },
      {
        "e_type": "is_a",

# Clear the Whole Graph
DANGER ZONE

In [162]:
# conn.gsql('''
# USE GLOBAL
# DROP ALL
# ''')