## Install pyTigerGraph

In [104]:
# Setup
!pip install pyTigerGraph



## Add Imports and Establish Initial Connection

In [105]:
# Imports
import pyTigerGraph as tg
import json
import pandas as pd

# Connection parameters
hostName = "https://language.i.tgcloud.io"
userName = "tigergraph"
password = "tigergraph"

conn = tg.TigerGraphConnection(host=hostName, username=userName, password=password)

print("Connected")

Connected


## Clone the Data

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
cd '/content/drive/MyDrive'

/content/drive/MyDrive


In [5]:
# !git clone https://github.com/sudha-vijayakumar/LanguageModel.git

## Define and Publish the Schema

In [106]:
# DEFINE / CREATE ALL EDGES AND VERTICES 
results = conn.gsql('''
  USE GLOBAL
  CREATE VERTEX words(PRIMARY_ID id STRING, uri STRING, word STRING, pos STRING,definition STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE DIRECTED EDGE type(FROM words, TO words, typeOf STRING) WITH REVERSE_EDGE="reverse_type"
  CREATE VERTEX word(PRIMARY_ID id STRING, name STRING, pos STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE VERTEX synset(PRIMARY_ID id STRING, definition STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE DIRECTED EDGE is_a(FROM synset, TO word|FROM word, TO synset, type STRING) WITH REVERSE_EDGE="reverse_is_a"
''')
print(results)

Successfully created vertex types: [words].
Successfully created edge types: [type].
Successfully created reverse edge types: [reverse_type].
Successfully created vertex types: [word].
Successfully created vertex types: [synset].
Successfully created edge types: [is_a].
Successfully created reverse edge types: [reverse_is_a].


# WordNET

## Create WordNet Graph

In [107]:
results = conn.gsql('CREATE GRAPH WordNet(words,type,reverse_type)')
print(results)

The graph WordNet is created.


In [108]:
conn.graphname="WordNet"
secret = conn.createSecret()
print(secret)
authToken = conn.getToken(secret)
authToken = authToken[0]
print(authToken)
# authToken = 'rc7reopbis1667ksgcppq5v5fb99p6s1'
conn = tg.TigerGraphConnection(host=hostName, graphname="WordNet", username=userName, password=password, apiToken=authToken)

def pprint(string):
  print(json.dumps(string, indent=2))

jfnoj3g8ipqpqg8ibisp6agarh2me6pe
1hq42ajl98ub5p1vibp7oid6scf6giqa


## Create Loading Jobs

### Word

Let's take a look at what one of our files looks like so we can write a loading job.

In [109]:
!head -n 2 '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/WN-nodes.csv'

uri,id,word,pos,definition,subject
"http://wordnet-rdf.princeton.edu/id/00001740-a","00001740-a","able","adjective","(usually followed by `to') having the necessary means or skill or know-how or authority to do something; &quot;able to swim&quot;; &quot;she was able to program her computer&quot;; &quot;we were at last able to buy a car&quot;; &quot;able to get a grant for the project&quot;@en","adj.all"


Here it's important to note that the `$0`, `$1` values line up with the columns of your data.
In this example:
- `$0` is the `uri` column,
- `$1` is `id`,
- `$2` is `word`
- and so on

In [110]:
results = conn.gsql('''
  USE GRAPH WordNet
  BEGIN
  CREATE LOADING JOB load_job_WN FOR GRAPH WordNet {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE type VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }
  END
  ''')
print(results)

Using graph 'WordNet'
Successfully created loading jobs: [load_job_WN].


### Word Edges

In [111]:
results = conn.gsql('''
  USE GRAPH WordNet
  BEGIN
  CREATE LOADING JOB load_job_WN_nodes FOR GRAPH WordNet {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX words VALUES($1, $0, $2, $3, $4) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }
  END
  ''')
print(results)

Using graph 'WordNet'
Successfully created loading jobs: [load_job_WN_nodes].


## Load Data

### Words

In [112]:
# Load the posts file wiht the 'load_words' job
load_words = '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/WN-nodes.csv'
results = conn.uploadFile(load_words, fileTag='MyDataSource', jobName='load_job_WN_nodes')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 117792,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [
        {
          "typeName": "words",
          "validObject": 117792,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "edge": [],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


### Edges

In [113]:
# Load the posts file wiht the 'load_edges' job
load_edges = '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/WN-edges.csv'
results = conn.uploadFile(load_edges, fileTag='MyDataSource', jobName='load_job_WN')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 293652,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [],
      "edge": [
        {
          "typeName": "type",
          "validObject": 293652,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


## Exploring the Graph

### Get Vertex and Edge Schema

In [114]:
results = conn.getVertexTypes()
print(f"Verticies: {results}")
vertices = results

results = conn.getEdgeTypes()
print(f"Edges: {results}")
edges = results

Verticies: ['words']
Edges: ['type']


In [115]:

print(f"Results for Post vertex")
pprint(conn.getVertexType("words"))

print("-----------------")
print(f"Results for liked edge")
pprint(conn.getEdgeType("type"))


Results for Post vertex
{
  "Config": {
    "TAGGABLE": false,
    "STATS": "OUTDEGREE_BY_EDGETYPE",
    "PRIMARY_ID_AS_ATTRIBUTE": true
  },
  "Attributes": [
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "uri",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    },
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "word",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    },
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "pos",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    },
    {
      "AttributeType": {
        "Name": "ST

## Counting Data

In [116]:
print("Vertex Counts")
for vertex in vertices:
  print(f"There are {conn.getVertexCount(vertex)} {vertex} vertices in the graph")

print("--------------")
print("Edge Counts")
for edge in edges:
  print(f"There are {conn.getEdgeCount(edge)} {edge} edges in the graph")

Vertex Counts
There are 22266 words vertices in the graph
--------------
Edge Counts
There are 6017 type edges in the graph


## Extracting Data

### Vertex/Edge Set Format

#### Getting a Vertex

In [117]:
results = conn.getVerticesById("words", "15314760-n")
pprint(results)

[
  {
    "v_id": "15314760-n",
    "v_type": "words",
    "attributes": {
      "id": "15314760-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/15314760-n",
      "word": "lead_time",
      "pos": "noun",
      "definition": "the time interval between the initiation and the completion of a production process; &quot;the lead times for many publications can vary tremendously&quot;; &quot;planning is an area where lead time can be reduced&quot;@en"
    }
  }
]


#### Or Multiple Vertices

In [118]:
tdf1 = conn.getVerticesById("words", ["15137796-n","15192825-n"])
pprint(tdf1)

[
  {
    "v_id": "15137796-n",
    "v_type": "words",
    "attributes": {
      "id": "15137796-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/15137796-n",
      "word": "period",
      "pos": "noun",
      "definition": "an amount of time; &quot;a time period of 30 years&quot;; &quot;hastened the period of time of his recovery&quot;; &quot;Picasso's blue period&quot;@en"
    }
  },
  {
    "v_id": "15192825-n",
    "v_type": "words",
    "attributes": {
      "id": "15192825-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/15192825-n",
      "word": "eve",
      "pos": "noun",
      "definition": "the period immediately before something; &quot;on the eve of the French Revolution&quot;@en"
    }
  }
]


#### Count Edges Connected to a Vertex

In [119]:
results = conn.getEdgeCountFrom("words", "15192825-n")
pprint(results)

{
  "type": 1,
  "reverse_type": 1
}


#### Show all Edges Connected to a Vertex

In [120]:
results = conn.getEdges("words", "15192825-n")
pprint(results)

[
  {
    "e_type": "type",
    "directed": true,
    "from_id": "15192825-n",
    "from_type": "words",
    "to_id": "15137796-n",
    "to_type": "words",
    "attributes": {
      "typeOf": "hypernym"
    }
  },
  {
    "e_type": "reverse_type",
    "directed": true,
    "from_id": "15192825-n",
    "from_type": "words",
    "to_id": "15137796-n",
    "to_type": "words",
    "attributes": {
      "typeOf": "hyponym"
    }
  }
]


### As Pandas Dataframe
Supports all of the above in native Pandas Dataframe format.

#### All Vertices of one Type

In [121]:
df1 = conn.getVertexDataframe("words")
print(df1)

              v_id  ...                                         definition
0       15319328-n  ...  the time interval between the deposit of a che...
1       15314760-n  ...  the time interval between the initiation and t...
2       15309138-n  ...  the velocity of a projectile as it leaves the ...
3       15300301-n  ...  the time constant of an exponential return of ...
4       15294470-n  ...  a definite length of time marked off by two in...
...            ...  ...                                                ...
117789  00323460-n  ...  the forceful insertion of a substance under pr...
117790  04708510-n  ...     a surface coating for ceramics or porcelain@en
117791  00455517-n  ...  angling by drawing a baited line through the w...
117792  11489881-n  ...  electromagnetic radiation with wavelengths lon...
117793  04766235-n  ...  the quality of happening accidentally and by l...

[117794 rows x 6 columns]


#### One or More Vertex

In [122]:
df2 = conn.getVertexDataframeById("words", ["15192825-n"])
print(df2)

         v_id  ...                                         definition
0  15192825-n  ...  the period immediately before something; &quot...

[1 rows x 6 columns]


#### Convert Vertex/Edge Set to Dataframe
We'll use the results from the 'Or Multiple Vertices' cell. 

In [123]:
df3 = conn.vertexSetToDataFrame(tdf1)
print(df3)

         v_id  ...                                         definition
0  15137796-n  ...  an amount of time; &quot;a time period of 30 y...
1  15192825-n  ...  the period immediately before something; &quot...

[2 rows x 6 columns]


#### Get Edges

In [124]:
df4 = conn.getEdgesDataframe("words", "15192825-n", limit=3)
print(df4)

  from_type     from_id to_type       to_id    typeOf
0     words  15192825-n   words  15137796-n  hypernym
1     words  15192825-n   words  15137796-n   hyponym


## Path Finding
Find paths between vertices.

Supported are:
- shortestPath - one shortest path between vertices
- allPaths - all paths within the specified edge limit

In [125]:
results = conn.shortestPath([("words", "15192825-n")], [("words", "15161294-n")])
pprint(results)

[{'type': 'words', 'id': '15192825-n'}]
[{'type': 'words', 'id': '15161294-n'}]
[
  {
    "vertices": [
      {
        "v_id": "15161294-n",
        "v_type": "words",
        "attributes": {
          "id": "15161294-n",
          "uri": "http://wordnet-rdf.princeton.edu/id/15161294-n",
          "word": "midweek",
          "pos": "noun",
          "definition": "the middle of a week@en"
        }
      },
      {
        "v_id": "15137796-n",
        "v_type": "words",
        "attributes": {
          "id": "15137796-n",
          "uri": "http://wordnet-rdf.princeton.edu/id/15137796-n",
          "word": "period",
          "pos": "noun",
          "definition": "an amount of time; &quot;a time period of 30 years&quot;; &quot;hastened the period of time of his recovery&quot;; &quot;Picasso's blue period&quot;@en"
        }
      },
      {
        "v_id": "15192825-n",
        "v_type": "words",
        "attributes": {
          "id": "15192825-n",
          "uri": "http://wordnet

# ConceptNET

## Create WordNet Graph

In [126]:
results = conn.gsql('CREATE GRAPH ConceptNET(word, synset, is_a, reverse_is_a)')
print(results)

The graph ConceptNET is created.


In [127]:
conn.graphname="ConceptNET"
secret = conn.createSecret()
print(secret)
authToken = conn.getToken(secret)
authToken = authToken[0]
print(authToken)
conn = tg.TigerGraphConnection(host=hostName, graphname="ConceptNET", username=userName, password=password, apiToken=authToken)

def pprint(string):
  print(json.dumps(string, indent=2))

49dpis1ofc8rt0p8b6r5rv4c8okllvf5
d9m3g7u91cfbb6a01paf0ecf18is2u55


## Create Loading Jobs

### Concept/ Words Edges

Let's take a look at what one of our files looks like so we can write a loading job. 

- has_synset(FROM rootWord, TO synset)
is_a_synset_of(FROM synset, TO rootWord)


In [128]:
!head -n 2 '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/words.csv'

id:ID,name,pos,conceptUri,:LABEL
able.a,able,a,/c/en/able/a,Lemma;Concept


Here it's important to note that the `$0`, `$1` values line up with the columns of your data.
In this example:
- `$0` is the `uri` column,
- `$1` is `id`,
- `$2` is `word`
- and so on

In [129]:
results = conn.gsql('''
  USE GRAPH ConceptNET
  BEGIN
  CREATE LOADING JOB load_job_words FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX word VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }

    CREATE LOADING JOB load_job_synsets FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX synset VALUES($0, $2) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }


    CREATE LOADING JOB load_job_relationships FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE is_a VALUES($0 word, $1 synset, $4) USING SEPARATOR=",", HEADER="true", EOL="";
      LOAD MyDataSource TO EDGE is_a VALUES($0 synset, $1 word, $4) USING SEPARATOR=",", HEADER="true", EOL="";
    }

  END
  ''')
print(results)

Using graph 'ConceptNET'
Successfully created loading jobs: [load_job_words].
Successfully created loading jobs: [load_job_synsets].
Successfully created loading jobs: [load_job_relationships].


### Concept/ Word Root


In [130]:
!head -n 2 '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/synsets.csv'

id:ID,pos:string,definition:string,:LABEL
able.a.01,a,(usually followed by `to') having the necessary means or skill or know-how or authority to do something,Synset


In [131]:
results = conn.gsql('''
  USE GRAPH ConceptNET
  BEGIN
  CREATE LOADING JOB load_job_words_csv_1644515929297 FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX word VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }

  CREATE LOADING JOB load_job_relationships_csv_1644515956497 FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE is_a VALUES($0 synset, $1 word, $4) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE is_a VALUES($0 word, $1 synset, $4) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }

  CREATE LOADING JOB load_job_synsets_csv_1644515968121 FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX synset VALUES($0, $2) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }
  END
  ''')
print(results)

Using graph 'ConceptNET'
Successfully created loading jobs: [load_job_words_csv_1644515929297].
Successfully created loading jobs: [load_job_relationships_csv_1644515956497].
Successfully created loading jobs: [load_job_synsets_csv_1644515968121].


### Concept/ Synset 
- synset(PRIMARY_ID id STRING, pos STRING, definition STRING, label STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"


In [132]:
results = conn.gsql('''
  USE GRAPH ConceptNET
  BEGIN
  CREATE LOADING JOB load_job_relationships FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE is_a VALUES($0 word, $1 synset, $4) USING SEPARATOR=",", HEADER="true", EOL="\n";
      LOAD MyDataSource TO EDGE is_a VALUES($0 synset, $1 word, $4) USING SEPARATOR=",", HEADER="true", EOL="\n";
    }
  END
  ''')
print(results)

Using graph 'ConceptNET'
Semantic Check Fails: The job name load_job_relationships already exists in other objects!
Failed to create loading jobs: [load_job_relationships].


## Load Data

### Words

In [133]:
# Load the posts file wiht the 'load_words' job
load_words = '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/words.csv'
results = conn.uploadFile(load_words, timeout='100000',fileTag='MyDataSource', jobName='load_job_words_csv_1644515929297')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 1530137,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [
        {
          "typeName": "word",
          "validObject": 1530137,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "edge": [],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


### Synsets

In [134]:
# Load the posts file wiht the 'load_synsets' job
load_synsets = '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/synsets.csv'
results = conn.uploadFile(load_synsets, timeout='100000', fileTag='MyDataSource', jobName='load_job_synsets_csv_1644515968121')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 117660,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [
        {
          "typeName": "synset",
          "validObject": 117660,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "edge": [],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


### Edges

In [135]:
# Load the posts file wiht the 'load_edges' job
load_edges = '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/relationships.csv'
results = conn.uploadFile(load_edges, timeout='100000', fileTag='MyDataSource', jobName='load_job_relationships_csv_1644515956497')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 2831679,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [],
      "edge": [
        {
          "typeName": "is_a",
          "validObject": 2831679,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        },
        {
          "typeName": "is_a",
          "validObject": 2831679,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


## Exploring the Graph

### Get Vertex and Edge Schema

In [136]:
results = conn.getVertexTypes()
print(f"Verticies: {results}")
vertices = results

results = conn.getEdgeTypes()
print(f"Edges: {results}")
edges = results

Verticies: ['word', 'synset']
Edges: ['is_a']


In [137]:

# print(f"Results for Post vertex")
# pprint(conn.getVertexType("word"))

print("-----------------")
print(f"Results for liked edge")
pprint(conn.getEdgeType("is_a"))


-----------------
Results for liked edge
{
  "IsDirected": true,
  "ToVertexTypeName": "*",
  "Config": {
    "REVERSE_EDGE": "reverse_is_a"
  },
  "Attributes": [
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "type",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    }
  ],
  "FromVertexTypeName": "*",
  "EdgePairs": [
    {
      "From": "synset",
      "To": "word"
    },
    {
      "From": "word",
      "To": "synset"
    }
  ],
  "Name": "is_a"
}


## Counting Data

In [138]:
print("Vertex Counts")
for vertex in vertices:
  print(f"There are {conn.getVertexCount(vertex)} {vertex} vertices in the graph")

print("--------------")
print("Edge Counts")
for edge in edges:
  print(f"There are {conn.getEdgeCount(edge)} {edge} edges in the graph")

Vertex Counts
There are 1610875 word vertices in the graph
There are 429201 synset vertices in the graph
--------------
Edge Counts
There are 5503998 is_a edges in the graph


## Extracting Data

### Vertex/Edge Set Format

#### Getting a Vertex

In [139]:
results = conn.getVerticesById("synset", "judicially.r.01")
pprint(results)

[
  {
    "v_id": "judicially.r.01",
    "v_type": "synset",
    "attributes": {
      "id": "judicially.r.01",
      "definition": "as ordered by a court"
    }
  }
]


#### Or Multiple Vertices

In [140]:
tdf1 = conn.getVerticesById("word", ["conflict.v","clash.v.02"])
pprint(tdf1)

[
  {
    "v_id": "conflict.v",
    "v_type": "word",
    "attributes": {
      "id": "conflict.v",
      "name": "conflict",
      "pos": "v"
    }
  },
  {
    "v_id": "clash.v.02",
    "v_type": "word",
    "attributes": {
      "id": "",
      "name": "",
      "pos": ""
    }
  }
]


#### Count Edges Connected to a Vertex

In [141]:
results = conn.getEdgeCountFrom("word", "conflict.v")
pprint(results)

{
  "is_a": 9,
  "reverse_is_a": 4
}


#### Show all Edges Connected to a Vertex

In [142]:
results = conn.getEdges("word", "conflict.v")
pprint(results)

[
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "overlap",
    "to_type": "synset",
    "attributes": {
      "type": "RelatedTo"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "incompatible",
    "to_type": "synset",
    "attributes": {
      "type": "RelatedTo"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "disagree",
    "to_type": "synset",
    "attributes": {
      "type": "RelatedTo"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "contrast.v",
    "to_type": "synset",
    "attributes": {
      "type": "MannerOf"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "at_odds",
    "to_type": "synset",
    "at

### As Pandas Dataframe
Supports all of the above in native Pandas Dataframe format.

#### All Vertices of one Type

In [143]:
df1 = conn.getVertexDataframe("word")
print(df1)

                                 v_id  ... pos
0             native_american_studies  ...    
1                    friend_to_friend  ...    
2        making_objects_appear_closer  ...    
3          expressing_human_condition  ...    
4                   conveying_message  ...    
...                               ...  ...  ..
1647793                    clear.s.05  ...    
1647794                   church.n.04  ...    
1647795                   pistol.n.01  ...    
1647796                 colaptes.n.01  ...    
1647797           record_changer.n.01  ...    

[1647798 rows x 4 columns]


#### One or More Vertex

In [144]:
df2 = conn.getVertexDataframeById("word", "conflict.v")
print(df2)

         v_id          id      name pos
0  conflict.v  conflict.v  conflict   v


#### Convert Vertex/Edge Set to Dataframe
We'll use the results from the 'Or Multiple Vertices' cell. 

In [145]:
df3 = conn.vertexSetToDataFrame(tdf1)
print(df3)

         v_id          id      name pos
0  conflict.v  conflict.v  conflict   v
1  clash.v.02                          


#### Get Edges

In [146]:
df4 = conn.getEdgesDataframe("word", "conflict.v",limit=3)
print(df4)

  from_type     from_id to_type         to_id       type
0      word  conflict.v  synset       overlap  RelatedTo
1      word  conflict.v  synset  incompatible  RelatedTo
2      word  conflict.v  synset      disagree  RelatedTo


## Path Finding
Find paths between vertices.

Supported are:
- shortestPath - one shortest path between vertices
- allPaths - all paths within the specified edge limit

In [147]:
results = conn.shortestPath([("word", "in_due_time.r")], [("word", "in_due_season.r")])
pprint(results)

[{'type': 'word', 'id': 'in_due_time.r'}]
[{'type': 'word', 'id': 'in_due_season.r'}]
[
  {
    "vertices": [
      {
        "v_id": "in_due_season.r",
        "v_type": "word",
        "attributes": {
          "id": "in_due_season.r",
          "name": "in_due_season",
          "pos": "r"
        }
      },
      {
        "v_id": "in_due_course.r.01",
        "v_type": "synset",
        "attributes": {
          "id": "in_due_course.r.01",
          "definition": "at the appropriate time"
        }
      },
      {
        "v_id": "in_due_time.r",
        "v_type": "word",
        "attributes": {
          "id": "in_due_time.r",
          "name": "in_due_time",
          "pos": "r"
        }
      }
    ],
    "edges": [
      {
        "e_type": "reverse_is_a",
        "from_id": "in_due_course.r.01",
        "from_type": "synset",
        "to_id": "in_due_season.r",
        "to_type": "word",
        "directed": true,
        "attributes": {
          "type": "InSynset"
        