## Install pyTigerGraph

In [4]:
# Setup
!pip install pyTigerGraph

[0m

## Add Imports and Establish Initial Connection

In [5]:
# Imports
import json
import os

import pandas as pd
import pyTigerGraph as tg

# Connection parameters.
# Read from the environment so real credentials never have to be committed with
# the notebook; the fallbacks are the values this notebook originally hard-coded.
hostName = os.environ.get("TG_HOST", "https://language.i.tgcloud.io")
userName = os.environ.get("TG_USERNAME", "tigergraph")
password = os.environ.get("TG_PASSWORD", "tigergraph")

# No graph name is passed here; `conn.graphname` is set once the WordNet graph exists.
conn = tg.TigerGraphConnection(host=hostName, username=userName, password=password)

# NOTE(review): this message is printed unconditionally once the constructor
# returns; it is not by itself evidence that the server accepted the login.
print("Connected")

Connected


## Clone the Data

In [6]:
# from google.colab import drive
# drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [None]:
# cd '/content/drive/MyDrive'

In [None]:
# !git clone https://github.com/sudha-vijayakumar/LanguageModel.git

## Peek into the data 

### WordNet - Word

In [None]:
# Directory holding the curated CSV files. Override with the WORDNET_DATA_DIR
# environment variable (e.g. on Colab: '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports').
# os.path.join(..., '') guarantees a trailing path separator, because every other
# cell builds file paths with plain string concatenation (`root + 'file.csv'`);
# without it the directory name and file name would be fused together.
root = os.path.join(
    os.environ.get(
        'WORDNET_DATA_DIR',
        '/Users/sudhavijayakumar/Documents/GitHub/LanguageModel/1_Curated_Data',
    ),
    '',
)
nodes = pd.read_csv(root+'WN-nodes.csv')
nodes.head(1)

### WordNet - Subject

In [None]:
# Preview the subject table.
# NOTE(review): the upload cell further down sends 'encoded/subject_wn.csv' to the
# loading job, whereas this preview reads 'subject_wn.csv' directly under `root` —
# confirm that both files exist and that this is the one meant to be shown.
subjects_csv = root+'subject_wn.csv'
synsets = pd.read_csv(subjects_csv)
synsets.head()

### WordNet - Edges

In [None]:
# Preview the edge list; the file's first column becomes the DataFrame index.
edges_csv = root+'WN-edges.csv'
relationships = pd.read_csv(edges_csv, index_col=[0])
relationships.head(5)

### WordNet - Part of Speech

In [None]:
# Preview the part-of-speech lookup table (first column used as the index).
# Stored under its own name so the edge list that the previous cell loaded into
# `relationships` is not silently replaced by an unrelated table.
pos_tags = pd.read_csv(root+'encoded/pos_wn.csv',index_col=[0])
pos_tags.head()

## Define and Publish the Schema

In [None]:
# Define all vertex and edge types in the global scope (USE GLOBAL) with a
# single GSQL call:
#   - three vertex types: word, subject, pos (each keeps its primary id as an attribute)
#   - 24 directed edge types, every one declared with a matching reverse edge:
#     has_subject (word -> subject), part_of_speech (word -> pos), and 22
#     word -> word relations.
# The value returned by conn.gsql is printed as-is.
results = conn.gsql('''
  USE GLOBAL
  CREATE VERTEX word(PRIMARY_ID id STRING, uri STRING, name STRING, definition STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE VERTEX subject(PRIMARY_ID id STRING, type STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE VERTEX pos(PRIMARY_ID id STRING, type STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE DIRECTED EDGE has_subject(FROM word, TO subject) WITH REVERSE_EDGE="reverse_has_subject"
  CREATE DIRECTED EDGE part_of_speech(FROM word, TO pos) WITH REVERSE_EDGE="reverse_part_of_speech"
  CREATE DIRECTED EDGE attribute(FROM word, TO word) WITH REVERSE_EDGE="reverse_attribute"
  CREATE DIRECTED EDGE hyponym(FROM word, TO word) WITH REVERSE_EDGE="reverse_hyponym"
  CREATE DIRECTED EDGE entails(FROM word, TO word) WITH REVERSE_EDGE="reverse_entails"
  CREATE DIRECTED EDGE similar(FROM word, TO word) WITH REVERSE_EDGE="reverse_similar"
  CREATE DIRECTED EDGE hypernym(FROM word, TO word) WITH REVERSE_EDGE="reverse_hypernym"
  CREATE DIRECTED EDGE domain_topic(FROM word, TO word) WITH REVERSE_EDGE="reverse_domain_topic"
  CREATE DIRECTED EDGE mero_part(FROM word, TO word) WITH REVERSE_EDGE="reverse_mero_part"
  CREATE DIRECTED EDGE exemplifies(FROM word, TO word) WITH REVERSE_EDGE="reverse_exemplifies"
  CREATE DIRECTED EDGE has_domain_topic(FROM word, TO word) WITH REVERSE_EDGE="reverse_has_domain_topic"
  CREATE DIRECTED EDGE also(FROM word, TO word) WITH REVERSE_EDGE="reverse_also"
  CREATE DIRECTED EDGE mero_substance(FROM word, TO word) WITH REVERSE_EDGE="reverse_mero_substance"
  CREATE DIRECTED EDGE domain_region(FROM word, TO word) WITH REVERSE_EDGE="reverse_domain_region"
  CREATE DIRECTED EDGE holo_part(FROM word, TO word) WITH REVERSE_EDGE="reverse_holo_part"
  CREATE DIRECTED EDGE holo_member(FROM word, TO word) WITH REVERSE_EDGE="reverse_holo_member"
  CREATE DIRECTED EDGE causes(FROM word, TO word) WITH REVERSE_EDGE="reverse_causes"
  CREATE DIRECTED EDGE instance_hyponym(FROM word, TO word) WITH REVERSE_EDGE="reverse_instance_hyponym"
  CREATE DIRECTED EDGE instance_hypernym(FROM word, TO word) WITH REVERSE_EDGE="reverse_instance_hypernym"
  CREATE DIRECTED EDGE mero_member(FROM word, TO word) WITH REVERSE_EDGE="reverse_mero_member"
  CREATE DIRECTED EDGE is_exemplified_by(FROM word, TO word) WITH REVERSE_EDGE="reverse_is_exemplified_by"
  CREATE DIRECTED EDGE holo_substance(FROM word, TO word) WITH REVERSE_EDGE="reverse_holo_substance"
  CREATE DIRECTED EDGE has_domain_region(FROM word, TO word) WITH REVERSE_EDGE="reverse_has_domain_region"
  CREATE DIRECTED EDGE antonym(FROM word, TO word) WITH REVERSE_EDGE="reverse_antonym"

''')
print(results)

# WordNet

## Create WordNet Graph

In [None]:
# Build the CREATE GRAPH statement from the names of the vertex and edge types
# declared in the global schema, run it, and show the server's reply.
graph_members = [
    # vertex types
    "word", "subject", "pos",
    # word -> subject / word -> pos
    "has_subject", "part_of_speech",
    # word -> word relations
    "attribute", "hyponym", "entails", "similar", "hypernym",
    "domain_topic", "mero_part", "exemplifies", "has_domain_topic", "also",
    "mero_substance", "domain_region", "holo_part", "holo_member", "causes",
    "instance_hyponym", "instance_hypernym", "mero_member",
    "is_exemplified_by", "holo_substance", "has_domain_region", "antonym",
]
create_graph_stmt = "CREATE GRAPH WordNet(" + ", ".join(graph_members) + ")"
results = conn.gsql(create_graph_stmt)
print(results)

In [None]:
# Point the existing connection at the new graph and mint credentials for it.
conn.graphname="WordNet"
secret = conn.createSecret()
# The first element of getToken's result is what gets passed on as the API token.
authToken = conn.getToken(secret)[0]
# The secret and token are deliberately NOT printed: anything printed is stored
# in the notebook's cell output and would be shared along with the file.
# Re-create the connection so that it carries both the graph name and the token.
conn = tg.TigerGraphConnection(host=hostName, graphname="WordNet", username=userName, password=password, apiToken=authToken)

def pprint(string):
  """Print `string` as JSON text indented by two spaces.

  Despite the parameter name, the argument is handed straight to
  `json.dumps`, so any JSON-serialisable object (dict, list, str, ...) works.
  Nothing is returned.
  """
  formatted = json.dumps(string, indent=2)
  print(formatted)

## Create Loading Jobs

### Word

Let's take a look at what one of our files looks like so we can write a loading job.

In [None]:
# !head -n 2 '/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/WN-nodes.csv'

Here it's important to note that the `$0`, `$1` values line up with the columns of your data.
In this example:
- `$0` is the `uri` column,
- `$1` is `id`,
- `$2` is `word`
- and so on

In [None]:
# Create the four loading jobs used in the "Load Data" section:
#   load_job_WN_edges          - one LOAD per word -> word edge type, selected by column $2
#   load_job_WN_nodes_encoded  - word vertices plus their has_subject / part_of_speech edges
#   load_job_subject_wn        - subject vertices
#   load_job_pos_wn            - pos vertices
# The GSQL is passed as a raw string (r'''...''') so that "\t" and "\n" reach the
# server as two-character escape sequences; in a normal Python string they would
# be expanded into a literal tab / line break inside the quoted option values.
# NOTE(review): the preview cell reads WN-edges.csv with pandas' default comma
# separator and index_col=[0], while load_job_WN_edges declares SEPARATOR="\t"
# and reads columns $0..$2 — confirm the file's actual delimiter and column layout.
results = conn.gsql(r'''
  USE GRAPH WordNet
  BEGIN
  CREATE LOADING JOB load_job_WN_edges FOR GRAPH WordNet {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE mero_part VALUES($0, $1) WHERE $2 == "mero_part" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE domain_topic VALUES($0, $1) WHERE $2 == "domain_topic" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE hypernym VALUES($0, $1) WHERE $2 == "hypernym" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE similar VALUES($0, $1) WHERE $2 == "similar" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE entails VALUES($0, $1) WHERE $2 == "entails" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE hyponym VALUES($0, $1) WHERE $2 == "hyponym" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE attribute VALUES($0, $1) WHERE $2 == "attribute" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE antonym VALUES($0, $1) WHERE $2 == "antonym" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE has_domain_region VALUES($0, $1) WHERE $2 == "has_domain_region" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE holo_substance VALUES($0, $1) WHERE $2 == "holo_substance" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE is_exemplified_by VALUES($0, $1) WHERE $2 == "is_exemplified_by" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE mero_member VALUES($0, $1) WHERE $2 == "mero_member" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE instance_hypernym VALUES($0, $1) WHERE $2 == "instance_hypernym" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE instance_hyponym VALUES($0, $1) WHERE $2 == "instance_hyponym" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE causes VALUES($0, $1) WHERE $2 == "causes" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE holo_member VALUES($0, $1) WHERE $2 == "holo_member" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE holo_part VALUES($0, $1) WHERE $2 == "holo_part" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE domain_region VALUES($0, $1) WHERE $2 == "domain_region" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE mero_substance VALUES($0, $1) WHERE $2 == "mero_substance" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE exemplifies VALUES($0, $1) WHERE $2 == "exemplifies" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE has_domain_topic VALUES($0, $1) WHERE $2 == "has_domain_topic" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE also VALUES($0, $1) WHERE $2 == "also" USING SEPARATOR="\t", HEADER="true", EOL="\n", QUOTE="double";

    }

    CREATE LOADING JOB load_job_WN_nodes_encoded FOR GRAPH WordNet {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX word VALUES($1, $0, $2, $4) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE has_subject VALUES($1, $5) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE part_of_speech VALUES($1, $3) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }


    CREATE LOADING JOB load_job_subject_wn FOR GRAPH WordNet {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX subject VALUES($2, $1) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }


  CREATE LOADING JOB load_job_pos_wn FOR GRAPH WordNet {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX pos VALUES($2, $1) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }


  END
  ''')
print(results)

## Load Data

### Edges and Words

In [None]:
# Upload the edge list (WN-edges.csv) through the 'load_job_WN_edges' job
# and show the response as indented JSON.
load_words = root+'WN-edges.csv'
results = conn.uploadFile(
    load_words,
    fileTag='MyDataSource',
    jobName='load_job_WN_edges',
)
print(json.dumps(results, indent=2))

In [None]:
# Upload the encoded node file through the 'load_job_WN_nodes_encoded' job
# and show the response as indented JSON.
load_words = root+'encoded/WN-nodes-encoded.csv'
results = conn.uploadFile(
    load_words,
    fileTag='MyDataSource',
    jobName='load_job_WN_nodes_encoded',
)
print(json.dumps(results, indent=2))

### Subjects and Parts of Speech

In [None]:
# Upload the encoded subject file through the 'load_job_subject_wn' job
# and show the response as indented JSON.
load_edges = root+'encoded/subject_wn.csv'
results = conn.uploadFile(
    load_edges,
    fileTag='MyDataSource',
    jobName='load_job_subject_wn',
)
print(json.dumps(results, indent=2))

In [None]:
# Upload the encoded part-of-speech file through the 'load_job_pos_wn' job
# and show the response as indented JSON.
load_edges = root+'encoded/pos_wn.csv'
results = conn.uploadFile(
    load_edges,
    fileTag='MyDataSource',
    jobName='load_job_pos_wn',
)
print(json.dumps(results, indent=2))

## Exploring the Graph

### Get Vertex and Edge Schema

In [None]:
# Fetch the vertex and edge type names of the graph; `vertices` and `edges`
# are reused by the counting cell below.
vertices = conn.getVertexTypes()
print(f"Vertices: {vertices}")

edges = conn.getEdgeTypes()
print(f"Edges: {edges}")

In [None]:

# Show the schema details of one vertex type and one edge type.
print("Results for word vertex")
pprint(conn.getVertexType("word"))

print("-----------------")
# The schema declares no edge type called "type"; "hypernym" is one it does declare.
print("Results for hypernym edge")
pprint(conn.getEdgeType("hypernym"))


## Counting Data

In [None]:
# Report, per type, the counts returned by conn.getVertexCount / conn.getEdgeCount.
print("Vertex Counts")
for vertex_type in vertices:
  vertex_total = conn.getVertexCount(vertex_type)
  print(f"There are {vertex_total} {vertex_type} vertices in the graph")

print("--------------")
print("Edge Counts")
for edge_type in edges:
  edge_total = conn.getEdgeCount(edge_type)
  print(f"There are {edge_total} {edge_type} edges in the graph")

## Extracting Data

### Vertex/Edge Set Format

#### Getting a Vertex

In [None]:
# Fetch one `word` vertex by its primary ID and pretty-print the response.
single_word_id = "15314760-n"
results = conn.getVerticesById("word", single_word_id)
pprint(results)

#### Or Multiple Vertices

In [None]:
# Fetch several `word` vertices in one call; `tdf1` is converted to a DataFrame
# in the "Convert Vertex/Edge Set to Dataframe" cell.
word_ids = ["15137796-n", "15192825-n"]
tdf1 = conn.getVerticesById("word", word_ids)
pprint(tdf1)

#### Count Edges Connected to a Vertex

In [None]:
# Edge counts for one `word` vertex, as returned by conn.getEdgeCountFrom.
source_word_id = "15192825-n"
results = conn.getEdgeCountFrom("word", source_word_id)
pprint(results)

#### Show all Edges Connected to a Vertex

In [None]:
# Edges of one `word` vertex, as returned by conn.getEdges.
source_word_id = "15192825-n"
results = conn.getEdges("word", source_word_id)
pprint(results)

### As Pandas Dataframe
pyTigerGraph can also return all of the above as native pandas DataFrames.

#### All Vertices of one Type

In [None]:
# Pull all `word` vertices into a DataFrame.
df1 = conn.getVertexDataframe("word")
# Show only the first rows, using the notebook's rich table rendering,
# instead of printing the whole frame as plain text.
df1.head()

#### One or More Vertices

In [None]:
# Fetch the listed `word` vertices directly as a DataFrame.
df2 = conn.getVertexDataframeById("word", ["15192825-n"])
# A bare last expression is rendered as a rich table rather than plain text.
df2

#### Convert Vertex/Edge Set to Dataframe
We'll use the results from the 'Or Multiple Vertices' cell. 

In [None]:
# Convert the vertex set stored in `tdf1` (from the "Or Multiple Vertices" cell)
# into a DataFrame.
df3 = conn.vertexSetToDataFrame(tdf1)
# A bare last expression is rendered as a rich table rather than plain text.
df3

#### Get Edges

In [None]:
# Fetch edges of one `word` vertex as a DataFrame, passing limit=3 to the call.
df4 = conn.getEdgesDataframe("word", "15192825-n", limit=3)
# A bare last expression is rendered as a rich table rather than plain text.
df4

## Path Finding
Find paths between vertices.

Supported are:
- shortestPath - one shortest path between vertices
- allPaths - all paths within the specified edge limit

In [None]:
# Ask for a shortest path between two `word` vertices; each endpoint is given
# as a list of (vertex type, vertex id) tuples.
path_start = [("word", "15192825-n")]
path_end = [("word", "15161294-n")]
results = conn.shortestPath(path_start, path_end)
pprint(results)

## Clear the Whole Graph
DANGER ZONE

In [None]:
# conn.gsql('''
# USE GLOBAL
# DROP ALL
# ''')