# KGTK

In [None]:
!pip install kgtk
!pip install ampligraph 

In [1]:
from google.colab import files
!echo "deb http://downloads.skewed.de/apt bionic main" >> /etc/apt/sources.list
!apt-key adv --keyserver keys.openpgp.org --recv-key 612DEFB798507F25
!apt-get update
!apt-get install python3-graph-tool python3-cairo python3-matplotlib
!apt-get install libcairo2-dev

Executing: /tmp/apt-key-gpghome.Qt9VcJq4O8/gpg.1.sh --keyserver keys.openpgp.org --recv-key 612DEFB798507F25
gpg: key 612DEFB798507F25: "Tiago de Paula Peixoto <tiago@skewed.de>" not changed
gpg: Total number processed: 1
gpg:              unchanged: 1
Hit:1 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease
Hit:2 http://archive.ubuntu.com/ubuntu bionic InRelease
Hit:3 http://archive.ubuntu.com/ubuntu bionic-updates InRelease
Hit:4 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease
Hit:5 http://archive.ubuntu.com/ubuntu bionic-backports InRelease
Hit:6 http://downloads.skewed.de/apt bionic InRelease
Hit:7 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease
Hit:8 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease
Hit:9 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease
Hit:10 http://security.ubuntu.com/ubuntu bionic-security InRelease
Ign:11 https://developer.download.nvidia.com/compute/machine-learning/repo

In [2]:
import pandas as pd
import json
import requests
import io
import os
import sys
from kgtk.configure_kgtk_notebooks import ConfigureKGTK
from kgtk.functions import kgtk, kypher
from ampligraph.datasets import load_from_csv

In [3]:
# Download the Game of Thrones triples and load them with AmpliGraph.
url = 'https://ampligraph.s3-eu-west-1.amazonaws.com/datasets/GoT.csv'
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as the CSV.
response.raise_for_status()
# The context manager closes (and flushes) the file before it is read back.
with open('GoT.csv', 'wb') as got_file:
    got_file.write(response.content)
X = load_from_csv('.', 'GoT.csv', sep=',')

In [None]:
# Reshape the loaded triples into an edge table with node1 / label / node2
# columns. node1 becomes the index so that to_csv writes it as the first
# column without an extra numeric index column.
df = (
    pd.DataFrame(X)
    .rename(columns={0: "node1", 1: "label", 2: "node2"})
    .set_index("node1")
)

data_dir = '.'
df.to_csv(data_dir + "/data.tsv", sep='\t')

# Map the alias "data" to the edge file; this JSON is handed to
# configure_kgtk below as json_config_file.
extra_files_config = {"data": "data.tsv"}
with open(data_dir + '/extra_files.json', 'w') as config_file:
    json.dump(extra_files_config, config_file)

input_path = data_dir
output_path = data_dir
input_files_url = data_dir
project_name = "DS55-KGTK"

# Named kgtk_files (not `files`) so the imported google.colab `files` module
# is not shadowed.
kgtk_files = ["data"]
ck = ConfigureKGTK(kgtk_files, input_files_url=input_files_url)
ck.configure_kgtk(input_graph_path=input_path,
                  output_path=output_path,
                  project_name=project_name,
                  json_config_file=data_dir + '/extra_files.json')

ck.print_env_variables()
ck.load_files_into_cache()

## 3.1 PageRank

In [None]:
%%time
kgtk("""
    graph-statistics -i "$data" -o $OUT/metadata.pagerank.directed.tsv.gz 
    --compute-pagerank True 
    --compute-hits False 
    --page-rank-property Pdirected_pagerank 
    --vertex-in-degree-property Pindegree
    --vertex-out-degree-property Poutdegree
    --output-degrees True 
    --output-pagerank True 
    --output-hits False \
    --output-statistics-only 
    --undirected False 
    --log-file $TEMP/metadata.pagerank.directed.summary.txt
""")

In [38]:
%%time
# List up to 20 nodes with the highest directed PageRank, read from the
# Pdirected_pagerank edges of the graph-statistics output. The value is cast
# to float in --order-by, presumably so the sort is numeric rather than textual.
kgtk("""
    query -i $OUT/metadata.pagerank.directed.tsv.gz
        --match '(a)-[:Pdirected_pagerank]->(pagerank)'
        --return 'distinct a as node1, pagerank as pagerank'
        --order-by 'cast(pagerank, float) desc'
        --limit 20
""")

CPU times: user 7.21 ms, sys: 12.2 ms, total: 19.4 ms
Wall time: 841 ms


Unnamed: 0,node1,pagerank
0,House Baratheon of King's Landing,0.044739
1,The Crownlands,0.024342
2,House Lannister of Casterly Rock,0.021143
3,The Reach,0.018305
4,House Tyrell of Highgarden,0.018228
5,The Riverlands,0.017569
6,House Baratheon of Storm's End,0.016333
7,House Baratheon of Dragonstone,0.015298
8,The Westerlands,0.014986
9,Tommen Baratheon,0.014424


In [36]:
%%time
# List up to 20 nodes with the highest in-degree, read from the Pindegree
# edges of the graph-statistics output. The value is cast to float in
# --order-by, presumably so the sort is numeric rather than textual.
kgtk("""
    query -i $OUT/metadata.pagerank.directed.tsv.gz
        --match '(a)-[:Pindegree]->(indegree)'
        --return 'distinct a as node1, indegree as indegree'
        --order-by 'cast(indegree, float) desc'
        --limit 20
""")

CPU times: user 12.5 ms, sys: 9.17 ms, total: 21.7 ms
Wall time: 850 ms


Unnamed: 0,node1,indegree
0,House Lannister of Casterly Rock,133
1,House Frey of the Crossing,119
2,House Tyrell of Highgarden,102
3,House Stark of Winterfell,100
4,House Targaryen of King's Landing,94
5,The Reach,72
6,House Greyjoy of Pyke,67
7,The North,60
8,House Baratheon of King's Landing,56
9,House Baratheon of Storm's End,56


## 3.2 Extended Path

In [47]:
# Show every outgoing edge whose node1 is "House Lannister of Casterly Rock".
# NOTE(review): $data is presumably an environment variable exported by the
# ConfigureKGTK setup cell — confirm it is set before running this cell.
!kgtk query -i $data \
  --match '(a)-[]->()' \
  --where 'a = "House Lannister of Casterly Rock"' 

node1	label	node2
House Lannister of Casterly Rock	LED_BY	Cersei Lannister
House Lannister of Casterly Rock	SWORN_TO	House Baratheon of King's Landing
House Lannister of Casterly Rock	IN_REGION	The Westerlands
House Lannister of Casterly Rock	FOUNDED_BY	Lann
House Lannister of Casterly Rock	BRANCH_OF	House Casterly of Casterly Rock


In [54]:
%%time
# Run KGTK reachable-nodes starting from the root
# "House Lannister of Casterly Rock", restricted to the ALLIED_WITH property,
# and write the result to $TEMP/allies.tsv.
# NOTE(review): --undirected True presumably lets edges be followed in either
# direction and --label Ally presumably labels the output edges — confirm
# against the KGTK reachable-nodes documentation.
kgtk("""
    reachable-nodes -i $data
        --root "House Lannister of Casterly Rock"
        --props ALLIED_WITH
        --label Ally
        --undirected True
    -o $TEMP/allies.tsv
""")

CPU times: user 7.93 ms, sys: 9.79 ms, total: 17.7 ms
Wall time: 850 ms


In [58]:
# Count the distinct target nodes (b) across all edges in $TEMP/allies.tsv and
# return that count as N.
!kgtk query -i $TEMP/allies.tsv \
  --match '()-[]->(b)' \
  --return 'count(distinct b) as N'

N
1020
