# Analysis and Visualization

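Let's create a visualization of a single meme, `kym:distracted-boyfriend`: we extract its edges from the templates graph, build a node file with labels, and render the result as an HTML graph.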

## 0. Setup

In [1]:
import os
import os.path

from kgtk.configure_kgtk_notebooks import ConfigureKGTK
from kgtk.functions import kgtk, kypher

In [2]:
# Parameters

# Folders on the local machine, relative to the notebook's working directory:
# `input_path` holds the input Wikidata files; the output and temporary files
# are created under `output_path`/`project_name`.
input_path = "wikidata"
output_path = "projects"
project_name = "tutorial-kypher"

In [3]:
# Files requested from ConfigureKGTK by alias only; presumably it knows the
# file name behind each alias (TODO confirm).
big_files = ["label"]

# Further input files, given as alias -> file name.
additional_files = {
    "P31": "derived.P31.tsv.gz",
    "items": "claims.wikibase-item.tsv.gz",
    "P1963": "derived.P1963computed.count.star.tsv.gz",
    "external": "claims.external-id.tsv.gz",
    "indegree": "metadata.in_degree.tsv.gz",
    "outdegree": "metadata.out_degree.tsv.gz",
    "pagerank": "metadata.pagerank.directed.tsv.gz",
}

# Create the configuration helper and hand it the input folder, the output
# folder, the project name and the extra files defined above.
ck = ConfigureKGTK(big_files)
ck.configure_kgtk(
    input_graph_path=input_path,
    output_path=output_path,
    project_name=project_name,
    additional_files=additional_files,
)

User home: /Users/filipilievski
Current dir: /Users/filipilievski/mcs/imkg
KGTK dir: /Users/filipilievski/mcs
Use-cases dir: /Users/filipilievski/mcs/use-cases


In [4]:
# Print the environment variables held by the configuration helper
# (paths, graph cache location and file aliases).
ck.print_env_variables()

KGTK_OPTION_DEBUG: false
STORE: projects/tutorial-kypher/temp.tutorial-kypher/wikidata.sqlite3.db
KGTK_LABEL_FILE: wikidata/labels.en.tsv.gz
OUT: projects/tutorial-kypher
kypher: kgtk query --graph-cache projects/tutorial-kypher/temp.tutorial-kypher/wikidata.sqlite3.db
TEMP: projects/tutorial-kypher/temp.tutorial-kypher
GRAPH: wikidata
EXAMPLES_DIR: /Users/filipilievski/mcs/examples
kgtk: kgtk
USE_CASES_DIR: /Users/filipilievski/mcs/use-cases
KGTK_GRAPH_CACHE: projects/tutorial-kypher/temp.tutorial-kypher/wikidata.sqlite3.db
label: wikidata/labels.en.tsv.gz
P31: wikidata/derived.P31.tsv.gz
items: wikidata/claims.wikibase-item.tsv.gz
P1963: wikidata/derived.P1963computed.count.star.tsv.gz
external: wikidata/claims.external-id.tsv.gz
indegree: wikidata/metadata.in_degree.tsv.gz
outdegree: wikidata/metadata.out_degree.tsv.gz
pagerank: wikidata/metadata.pagerank.directed.tsv.gz


In [14]:
# Select the outgoing edges of `kym:distracted-boyfriend` from the templates
# graph, keeping only the six listed relation labels, and save them to
# $TEMP/db_subject.kgtk.gz (used later as the edge file of the visualization).
# NOTE(review): $TEMP/templates.kgtk.gz is not created in the visible part of
# this notebook; presumably an earlier notebook produces it. Confirm.
!kgtk query -i $TEMP/templates.kgtk.gz \
            --match '(:`kym:distracted-boyfriend`)-[r]->()' \
            --where 'r.label in ["kym:parent", "kym:year", "rdf:type", "m4s:fromAbout", "m4s:fromTags", "m4s:fromImage"]' \
            -o $TEMP/db_subject.kgtk.gz

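Create the node file. First collect the labels of all nodes that occur as edge targets in the templates graph, then keep only the labeled targets of the edges selected above: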

In [15]:
# Build a label file restricted to the templates graph: for every node `n`
# that is the target of some templates edge, copy its edges from the $label
# file as node1/label/node2 rows, then pipe the result through `deduplicate`.
!kgtk query -i $label -i $TEMP/templates.kgtk.gz \
            --match 'templates: ()-[]->(n), \
                label: (n)-[r]->(l)' \
            --return 'n as node1, r.label as label, l as node2' / deduplicate \
            -o $TEMP/labelfile.kgtk.gz 

In [16]:
# Build the node file for the visualization: for every node `n` that is the
# target of an edge in db_subject, emit its id and its label taken from
# $TEMP/labelfile.kgtk.gz, then pipe the result through `deduplicate`.
# NOTE(review): both patterns must match, so targets without a label row
# presumably get no node-file entry. Confirm.
!kgtk query -i $TEMP/labelfile.kgtk.gz -i $TEMP/db_subject.kgtk.gz \
            --match 'db: ()-[]->(n), \
                label: (n)-[r]->(l)' \
            --return 'n as id, l as label' / deduplicate \
            -o $TEMP/nodefile.kgtk.gz 

In [17]:
# Inspect the node file (columns: id, label).
kgtk('''
    cat -i $TEMP/nodefile.kgtk.gz
''')

Unnamed: 0,id,label
0,Q100235738,'event'@en
1,Q1030329,'viral video'@en
2,Q1136,'Reddit'@en
3,Q1144593,'sitting'@en
4,Q1151364,'formalwear'@en
5,Q1190554,'occurrence'@en
6,Q1255864,'fun'@en
7,Q12684,'fashion'@en
8,Q12737077,'occupation'@en
9,Q131151,'t-shirt'@en


In [18]:
# Inspect the selected edges of the meme (columns: node1, label, node2).
kgtk('''
    cat -i $TEMP/db_subject.kgtk.gz
''')

Unnamed: 0,node1,label,node2
0,kym:distracted-boyfriend,kym:parent,kym:object-labeling
1,kym:distracted-boyfriend,kym:year,2017
2,kym:distracted-boyfriend,m4s:fromAbout,Q1634416
3,kym:distracted-boyfriend,m4s:fromAbout,Q622404
4,kym:distracted-boyfriend,m4s:fromImage,Q100235738
5,kym:distracted-boyfriend,m4s:fromImage,Q1030329
6,kym:distracted-boyfriend,m4s:fromImage,Q1136
7,kym:distracted-boyfriend,m4s:fromImage,Q1144593
8,kym:distracted-boyfriend,m4s:fromImage,Q1151364
9,kym:distracted-boyfriend,m4s:fromImage,Q1190554


In [19]:
# Render the selected edges as an HTML graph, using the node file for node
# information. Judging by the option names, node text is shown above each
# node, the `label` column is used as tooltip, edges are drawn as arrows and
# colored by their `label` column with the d3.schemeDark2 palette (confirm
# against the visualize-graph documentation).
# The output path is relative, so the HTML file lands in the current working
# directory rather than under $OUT.
kgtk("""visualize-graph 
        -i $TEMP/db_subject.kgtk.gz
        --node-file $TEMP/nodefile.kgtk.gz
        --show-text above
        --tooltip-column label
        --direction arrow
        --edge-color-column label
        --edge-color-style d3.schemeDark2
        -o distracted.graph.html""")

In [13]:
# Cross-check against the raw N-Triples file: list every line that mentions
# 'distracted-boyfriend'.
# NOTE(review): imkg02/template.kym.nt is a hard-coded path relative to the
# working directory. Confirm it exists before running.
!grep 'distracted-boyfriend' imkg02/template.kym.nt

<https://knowyourmeme.com/memes/me-vs-you> <https://knowyourmeme.com/memes/sibling> <https://knowyourmeme.com/memes/distracted-boyfriend> .
<https://knowyourmeme.com/memes/respect-women> <https://knowyourmeme.com/memes/sibling> <https://knowyourmeme.com/memes/distracted-boyfriend> .
<https://knowyourmeme.com/memes/distracted-boyfriend> <https://meme4.science/fromImage> <http://www.wikidata.org/entity/Q371174> .
<https://knowyourmeme.com/memes/distracted-boyfriend> <https://meme4.science/fromImage> <http://www.wikidata.org/entity/Q2083958> .
<https://knowyourmeme.com/memes/distracted-boyfriend> <https://knowyourmeme.com/memes/sibling> <https://knowyourmeme.com/memes/scissors-vs-water> .
<https://knowyourmeme.com/memes/distracted-boyfriend> <https://knowyourmeme.com/memes/sibling> <https://knowyourmeme.com/memes/kung-fu-pandas-wuxi-finger-hold> .
<https://knowyourmeme.com/memes/distracted-boyfriend> <https://knowyourmeme.com/memes/sibling> <https://knowyourmeme.com/memes/drowning-high-fi

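Issue: the graph seems to be missing crucial information! For example, the raw triples above contain `sibling` edges into and out of `distracted-boyfriend`, but the edge query keeps only outgoing edges with six other labels, so none of the `sibling` edges are drawn.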

Next step: improve the node file to include colors for relations