In [1]:
import pickle

In [2]:
with open('pickle/star-trek_network_graph.pickle', 'rb') as f:
    network = pickle.load(f)

## Classes' names

In [3]:
wikidata_types = set([n[1]['type'] for n in network.nodes(data=True)])

In [4]:
len(wikidata_types)

46

In [5]:
wikidata_types

{'American comic book',
 'MediaWiki site',
 'WikiProject',
 'Wikimedia article page',
 'Wikimedia portal',
 'Wikimedia project page',
 'a priori language',
 'album',
 'book',
 'cinematography',
 'clip show',
 'comic strip',
 'computing platform',
 'country',
 'episode',
 'fictional universe',
 'film',
 'film genre',
 'film series',
 'genre',
 'handheld game console',
 'language',
 'limited series',
 'list of Star Trek: The Next Generation episodes',
 'literary genre',
 'magazine',
 'media franchise',
 'musical ensemble',
 'natural language',
 'release',
 'remix album',
 'science fiction genre',
 'series',
 'short film',
 'single',
 'soundtrack album',
 'television genre',
 'television program',
 'television season',
 'television series',
 'timeline',
 'video game',
 'video game console',
 'video game genre',
 'web portal',
 'webcomic'}

In [100]:
# Manually assigned
sylva_classes = {
    'American comic book': 'Comic',
    'MediaWiki site': 'Web',
    'WikiProject': 'Web',
    'Wikimedia article page': 'Web',
    'Wikimedia portal': 'Web',
    'Wikimedia project page': 'Web',
    'a priori language': 'Language',
    'agglutinative language': 'Language',
    'album': 'Album',
    'animated series': 'Animated series',
    'book': 'Book',
    'cinematography': 'Cinematography',
    'clip show': 'Episode',
    'comic strip': 'Comic',
    'computing platform': 'Video game console',
    'constructed language': 'Language',
    'country': 'Country',
    'episode': 'Episode',
    'fictional language': 'Language',
    'fictional universe': 'Fictional Universe',
    'film': 'Film',
    'film genre': 'Genre',
    'film series': 'Film series',
    'genre': 'Genre',
    'handheld game console': 'Video game console',
    'language': 'Language',
    'limited series': 'Series',
    'list of Star Trek: The Next Generation episodes': 'Series',
    'literary genre': 'Genre',
    'magazine': 'Magazine',
    'media franchise': 'Series',
    'musical ensemble': 'Music',
    'natural language': 'Language',
    'release': 'Release',
    'remix album': 'Music',
    'science fiction genre': 'Genre',
    'series': 'Series',
    'series finale': 'Episode',
    'short film': 'Film',
    'single': 'Music',
    'soundtrack album': 'Music',
    'television genre': 'Genre',
    'television program': 'TV program',
    'television season': 'TV season',
    'television series': 'TV series',
    'timeline': 'Timeline',
    'video game': 'Video game',
    'video game console': 'Video game console',
    'video game genre': 'Genre',
    'web portal': 'Web',
    'web series': 'Web series',
    'webcomic': 'Web',
    'website': 'Web',
}

In [101]:
wikidata_types.difference(set(sylva_classes.keys()))

set()

In [102]:
len(sylva_classes)

53

In [103]:
unique_sylva_classes = set(sylva_classes.values())

In [104]:
len(unique_sylva_classes)

24

In [105]:
unique_sylva_classes

{'Album',
 'Animated series',
 'Book',
 'Cinematography',
 'Comic',
 'Country',
 'Episode',
 'Fictional Universe',
 'Film',
 'Film series',
 'Genre',
 'Language',
 'Magazine',
 'Music',
 'Release',
 'Series',
 'TV program',
 'TV season',
 'TV series',
 'Timeline',
 'Video game',
 'Video game console',
 'Web',
 'Web series'}

In [106]:
with open('pickle/star-trek_sylva_classes_dict.pickle', 'wb') as f:
    pickle.dump(sylva_classes, f)

## Relationships' names

In [107]:
wikidata_relationships = set([e[2]['label'] for e in network.edges(data=True)])

In [108]:
wikidata_relationships

{'based on',
 "category's main topic",
 'country',
 'country of origin',
 'describes the fictional universe',
 'fictional universe described in',
 'filming location',
 'followed by',
 'follows',
 'from fictional universe',
 'genre',
 'inspired by',
 'language of work (or name)',
 'list of episodes',
 'main subject',
 'original language of work',
 'part of',
 'platform',
 'series',
 "topic's main category",
 'website account on'}

In [109]:
# Manually assigned
sylva_relationships = {
    'based on': 'based on',
    "category's main topic": "category's main topic",
    'country': 'country',
    'country of origin': 'country',
    'describes the fictional universe': 'describes the fictional universe',
    'fictional universe described in': 'fictional universe described in',
    'filming location': 'filming location',
    'followed by': 'followed by',
    'follows': 'follows',
    'from fictional universe': 'from fictional universe',
    'genre': 'genre',
    'inspired by': 'based on',
    'language of work (or name)': 'language',
    'list of episodes': 'list of episodes',
    'main subject': 'main subject',
    'original language of work': 'language',
    'part of': 'part of',
    'platform': 'platform',
    'series': 'series',
    "topic's main category": "topic's main category",
    'website account on': 'website account on',
}

In [110]:
len(sylva_relationships)

21

In [111]:
unique_sylva_relationships = set(sylva_relationships.values())

In [112]:
len(unique_sylva_relationships)

18

In [113]:
unique_sylva_relationships

{'based on',
 "category's main topic",
 'country',
 'describes the fictional universe',
 'fictional universe described in',
 'filming location',
 'followed by',
 'follows',
 'from fictional universe',
 'genre',
 'language',
 'list of episodes',
 'main subject',
 'part of',
 'platform',
 'series',
 "topic's main category",
 'website account on'}

In [114]:
with open('pickle/star-trek_sylva_relationships_dict.pickle', 'wb') as f:
    pickle.dump(sylva_relationships, f)

## Classes' attributes

In [115]:
from collections import defaultdict

In [118]:
class_attributes = defaultdict(set)

In [119]:
for ide, info in network.nodes(data=True):
    attrs = set(info.keys())
    t = info['type']
    c = sylva_classes[t]
    class_attributes[c] = class_attributes[c].union(attrs)

class_attributes = dict(class_attributes)

In [125]:
class_attributes

{'Album': {'Freebase identifier',
  'description',
  'id',
  'name',
  'type',
  'wikilink'},
 'Book': {'Freebase identifier',
  'ISBN-10',
  'ISBN-13',
  'OCLC control number',
  'based on',
  'description',
  'id',
  'name',
  'publication date',
  'series',
  'type',
  'wikilink'},
 'Cinematography': {'description', 'id', 'name', 'type', 'wikilink'},
 'Comic': {'depicts',
  'description',
  'id',
  'issue',
  'license',
  'name',
  'publication date',
  'reference URL',
  'type',
  'wikilink'},
 'Country': {'description', 'id', 'name', 'type', 'wikilink'},
 'Episode': {'Freebase identifier',
  'IMDb identifier',
  'description',
  'filming location',
  'followed by',
  'follows',
  'id',
  'main subject',
  'name',
  'publication date',
  'title',
  'type',
  'wikilink'},
 'Fictional Universe': {'description',
  'fictional universe described in',
  'id',
  'name',
  'type',
  'wikilink'},
 'Film': {'(OBSOLETE) title (use P1476, "title")',
  'AllMovie movie ID',
  'AlloCiné movie ID'

## Relationships' attributes

In [122]:
relationship_attributes = defaultdict(set)

In [123]:
for id1, id2, info in network.edges(data=True):
    attrs = set(info.keys())
    label = info['label']
    r = sylva_relationships[label]
    relationship_attributes[r] = relationship_attributes[r].union(attrs)

relationship_attributes = dict(relationship_attributes)

In [124]:
relationship_attributes

{'based on': {'label'},
 "category's main topic": {'label'},
 'country': {'label', 'publication date'},
 'describes the fictional universe': {'label'},
 'fictional universe described in': {'label'},
 'filming location': {'label'},
 'followed by': {'label'},
 'follows': {'label'},
 'from fictional universe': {'label'},
 'genre': {'label'},
 'language': {'label'},
 'list of episodes': {'label'},
 'main subject': {'label'},
 'part of': {'is a list of', 'label'},
 'platform': {'Metacritic ID', 'label'},
 'series': {'label'},
 "topic's main category": {'label'},
 'website account on': {'label'}}

## File system

In [130]:
import csv
import os
import shutil

In [135]:
shutil.rmtree('sylva')
os.makedirs('sylva')
os.makedirs('sylva/nodes')
os.makedirs('sylva/relationships')

In [136]:
for c in unique_sylva_classes:
    open(os.path.join('sylva/nodes', c + '.csv'), 'w')
for p in unique_sylva_relationships:
    open(os.path.join('sylva/relationships', p + '.csv'), 'w')

## Schema

In [159]:
schema = {}
schema['allowedEdges'] = []
for p in properties_tuple3:
    d = {
        'label': p[1],
        'properties': {},
        'source': p[0],
        'target': p[2],
    }
    schema['allowedEdges'].append(d)
schema['nodeTypes'] = {}
for t in ts:
    schema['nodeTypes'][t] = {}
    schema['nodeTypes'][t]['name'] = {
        'auto': 'null',
        'datatype': 'd',
        'default': '',
        'description': '',
        'display': 'true',
        'required': 'false',
        'slug': 'date-1000',
        'validation': 'null',
        'value': ''
    }
    schema['nodeTypes'][t]['date'] = {
        'auto': 'null',
        'datatype': 'u',
        'default': '',
        'description': '',
        'display': 'true',
        'required': 'false',
        'slug': 'name-1000',
        'validation': 'null',
        'value': ''
    }

NameError: name 'properties_tuple3' is not defined

In [158]:
l.`

IndexError: pop index out of range

In [144]:
for e in os.listdir('sylva/nodes'):
    with open(os.path.join('sylva/nodes', e), 'a') as f:
        writer = csv.writer(
            f,
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_ALL
        )
        c = e[:-4]
        attrs = set(class_attributes[c])
        row = ['id', 'type']
        attrs.remove('id')
        attrs.remove('type')
        row.extend(attrs)
        writer.writerow(row)
for e in os.listdir('sylva/relationships'):
    with open(os.path.join('sylva/relationships', e), 'a') as f:
        writer = csv.writer(
            f,
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_ALL
        )
        r = e[:-4]
        attrs = set(relationship_attributes[r])
        row = ['source id', 'target id', 'label']
        attrs.remove('label')
        row.extend(attrs)
        writer.writerow(row)

{'type', 'wikilink', 'name', 'description', 'id'}
{'wikilink', 'description', 'id', 'name', 'type'}
{'wikilink', 'description', 'id', 'Freebase identifier', 'name', 'type'}
{'type', 'wikilink', 'name', 'description', 'id'}
{'Instagram username', 'Commons category', 'ISFDB series ID', 'description', 'Freebase identifier', 'name', 'wikilink', 'Twitter username', 'official website', 'Facebook ID', 'id', 'publication date', 'type'}


KeyError: 'Animated series'

In [None]:
schema = {}
schema['allowedEdges'] = []
for p in properties_tuple3:
    d = {
        'label': p[1],
        'properties': {},
        'source': p[0],
        'target': p[2],
    }
    schema['allowedEdges'].append(d)
schema['nodeTypes'] = {}
for t in ts:
    schema['nodeTypes'][t] = {}
    schema['nodeTypes'][t]['name'] = {
        'auto': 'null',
        'datatype': 'd',
        'default': '',
        'description': '',
        'display': 'true',
        'required': 'false',
        'slug': 'date-1000',
        'validation': 'null',
        'value': ''
    }
    schema['nodeTypes'][t]['date'] = {
        'auto': 'null',
        'datatype': 'u',
        'default': '',
        'description': '',
        'display': 'true',
        'required': 'false',
        'slug': 'name-1000',
        'validation': 'null',
        'value': ''
    }