## Imports

In [2]:
import pprint
import pandas as pd
import codecs
import pickle
import surf
from tqdm import tqdm
import requests
from rdflib.term import URIRef
import knowledge_base
from knowledge_base import KnowledgeBase
from tabulate import tabulate

In [3]:
pd.__version__

u'0.20.3'

In [4]:
# experimenting with dask
import dask
from dask import compute, delayed
from dask.diagnostics import ProgressBar
from dask.multiprocessing import get as mp_get

In [6]:
kb = KnowledgeBase("/Users/rromanello/Documents/ClassicsCitations/hucit_kb/knowledge_base/config/virtuoso.ini")

In [7]:
len(kb.get_authors())

1548

In [8]:
kb.get_resource_by_urn("urn:cts:cwkb:711.1448")

HucitWork (title=[De ventis (@la)],urn=urn:cts:cwkb:711.1448)

In [5]:
%%time
stats = kb.get_statistics()

CPU times: user 33.5 s, sys: 16.3 s, total: 49.8 s
Wall time: 4min 27s


In [6]:
pprint.pprint(stats)

{'number_author_abbreviations': 774,
 'number_author_names': 4842,
 'number_authors': 1548,
 'number_title_abbreviations': 2377,
 'number_work_titles': 10354,
 'number_works': 5199}


In [8]:
kb.author_names?

In [20]:
search_results = kb.search("Homer")

In [26]:
homer = search_results[1][1]

In [28]:
print homer.to_json()

{
  "name_abbreviations": [
    "Hom."
  ], 
  "urn": "urn:cts:greekLit:tlg0012", 
  "works": [
    {
      "urn": "urn:cts:cwkb:927.2814", 
      "titles": [
        {
          "language": "la", 
          "label": "Epigrammata"
        }
      ], 
      "uri": "http://purl.org/hucit/kb/works/2814", 
      "title_abbreviations": [
        "epigr."
      ]
    }, 
    {
      "urn": "urn:cts:greekLit:tlg0012.tlg001", 
      "titles": [
        {
          "language": "it", 
          "label": "Iliade"
        }, 
        {
          "language": "la", 
          "label": "Ilias"
        }, 
        {
          "language": "en", 
          "label": "Iliad"
        }, 
        {
          "language": "de", 
          "label": "Ilias"
        }, 
        {
          "language": "fr", 
          "label": "L'Iliade"
        }
      ], 
      "uri": "http://purl.org/hucit/kb/works/2815", 
      "title_abbreviations": [
        "Il."
      ]
    }, 
    {
      "urn": "urn:cts:greekLit:tlg001

## TODO

### fetch `sameAs` for the works

- for each author
    - for each work
        - get the cwkb URL
        - parse the turtle and extract the sameAs pointing to the catalog
        - return
        
then do the same for the authors

## Gather basic stats

In [40]:
author_abbreviations = [(len(author.get_abbreviations())) for author in kb.get_authors()]

In [41]:
author_names = [(len(author.get_names())) for author in kb.get_authors()]

In [42]:
work_abbreviations = [(len(work.get_abbreviations())) for author in kb.get_authors() for work in author.get_works()]

In [36]:
work_titles = [(len(work.get_titles())) for author in kb.get_authors() for work in author.get_works()]

In [43]:
stats = []

stats.append({
        "label" : "Author names", 
        "total" : pd.Series(author_names).sum(),
        "min" : pd.Series(author_names).min(),
        "max" : pd.Series(author_names).max(),
        "variance" : pd.Series(author_names).var(),
        "mean" : pd.Series(author_names).mean()
})

stats.append({
        "label" : "Author abbreviations", 
        "total" :  pd.Series(author_abbreviations).sum(),
        "min" : pd.Series(author_abbreviations).min(),
        "max" : pd.Series(author_abbreviations).max(),
        "variance" : pd.Series(author_abbreviations).var(),
        "mean" : pd.Series(author_abbreviations).mean()
})

stats.append({
        "label" : "Work titles", 
        "total" : pd.Series(work_titles).sum(),
        "min" : pd.Series(work_titles).min(),
        "max" : pd.Series(work_titles).max(),
        "variance" : pd.Series(work_titles).var(),
        "mean" : pd.Series(work_titles).mean()
})


stats.append({
        "label" : "Work abbreviations", 
        "total" : pd.Series(work_abbreviations).sum(),
        "min" : pd.Series(work_abbreviations).min(),
        "max" : pd.Series(work_abbreviations).max(),
        "variance" : pd.Series(work_abbreviations).var(),
        "mean" : pd.Series(work_abbreviations).mean()
})

df_stats = pd.DataFrame(stats, index=[stat["label"] for stat in stats])

In [44]:
df_stats[["total", "min", "max", "mean", "variance"]]

Unnamed: 0,total,min,max,mean,variance
Author names,4842,1,27,3.127907,9.812976
Author abbreviations,774,0,2,0.5,0.26309
Work titles,10354,1,31,1.991537,6.417397
Work abbreviations,2377,0,3,0.457203,0.574496


In [45]:
df_stats.to_latex()

u'\\begin{tabular}{llrrrrr}\n\\toprule\n{} &                 label &  max &      mean &  min &  total &  variance \\\\\n\\midrule\nAuthor names         &          Author names &   27 &  3.127907 &    1 &   4842 &  9.812976 \\\\\nAuthor abbreviations &  Author abbreviations &    2 &  0.500000 &    0 &    774 &  0.263090 \\\\\nWork titles          &           Work titles &   31 &  1.991537 &    1 &  10354 &  6.417397 \\\\\nWork abbreviations   &    Work abbreviations &    3 &  0.457203 &    0 &   2377 &  0.574496 \\\\\n\\bottomrule\n\\end{tabular}\n'

In [9]:
print tabulate(df_stats[["total", "min", "max", "mean", "variance"]]
               , ["", "Total", "Min", "Max", "Mean", "Variance"], tablefmt="pipe")

|                      |   Total |   Min |   Max |     Mean |   Variance |
|:---------------------|--------:|------:|------:|---------:|-----------:|
| Author names         |    4842 |     1 |    27 | 3.12791  |   9.81298  |
| Author abbreviations |     774 |     0 |     2 | 0.5      |   0.26309  |
| Work titles          |   10354 |     1 |    31 | 1.99154  |   6.4174   |
| Work abbreviations   |    2377 |     0 |     3 | 0.457203 |   0.574496 |


In [7]:
print tabulate(df_stats[["total", "min", "max", "mean", "variance"]]
               , ["", "Total", "Min", "Max", "Mean", "Variance"], tablefmt="latex", floatfmt=".2f")

\begin{tabular}{lrrrrr}
\hline
                      &    Total &   Min &   Max &   Mean &   Variance \\
\hline
 Author names         &  4842.00 &  1.00 & 27.00 &   3.13 &       9.81 \\
 Author abbreviations &   774.00 &  0.00 &  2.00 &   0.50 &       0.26 \\
 Work titles          & 10354.00 &  1.00 & 31.00 &   1.99 &       6.42 \\
 Work abbreviations   &  2377.00 &  0.00 &  3.00 &   0.46 &       0.57 \\
\hline
\end{tabular}


In [48]:
df_stats.to_csv("./kb_stats.csv")

In [5]:
df_stats = pd.read_csv("./kb_stats.csv", index_col="label")

In [11]:
df_stats.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, Author names to Work abbreviations
Data columns (total 6 columns):
Unnamed: 0    4 non-null object
max           4 non-null int64
mean          4 non-null float64
min           4 non-null int64
total         4 non-null int64
variance      4 non-null float64
dtypes: float64(2), int64(3), object(1)
memory usage: 224.0 bytes


## Add `owl:sameAs` for CWKB.org

In [7]:
cwkb_uri_author_template = "http://cwkb.org/author/id/%i/turtle"
cwkb_uri_work_template = "http://cwkb.org/work/id/%i/turtle"

def to_cwkb_uri(author, template="http://cwkb.org/author/id/%i/turtle"):
    return template % int(str(author.subject).split("/")[-1])

In [8]:
cwkb_author_uris = {a.subject:to_cwkb_uri(a) for a in kb.get_authors()}
cwkb_work_uris = {w.subject:to_cwkb_uri(w, cwkb_uri_work_template) for w in kb.get_works()}

In [9]:
# check that the CWKB URIs actually exist: they should, but you never know...
for uri in tqdm(cwkb_author_uris.values() + cwkb_work_uris.values()):
    assert requests.get(uri).status_code == 200

  1%|          | 77/6747 [00:25<36:20,  3.06it/s]

KeyboardInterrupt: 

In [11]:
#cwkb_author_uris.keys()[:100]

In [12]:
# add sameAs link for authors
for author in tqdm(kb.get_authors()):
    cwkb_uri = URIRef(cwkb_author_uris[author.subject])
    
    if not cwkb_uri in author.owl_sameAs:
        author.owl_sameAs.append(URIRef(cwkb_uri))
        author.update()
    
    #print author.get_urn(), author.owl_sameAs


  0%|          | 0/1548 [00:00<?, ?it/s][A
  2%|▏         | 34/1548 [00:00<00:04, 331.78it/s][A
  4%|▍         | 68/1548 [00:00<00:04, 330.99it/s][A
  7%|▋         | 101/1548 [00:00<00:04, 328.86it/s][A
  9%|▊         | 135/1548 [00:00<00:04, 329.56it/s][A
 11%|█         | 168/1548 [00:00<00:04, 327.95it/s][A
 13%|█▎        | 203/1548 [00:00<00:04, 330.25it/s][A
 15%|█▌        | 236/1548 [00:00<00:03, 329.53it/s][A
 17%|█▋        | 267/1548 [00:01<00:09, 138.74it/s][A
 20%|█▉        | 302/1548 [00:02<00:08, 148.92it/s][A
 22%|██▏       | 336/1548 [00:02<00:07, 157.86it/s][A
 24%|██▍       | 369/1548 [00:02<00:07, 165.56it/s][A
 26%|██▌       | 404/1548 [00:02<00:06, 173.29it/s][A
 28%|██▊       | 437/1548 [00:02<00:06, 179.55it/s][A
 30%|███       | 469/1548 [00:02<00:05, 184.97it/s][A
 32%|███▏      | 501/1548 [00:03<00:07, 133.69it/s][A
 35%|███▍      | 535/1548 [00:03<00:07, 138.97it/s][A
 37%|███▋      | 569/1548 [00:03<00:06, 143.96it/s][A
 39%|███▉      | 603/1

In [14]:
# add sameAs link for works
for work in tqdm(kb.get_works()):
    cwkb_uri = URIRef(cwkb_work_uris[work.subject])
    
    if not cwkb_uri in work.owl_sameAs:
        work.owl_sameAs.append(cwkb_uri)
        work.update()
        
    #print work.get_urn(), work.owl_sameAs

100%|██████████| 5199/5199 [00:41<00:00, 125.42it/s]


In [6]:
a = kb.get_authors()[100]
print a.owl_sameAs

[rdflib.term.URIRef(u'http://cwkb.org/author/id/1102/turtle')]


In [7]:
print repr(a)
print a.get_works()
for link in a.owl_sameAs:
    print link

HucitAuthor (names=[Theokritos (@None),Theokrit (@el),Teocrito (@it),Theocritus (@la),Théocrite (@fr)],urn=urn:cts:greekLit:tlg0005)
[HucitWork (title=[Idyllia (@la),Idylles (@fr),Idylls (@en)],urn=urn:cts:greekLit:tlg0005.tlg001), HucitWork (title=[Syrinx (@la)],urn=urn:cts:cwkb:1102.3935), HucitWork (title=[Epigrammata (@la)],urn=urn:cts:greekLit:tlg0005.tlg002)]
http://cwkb.org/author/id/1102/turtle


In [20]:
%run kb_addenda.py

### Abbreviations

In [21]:
abbreviations

[('urn:cts:cwkb:431.904', 'Orat.'),
 ('urn:cts:latinLit:phi0474.phi005', 'Verr.'),
 ('urn:cts:latinLit:phi0474.phi010', 'Cluent.'),
 ('urn:cts:cwkb:998', 'Nicand.'),
 ('urn:cts:cwkb:998.3421', 'Ther.'),
 ('urn:cts:cwkb:1322.4345', 'Hier. Epist.'),
 ('urn:cts:greekLit:tlg0019.tlg009', 'Ra.'),
 ('urn:cts:greekLit:tlg0062.tlg031', 'Philops.'),
 ('urn:cts:cwkb:2386.8494', 'Gest. Pelag.'),
 ('urn:cts:cwkb:2388.8677', 'Malch.'),
 ('urn:cts:latinLit:phi0972.phi001', 'Satyr.'),
 ('urn:cts:latinLit:phi0972', 'Petr.'),
 ('urn:cts:cwkb:1351.4377', '[ Verg. ] catal.'),
 ('urn:cts:cwkb:664', 'Val. Flac.'),
 ('urn:cts:greekLit:tlg0019.tlg008', 'Thesm.'),
 ('urn:cts:greekLit:tlg0086.tlg003', 'Ath.'),
 ('urn:cts:cwkb:766.1654', 'Meteor.'),
 ('urn:cts:greekLit:tlg0003.tlg001', 'Thuc.'),
 ('urn:cts:cwkb:1186', 'X. Eph.'),
 ('urn:cts:latinLit:stoa0023.stoa001', 'RG'),
 ('urn:cts:greekLit:tlg0001.tlg001', 'AR'),
 ('urn:cts:latinLit:phi0959.phi006', 'M.'),
 ('urn:cts:latinLit:phi0978.phi001', 'NH'),
 ('urn

In [22]:
Work = kb._session.get_class(surf.ns.EFRBROO['F1_Work'])
Author = kb._session.get_class(surf.ns.EFRBROO['F10_Person'])

In [23]:
for urn, abbr in abbreviations:
    resource = kb.get_resource_by_urn(urn)
    label = kb.get_author_label(urn) if type(resource)==Author else kb.get_work_label(urn)
    print "Adding abbreviation %s to %s (%s): added=%s" % (abbr, label, urn, resource.add_abbreviation(abbr)) 
    resource.load()
    assert abbr in resource.get_abbreviations()



Adding abbreviation Orat. to About the Orator, also dedicated to Brutus (urn:cts:cwkb:431.904): added=True
Adding abbreviation Verr. to Against Verres (urn:cts:latinLit:phi0474.phi005): added=False
Adding abbreviation Cluent. to In defence of Aulus Cluentius Habitus (urn:cts:latinLit:phi0474.phi010): added=False
Adding abbreviation Nicand. to Nicander Of Colophon (urn:cts:cwkb:998): added=True
Adding abbreviation Ther. to On poisonous animals (urn:cts:cwkb:998.3421): added=True
Adding abbreviation Hier. Epist. to Epistulae (urn:cts:cwkb:1322.4345): added=True




Adding abbreviation Ra. to The Frogs (urn:cts:greekLit:tlg0019.tlg009): added=False
Adding abbreviation Philops. to The Lover of Lies or the Doubter (urn:cts:greekLit:tlg0062.tlg031): added=False
Adding abbreviation Gest. Pelag. to De gestis Pelagii (urn:cts:cwkb:2386.8494): added=True
Adding abbreviation Malch. to Life of Malchus (urn:cts:cwkb:2388.8677): added=True
Adding abbreviation Satyr. to Satyricon (urn:cts:latinLit:phi0972.phi001): added=True
Adding abbreviation Petr. to Petronio (urn:cts:latinLit:phi0972): added=True
Adding abbreviation [ Verg. ] catal. to Catalepton (urn:cts:cwkb:1351.4377): added=True
Adding abbreviation Val. Flac. to Valerius Flaccus (urn:cts:cwkb:664): added=True
Adding abbreviation Thesm. to The Women Celebrating the Thesmophoria (urn:cts:greekLit:tlg0019.tlg008): added=True
Adding abbreviation Ath. to Athenaion Politeia (urn:cts:greekLit:tlg0086.tlg003): added=True
Adding abbreviation Meteor. to Meteorology (urn:cts:cwkb:766.1654): added=True
Adding ab

### Names

### Titles

### False author names

In [28]:
kb.get_resource_by_urn("urn:cts:greekLit:tlg2018").get_names()

[(None, u'Eusebius'),
 (u'fr', u'Eus\xe8be De C\xe9sar\xe9e'),
 (None, u'Eusebios'),
 (u'it', u'Eusebio Di Cesarea'),
 (None, u'Eusebio'),
 (u'en', u'Eusebius Of Caesarea'),
 (u'de', u'Eusebius Von Caesarea'),
 (u'la', u'Eusebius Caesariensis')]

In [24]:
false_names = [
("urn:cts:cwkb:897", "Gr")

, ("urn:cts:greekLit:tlg2018", "Eus")

, ("urn:cts:cwkb:892", "G")

, ("urn:cts:cwkb:892", "Gel.")

, ("urn:cts:cwkb:849", "Ephr.") 

, ("urn:cts:latinLit:stoa0023", "Amm.")

, ("urn:cts:cwkb:1112", "Th")

, ("urn:cts:greekLit:tlg0004", "Diog")

, ("urn:cts:cwkb:866", "Eun.")

, ("urn:cts:cwkb:898", "Gr")

, ("urn:cts:cwkb:949", "Iust.")

, ("urn:cts:cwkb:1314", "Gr")

, ("urn:cts:cwkb:903", "H")

, ("urn:cts:cwkb:715", "Th")

, ("urn:cts:cwkb:884", "Firm.")

, ("urn:cts:cwkb:896", "Gr")

, ("urn:cts:cwkb:872", "Eust.")

, ("urn:cts:cwkb:622", "S")

, ("urn:cts:cwkb:444", "Corn")

, ("urn:cts:cwkb:769", "Art")
]

In [25]:
for urn, false_name in false_names[1:]:
    author = kb.get_resource_by_urn(urn)
    names = [name for lang, name in author.get_names()]
    abbreviations = [abbrev for abbrev in author.get_abbreviations()]
    print "%s: %i names, %i abbreviations" % (urn, len(names), len(abbreviations))
    print false_name in names
    
    # remove false name and add it as abbreviation
    print "removed name %s from %s. success=%s" % (name, urn, author.remove_name(false_name))
    new_abbr = "%s." % false_name if not "." in false_name else false_name
    author.add_abbreviation(new_abbr)
    author.load()
    
    # verify that the change has taken place 
    names = [name for lang, name in author.get_names()]
    abbreviations = [abbrev for abbrev in author.get_abbreviations()]
    print "%s: %i names, %i abbreviations.\n" % (urn, len(names), len(abbreviations))



urn:cts:greekLit:tlg2018: 9 names, 1 abbreviations
True
removed name Eusebius Caesariensis from urn:cts:greekLit:tlg2018. success=True
urn:cts:greekLit:tlg2018: 8 names, 1 abbreviations.

urn:cts:cwkb:892: 9 names, 1 abbreviations
True
removed name Gel. from urn:cts:cwkb:892. success=True
urn:cts:cwkb:892: 8 names, 2 abbreviations.

urn:cts:cwkb:892: 8 names, 2 abbreviations
True
removed name Gel. from urn:cts:cwkb:892. success=True
urn:cts:cwkb:892: 7 names, 3 abbreviations.

urn:cts:cwkb:849: 12 names, 1 abbreviations
True
removed name Ephraem Der Syrer from urn:cts:cwkb:849. success=True
urn:cts:cwkb:849: 11 names, 2 abbreviations.

urn:cts:latinLit:stoa0023: 9 names, 1 abbreviations
True
removed name Amm. from urn:cts:latinLit:stoa0023. success=True
urn:cts:latinLit:stoa0023: 8 names, 2 abbreviations.

urn:cts:cwkb:1112: 9 names, 1 abbreviations
True
removed name Theophilus Of Antioch from urn:cts:cwkb:1112. success=True
urn:cts:cwkb:1112: 8 names, 2 abbreviations.

urn:cts:greekLi

In [26]:
author = kb.get_resource_by_urn(urn) 

In [27]:
author.get_abbreviations()

[u'Art.', u'Artem.']

## Add `owl:sameAs` for VIAF, Wikidata, Perseus

In [8]:
authors_df = pd.read_pickle("knowledge_base/data/pickles/authors.pickle")

In [9]:
sameas_links = {author_urn : {'perseus':None, 'viaf':None, 'wikidata':None} for author_urn in list(authors_df.index)}

In [10]:
len(sameas_links)

118

### Perseus

In [11]:
for urn in tqdm(list(authors_df.index)):
    perseus_link = "http://data.perseus.org/catalog/%s" % urn
    #req = requests.get(perseus_link)
    try:
        #assert req.status_code == 200
        sameas_links[urn]['perseus'] = perseus_link
    except AssertionError as e:
        print "Got code %i for %s: double check %s" % (req.status_code, urn, perseus_link) 

100%|██████████| 118/118 [00:00<00:00, 720419.03it/s]


In [12]:
authors_df.loc["urn:cts:latinLit:phi0470"]

name                 Unknown
cts_namespace       latinLit
cts_id_prefix            phi
cts_id                  0470
viaf_id                 None
lccn                    None
wikidata_id             None
wikipedia_url_en        None
wikipedia_url_es        None
wikipedia_url_de        None
wikipedia_url_it        None
wikipedia_url_fr        None
Name: urn:cts:latinLit:phi0470, dtype: object

In [13]:
authors_df[["viaf_id", "wikidata_id"]].head(10)

Unnamed: 0_level_0,viaf_id,wikidata_id
urn,Unnamed: 1_level_1,Unnamed: 2_level_1
urn:cts:greekLit:tlg0099,100219883.0,Q45936
urn:cts:greekLit:tlg2045,,
urn:cts:latinLit:phi0690,8194433.0,Q1398
urn:cts:greekLit:tlg0024,100036127.0,
urn:cts:greekLit:tlg0001,262974255.0,Q192638
urn:cts:greekLit:tlg0034,268590928.0,Q373685
urn:cts:greekLit:tlg0363,54152998.0,Q34943
urn:cts:greekLit:tlg0016,100225976.0,Q26825
urn:cts:latinLit:phi0917,100902938.0,Q188646
urn:cts:greekLit:tlg2934,44435485.0,Q51884


In [14]:
sameas_links.values()[:10]

[{'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0030',
  'viaf': None,
  'wikidata': None},
 {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0032',
  'viaf': None,
  'wikidata': None},
 {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0033',
  'viaf': None,
  'wikidata': None},
 {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0034',
  'viaf': None,
  'wikidata': None},
 {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0035',
  'viaf': None,
  'wikidata': None},
 {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0036',
  'viaf': None,
  'wikidata': None},
 {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg1600',
  'viaf': None,
  'wikidata': None},
 {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg1216',
  'viaf': None,
  'wikidata': None},
 {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg2003',
  'viaf': None,
  'wikidata': None},
 {'perseus

### Wikidata

example (Strabo) http://www.wikidata.org/entity/Q45936

in a `owl:sameAs` property, which URI to use? http://www.wikidata.org/wiki/Special:EntityData/Q45936 ?

In [15]:
for urn in tqdm(list(authors_df.index)):
    wikidata_id = authors_df.loc[urn]["wikidata_id"]
    if wikidata_id != "None":
        wikidata_link = "http://www.wikidata.org/wiki/Special:EntityData/%s" % authors_df.loc[urn]["wikidata_id"]
        sameas_links[urn]["wikidata"] = wikidata_link

100%|██████████| 118/118 [00:00<00:00, 4510.87it/s]


### VIAF

In [16]:
for urn in tqdm(list(authors_df.index)):
    viaf_id = authors_df.loc[urn]["viaf_id"]
    if viaf_id != "None":
        viaf_link = "http://viaf.org/viaf/%s" % authors_df.loc[urn]["viaf_id"]
        sameas_links[urn]["viaf"] = viaf_link

100%|██████████| 118/118 [00:00<00:00, 4122.58it/s]


In [17]:
sameas_links

{'urn:cts:greekLit:tlg0001': {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0001',
  'viaf': u'http://viaf.org/viaf/262974255',
  'wikidata': u'http://www.wikidata.org/wiki/Special:EntityData/Q192638'},
 'urn:cts:greekLit:tlg0003': {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0003',
  'viaf': u'http://viaf.org/viaf/95161463',
  'wikidata': u'http://www.wikidata.org/wiki/Special:EntityData/Q41683'},
 'urn:cts:greekLit:tlg0004': {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0004',
  'viaf': u'http://viaf.org/viaf/41839141',
  'wikidata': u'http://www.wikidata.org/wiki/Special:EntityData/Q59138'},
 'urn:cts:greekLit:tlg0005': {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0005',
  'viaf': u'http://viaf.org/viaf/95161348',
  'wikidata': u'http://www.wikidata.org/wiki/Special:EntityData/Q219484'},
 'urn:cts:greekLit:tlg0006': {'perseus': 'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0006',
  'viaf': u'http://v

### Add 'em all

In [18]:
for urn in tqdm(sameas_links.keys()):
    try:
        author = kb.get_resource_by_urn(urn)
        for link_source in sameas_links[urn]:
            if link_source is not None:
                uri = URIRef(sameas_links[urn][link_source])
                if not uri in author.owl_sameAs: 
                    author.owl_sameAs.append(uri)
                    author.update()
                print author.owl_sameAs
    except Exception as e:
        print "%s not in the KB" % urn

  0%|          | 0/118 [00:00<?, ?it/s]

[rdflib.term.URIRef(u'http://cwkb.org/author/id/930/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/264197546')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/930/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/264197546'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q314447')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/930/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/264197546'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q314447'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0030')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1129/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/89597697')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1129/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/89597697'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q129772')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1129/turtle'), rdflib.term.URIRef(u'http://via

  3%|▎         | 4/118 [00:00<00:02, 39.70it/s]


urn:cts:greekLit:tlg0033 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/958/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/268590928')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/958/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/268590928'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q373685')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/958/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/268590928'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q373685'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0034')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/993/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/299323588')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/993/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/299323588'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q957548')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/993/tu

  8%|▊         | 9/118 [00:00<00:02, 41.68it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/2663/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/163752378'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q1231472')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/2663/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/163752378'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q1231472'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg1600')]
urn:cts:greekLit:tlg1216 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/886/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/57406701')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/886/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/57406701'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q33941')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/886/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/57406701'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Spec

 11%|█         | 13/118 [00:00<00:02, 41.11it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/1031/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/77132959')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1031/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/77132959'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q192931')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1031/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/77132959'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q192931'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0525')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/580/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100219094')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/580/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100219094'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q52166')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/580/turtle'), rdflib.term.URIRef(u'http://viaf

 16%|█▌        | 19/118 [00:00<00:02, 43.94it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/2846/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/89594750'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q172198')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/2846/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/89594750'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q172198'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:latinLit:stoa0023')]
urn:cts:latinLit:phi1212 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/833/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/268224700')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/833/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/268224700'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q473421')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/833/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/268224700'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Spe

 19%|█▉        | 23/118 [00:00<00:02, 42.69it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/917/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/122220717'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q44233')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/917/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/122220717'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q44233'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0020')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/735/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/110155699')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/735/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/110155699'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q391654')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/735/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/110155699'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q391654'), rdflib.term.URI

 25%|██▍       | 29/118 [00:00<00:01, 44.69it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/644/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100226923')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/644/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100226923'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q2161')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/644/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100226923'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q2161'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:latinLit:phi1351')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/923/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/300089234')]
urn:cts:greekLit:tlg0627 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/2401/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/44435485')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/2401/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/44435485'), rdflib.term.URIRef(u'

 30%|██▉       | 35/118 [00:00<00:01, 46.09it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/765/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/20962036'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q43353')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/765/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/20962036'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q43353'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0019')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/915/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100225976')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/915/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100225976'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q26825')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/915/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100225976'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q26825'), rdflib.term.URIRef(

 34%|███▍      | 40/118 [00:01<00:02, 33.68it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/939/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/36926979'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q221182')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/939/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/36926979'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q221182'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0010')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1090/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/101760867')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1090/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/101760867'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q7235')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1090/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/101760867'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q7235'), rdflib.term.URIR

 38%|███▊      | 45/118 [00:01<00:02, 34.45it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/2847/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100219535')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/2847/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100219535'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q205704')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/2847/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100219535'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q205704'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0548')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1396/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/103210101')]
urn:cts:greekLit:tlg0652 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/753/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100908520')]
urn:cts:greekLit:tlg0653 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/525/turtle'), rdflib.term.URIRef(u'http://v

 44%|████▍     | 52/118 [00:01<00:01, 36.73it/s]


urn:cts:latinLit:phi0914 not in the KB
urn:cts:latinLit:phi1017 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/797/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/95167625')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/797/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/95167625'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q185223')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/797/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/95167625'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q185223'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0385')]
urn:cts:greekLit:tlg4029 not in the KB
urn:cts:latinLit:phi1294 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1138/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/89002230')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1138/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/89002230'), rdflib.term.URIRef(u

 49%|████▉     | 58/118 [00:01<00:01, 38.01it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/1115/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/95161463'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q41683')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1115/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/95161463'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q41683'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0003')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/791/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/3284583')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/791/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/3284583'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q713482')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/791/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/3284583'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q713482'), rdflib.term.URIRef(u'

 53%|█████▎    | 63/118 [00:01<00:01, 38.05it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/1172/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100169636'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q294923')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1172/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100169636'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q294923'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0008')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/805/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/54152998')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/805/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/54152998'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q34943')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/805/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/54152998'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q34943'), rdflib.term.URIR

 58%|█████▊    | 68/118 [00:01<00:01, 38.05it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/850/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100218891'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q183144')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/850/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100218891'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q183144'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0557')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1113/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/265397758')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1113/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/265397758'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q160362')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1113/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/265397758'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q160362'), rdflib.ter

 63%|██████▎   | 74/118 [00:01<00:01, 39.02it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/2639/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/87464456'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q1371938')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/2639/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/87464456'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q1371938'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0647')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1093/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100219883')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1093/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100219883'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q45936')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1093/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100219883'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q45936'), rdflib.ter

 68%|██████▊   | 80/118 [00:03<00:01, 20.44it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/861/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/176184097'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q8747')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/861/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/176184097'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q8747'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg1799')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1068/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/76294608')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1068/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/76294608'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q352702')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1068/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/76294608'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q352702'), rdflib.term.URIRe

 74%|███████▎  | 87/118 [00:04<00:01, 21.54it/s]


urn:cts:latinLit:phi0830 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/838/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100218889')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/838/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100218889'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q26204')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/838/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/100218889'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q26204'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0081')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/719/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/268526195')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/719/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/268526195'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q40939')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/719/turtl

 80%|███████▉  | 94/118 [00:04<00:01, 22.70it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/2431/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/56608763'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q171241')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/2431/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/56608763'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q171241'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0060')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/957/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/68976194')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/957/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/68976194'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q432737')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/957/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/68976194'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q432737'), rdflib.term.URIR

 84%|████████▍ | 99/118 [00:04<00:00, 23.19it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/678/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/8194433'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q1398')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/678/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/8194433'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q1398'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:latinLit:phi0690')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/682/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/46768430')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/682/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/46768430'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q47163')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/682/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/46768430'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q47163'), rdflib.term.URIRef(u'http:

 92%|█████████▏| 109/118 [00:04<00:00, 24.92it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/583/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/66462281'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q47160')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/583/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/66462281'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q47160'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:latinLit:phi0119')]
urn:cts:latinLit:phi1276 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/1445/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/269609305')]
urn:cts:greekLit:tlg0284 not in the KB
[rdflib.term.URIRef(u'http://cwkb.org/author/id/871/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/4929593')]
[rdflib.term.URIRef(u'http://cwkb.org/author/id/871/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/4929593'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q142999')]
[rdflib.term.URIRef(u'

100%|██████████| 118/118 [00:04<00:00, 26.07it/s]


[rdflib.term.URIRef(u'http://cwkb.org/author/id/759/turtle'), rdflib.term.URIRef(u'http://viaf.org/viaf/60528301'), rdflib.term.URIRef(u'http://www.wikidata.org/wiki/Special:EntityData/Q505931'), rdflib.term.URIRef(u'http://data.perseus.org/catalog/urn:cts:greekLit:tlg0719')]
urn:cts:latinLit:phi1100 not in the KB
urn:cts:latinLit:phi1020 not in the KB
urn:cts:greekLit:tlg7000 not in the KB





In [21]:
kb.get_resource_by_urn("urn:cts:greekLit:tlg0010").subject

rdflib.term.URIRef(u'http://purl.org/hucit/kb/authors/939')

## Gather LOD stats

In [51]:
author_lod_info = []

for author in kb.get_authors():
    author_info = {}
    has_cwkb_link = True if len([link for link in author.owl_sameAs if "cwkb" in str(link)])>0 else False
    has_viaf_link = True if len([link for link in author.owl_sameAs if "viaf" in str(link)])>0 else False
    has_catalog_link = True if len([link for link in author.owl_sameAs if "perseus.org/catalog/" in str(link)])>0 else False
    has_wikidata_link = True if len([link for link in author.owl_sameAs if "wikidata" in str(link)])>0 else False
    urn = author.get_urn()
    author_info = {
        'has_cwkb_link':has_cwkb_link,
        'has_viaf_link':has_viaf_link,
        'has_catalog_link':has_catalog_link,
        'has_wikidata_link':has_wikidata_link,
        'urn':urn,
        'type':str(type(author)),
        'label':kb.get_author_label(urn) if urn is not None else ""
    }
    author_lod_info.append(author_info)

In [52]:
work_lod_info = []

for work in kb.get_works():
    work_info = {}
    has_cwkb_link = True if len([link for link in work.owl_sameAs if "cwkb" in str(link)])>0 else False
    has_viaf_link = True if len([link for link in work.owl_sameAs if "viaf" in str(link)])>0 else False
    has_catalog_link = True if len([link for link in work.owl_sameAs if "perseus.org/catalog/" in str(link)])>0 else False
    has_wikidata_link = True if len([link for link in work.owl_sameAs if "wikidata" in str(link)])>0 else False
    urn = work.get_urn()
    work_info = {
        'has_cwkb_link':has_cwkb_link,
        'has_viaf_link':has_viaf_link,
        'has_catalog_link':has_catalog_link,
        'has_wikidata_link':has_wikidata_link,
        'urn':urn,
        'type':str(type(work)),
        'label':kb.get_work_label(urn) if urn is not None else ""
    }
    work_lod_info.append(work_info)

In [53]:
df_authors = pd.DataFrame.from_dict(author_lod_info).set_index('urn', drop=True)

In [54]:
df_works = pd.DataFrame.from_dict(work_lod_info).set_index('urn', drop=True)

In [55]:
df = pd.concat([df_authors, df_works])

In [3]:
df = pd.read_csv("./kb_lod_stats.csv", encoding="utf-8")

In [56]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6747 entries, urn:cts:cwkb:1000 to urn:cts:cwkb:474.999
Data columns (total 6 columns):
has_catalog_link     6747 non-null bool
has_cwkb_link        6747 non-null bool
has_viaf_link        6747 non-null bool
has_wikidata_link    6747 non-null bool
label                6747 non-null object
type                 6747 non-null object
dtypes: bool(4), object(2)
memory usage: 184.5+ KB


In [57]:
df["type"]

urn
urn:cts:cwkb:1000           <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:cwkb:1001           <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:greekLit:tlg2045    <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:cwkb:1003           <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:cwkb:1004           <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:cwkb:1005           <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:cwkb:1006           <class 'surf.resource.EfrbrooF10_Person'>
NaN                         <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:greekLit:tlg0648    <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:greekLit:tlg0024    <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:cwkb:1010           <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:cwkb:1011           <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:cwkb:1012           <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:cwkb:1013           <class 'surf.resource.EfrbrooF10_Person'>
urn:cts:cwkb:101

In [74]:
lod_stats = []

In [75]:
df_authors = df[df["type"]=="<class 'surf.resource.EfrbrooF10_Person'>"]
columns = [column for column in df_authors.columns if 'has' in column]
tmp = {"label":"Authors"}
for column in columns:
    stats = dict(df_authors[column].value_counts(normalize=True))
    tmp[column] = stats[True]*100
lod_stats.append(tmp)

In [76]:
lod_stats

[{'has_catalog_link': 4.909560723514212,
  'has_cwkb_link': 100.0,
  'has_viaf_link': 5.8785529715762275,
  'has_wikidata_link': 4.909560723514212,
  'label': 'Authors'}]

In [78]:
df_lod_stats = pd.DataFrame(lod_stats, index=[stat["label"] for stat in lod_stats])

In [86]:
df_lod_stats

Unnamed: 0,has_catalog_link,has_cwkb_link,has_viaf_link,has_wikidata_link,label
Authors,4.909561,100.0,5.878553,4.909561,Authors


In [87]:
print tabulate(df_lod_stats[["has_catalog_link", "has_cwkb_link", "has_viaf_link", "has_wikidata_link"]]
               , ["link to Perseus Catalog (\%)", "link to CWKB (\%)", "link to VIAF (\%)", "link to Wikidata (\%)"]
               , tablefmt="pipe", floatfmt=".2f")

|         |   link to Perseus Catalog (\%) |   link to CWKB (\%) |   link to VIAF (\%) |   link to Wikidata (\%) |
|:--------|-------------------------------:|--------------------:|--------------------:|------------------------:|
| Authors |                           4.91 |              100.00 |                5.88 |                    4.91 |


In [71]:
df_works = df[df["type"]=="<class 'surf.resource.EfrbrooF1_Work'>"]
columns = [column for column in df_works.columns if 'has' in column]
for column in columns:
    stats = dict(df_works[column].value_counts(normalize=True))
    if True in stats:
        print(column,stats[True]*100)
    else:
        print(column, 0)

('has_catalog_link', 0)
('has_cwkb_link', 100.0)
('has_viaf_link', 0)
('has_wikidata_link', 0)


In [68]:
df_works

Unnamed: 0_level_0,has_catalog_link,has_cwkb_link,has_viaf_link,has_wikidata_link,label,type
urn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
urn:cts:cwkb:474.1000,False,True,False,False,On the quality of fields,<class 'surf.resource.EfrbrooF1_Work'>
urn:cts:cwkb:474.1001,False,True,False,False,De Controversiis,<class 'surf.resource.EfrbrooF1_Work'>
urn:cts:cwkb:474.1002,False,True,False,False,De Limitibus,<class 'surf.resource.EfrbrooF1_Work'>
urn:cts:cwkb:474.1003,False,True,False,False,Art of Surveying,<class 'surf.resource.EfrbrooF1_Work'>
urn:cts:cwkb:475.1004,False,True,False,False,Ad M. Caesarem Epistulae,<class 'surf.resource.EfrbrooF1_Work'>
urn:cts:cwkb:475.1005,False,True,False,False,Ad M. Antoninum Imperatorem Epistulae,<class 'surf.resource.EfrbrooF1_Work'>
urn:cts:cwkb:475.1006,False,True,False,False,Ad Verum Imperatorem Epistulae,<class 'surf.resource.EfrbrooF1_Work'>
urn:cts:cwkb:475.1007,False,True,False,False,De Eloquentia,<class 'surf.resource.EfrbrooF1_Work'>
urn:cts:cwkb:475.1008,False,True,False,False,De Orationibus,<class 'surf.resource.EfrbrooF1_Work'>
urn:cts:cwkb:475.1009,False,True,False,False,Ad Antoninum Pium Epistulae,<class 'surf.resource.EfrbrooF1_Work'>


In [69]:
dict(df['has_catalog_link'].value_counts(normalize=True))

{False: 0.98873573440047424, True: 0.011264265599525715}

In [21]:
columns = [column for column in df.columns if 'has' in column]
for column in columns:
    stats = dict(df[column].value_counts(normalize=True))
    print(column,stats[True]*100)

(u'has_catalog_link', 1.1264265599525713)
(u'has_cwkb_link', 100.0)
(u'has_viaf_link', 1.3487475915221581)
(u'has_wikidata_link', 1.1264265599525713)


In [87]:
df.to_csv("./kb_lod_stats.csv", encoding="utf-8")

TODO: create table with tabulate

## Example of interaction with the KB

In [34]:
import pprint
import pkg_resources
from knowledge_base import KnowledgeBase

In [45]:
virtuoso_cfg_file = pkg_resources.resource_filename('knowledge_base','config/virtuoso.ini')

In [46]:
kb = KnowledgeBase(virtuoso_cfg_file)

In [40]:
search_results = kb.search('Omero')

In [44]:
for search_query, result in search_results:
    print search_query, result.get_urn()
    pprint.pprint(result.get_names())

Omero urn:cts:greekLit:tlg0012
[(u'fr', u'Hom\xe8re'),
 (u'la', u'Homerus'),
 (None, u'Homeros'),
 (u'en', u'Homer'),
 (u'it', u'Omero')]


In [49]:
print result.to_json()

{
  "name_abbreviations": [
    "Hom."
  ], 
  "urn": "urn:cts:greekLit:tlg0012", 
  "works": [
    {
      "urn": "urn:cts:cwkb:927.2814", 
      "titles": [
        {
          "language": "la", 
          "label": "Epigrammata"
        }
      ], 
      "uri": "http://purl.org/hucit/kb/works/2814", 
      "title_abbreviations": [
        "epigr."
      ]
    }, 
    {
      "urn": "urn:cts:greekLit:tlg0012.tlg001", 
      "titles": [
        {
          "language": "it", 
          "label": "Iliade"
        }, 
        {
          "language": "la", 
          "label": "Ilias"
        }, 
        {
          "language": "en", 
          "label": "Iliad"
        }, 
        {
          "language": "de", 
          "label": "Ilias"
        }, 
        {
          "language": "fr", 
          "label": "L'Iliade"
        }
      ], 
      "uri": "http://purl.org/hucit/kb/works/2815", 
      "title_abbreviations": [
        "Il."
      ]
    }, 
    {
      "urn": "urn:cts:greekLit:tlg001

## Playing around with dask

In [57]:
kb = KnowledgeBase("/Users/rromanello/Documents/ClassicsCitations/hucit_kb/knowledge_base/config/virtuoso_local.ini")

In [59]:
kb.get_authors()[-1]

HucitAuthor (names=[Nicias Epigrammaticus (@la),Nikias (@None),Nicias (@None)],urn=urn:cts:cwkb:999)

### Authors

In [60]:
def get_author_info(urn, kb):
    author = kb.get_resource_by_urn(urn)
    return {
        "urn": author.get_urn(),
        "names": [name for lang, name in author.get_names()]
    }
    

In [62]:
%%time
tasks = [
        delayed(get_author_info)(urn, kb)
        for urn in [a.get_urn() for a in kb.get_authors()]
        if urn is not None
]

CPU times: user 5.73 s, sys: 1.88 s, total: 7.61 s
Wall time: 36.3 s


In [63]:
 with ProgressBar():
        result = compute(*tasks, get=mp_get)

[########################################] | 100% Completed |  1min 25.9s


In [64]:
df = pd.DataFrame(list(result))

In [67]:
df.head(5)

Unnamed: 0,names,urn
0,"[Nikomachos, Nicomaco, Nicomachus, Nicomachus ...",urn:cts:cwkb:1000
1,"[Nilus Ancyranus, Nil, Nilo, Nilus]",urn:cts:cwkb:1001
2,"[Nonnus Panapolitanus, Nonnus Of Panopolis, No...",urn:cts:greekLit:tlg2045
3,[Nostoi],urn:cts:cwkb:1003
4,"[Occelus, Ocellus Lucanus, Ocellus, Okellos, E...",urn:cts:cwkb:1004


In [69]:
df.to_csv("/Users/rromanello/Downloads/kb_authors.csv", encoding="utf-8")

### Works

In [82]:
def get_work_info(urn, kb):
    work = kb.get_resource_by_urn(urn)
    author_urn = work.author.get_urn()
    return {
        "urn": urn,
        "titles": [title for lang, title in work.get_titles()],
        "author_urn": author_urn,
        "author_label": kb.get_author_label(author_urn)
    }
    

In [83]:
%%time

tasks = [
        delayed(get_work_info)(urn, kb)
        for urn in [w.get_urn() for a in kb.get_authors() for w in a.get_works()]
        if urn is not None
]

CPU times: user 30.2 s, sys: 10.3 s, total: 40.4 s
Wall time: 2min 58s


In [85]:
 with ProgressBar():
        result = compute(*tasks, get=mp_get)

df = pd.DataFrame(list(result))

[########################################] | 100% Completed |  7min 36.5s


In [89]:
df.head(10)

Unnamed: 0,author_label,author_urn,titles,urn
0,Nikomachos,urn:cts:cwkb:1000,[Introductio arithmetica],urn:cts:cwkb:1000.3425
1,Nikomachos,urn:cts:cwkb:1000,[Theologoumena arithmeticae],urn:cts:cwkb:1000.3426
2,Nikomachos,urn:cts:cwkb:1000,[Excerpta],urn:cts:cwkb:1000.3423
3,Nikomachos,urn:cts:cwkb:1000,[Harmonicum enchiridion],urn:cts:cwkb:1000.3424
4,Nilus,urn:cts:cwkb:1001,[Commentarii in Canticum Canticorum],urn:cts:cwkb:1001.3427
5,Nilus,urn:cts:cwkb:1001,[Narrationes septem de monachis in Sina],urn:cts:cwkb:1001.3428
6,Nilus,urn:cts:cwkb:1001,[De monastica exercitatione],urn:cts:cwkb:1001.8226
7,Nilus,urn:cts:cwkb:1001,[Epistulae],urn:cts:cwkb:1001.9143
8,Nonnus Of Panopolis,urn:cts:greekLit:tlg2045,"[Dionysiaka, Dionisiache, Dionysiaca, Les Dion...",urn:cts:greekLit:tlg2045.tlg001
9,Nonnus Of Panopolis,urn:cts:greekLit:tlg2045,"[Paraphrase of the Gospel of John, Paraphrase ...",urn:cts:cwkb:1002.3431


In [90]:
df.to_csv("/Users/rromanello/Downloads/kb_works.csv", encoding="utf-8")

In [96]:
a = kb.get_resource_by_urn("urn:cts:cwkb:630.1310")

In [98]:
cwb_uri = a.owl_sameAs[0]

In [99]:
cwb_uri

rdflib.term.URIRef(u'http://cwkb.org/work/id/1310/turtle')