In [1]:
from itertools import islice
from collections import Counter

import json

from pandas import DataFrame
import pandas as pd



In [2]:
from periodo_reconciler import (
    RProperty,
    RQuery,
    PeriodoReconciler
)

# Client used by the reconciliation cells further down. The host is hard-coded,
# so a reconciler server presumably has to be listening on localhost:8142
# before those cells are run -- TODO confirm.
p_recon = PeriodoReconciler(host='localhost:8142')

In [3]:
DATA_PATH = "../data/p0d.json"

# Read the dataset through a context manager so the file handle is closed as
# soon as parsing finishes, and decode explicitly as UTF-8: the labels contain
# non-ASCII text (e.g. Cyrillic, Greek) and the default encoding of open() is
# platform dependent.
with open(DATA_PATH, encoding="utf-8") as data_file:
    rdata = json.load(data_file)

In [4]:
# Show the top-level keys of the loaded JSON document.
rdata.keys()

dict_keys(['periodCollections', 'id', '@context', 'type', 'primaryTopicOf'])

In [5]:
# Inspect the collection at index 1 (the second one) in the mapping's
# iteration order.
[*rdata['periodCollections'].values()][1]

{'definitions': {'p0qhb6622kc': {'editorialNote': 'Derived from FASTI, but no match in 2004 dataset.',
   'id': 'p0qhb6622kc',
   'label': 'geometrico',
   'language': 'ita-latn',
   'localizedLabels': {'eng-latn': ['Geometric', 'Phoenician'],
    'ita-latn': ['geometrico']},
   'note': 'http://www.worldcat.org/oclc/175286417',
   'spatialCoverageDescription': 'Sardinia',
   'start': {'in': {'year': '-0850'}, 'label': '2850 BP (2000)'},
   'stop': {'in': {'year': '-0731'}, 'label': '2731 BP (2000)'},
   'type': 'PeriodDefinition'},
  'p0qhb6622s5': {'editorialNote': 'Derived from Rijksdienst, but no match in the dataset.',
   'id': 'p0qhb6622s5',
   'label': 'Historie',
   'language': 'nld-latn',
   'localizedLabels': {'eng-latn': ['Historic'], 'nld-latn': ['Historie']},
   'note': 'Rijksdienst voor het Cultureel Erfgoed http://data.cultureelerfgoed.nl/semnet/724277e3-6329-48f2-88d7-f84703fdd01e',
   'spatialCoverage': [{'id': 'http://dbpedia.org/resource/Netherlands',
     'label': 'N

In [6]:
# Total number of definitions across all period collections.
# Uses the plain built-in `sum`: `__builtin__` is the Python 2 module name and
# is not defined in a plain Python 3 interpreter. A generator expression avoids
# building a throwaway list.
sum(len(v['definitions']) for v in rdata['periodCollections'].values())

5466

In [7]:
# Print one line per collection: its key followed by how many definitions it holds.
for k, v in rdata['periodCollections'].items():
    n_definitions = len(v['definitions'])
    print(k, n_definitions)

p08p8mm 0
p0qhb66 660
p0pf2qb 40
p0bsjms 1
p0zmdxz 9
p0znh3t 4
p0244q7 1
p08nrfc 5
p0pp7vz 3
p086kj9 522
p0jf288 20
p0dntkb 3
p047fhm 27
p0qwjcd 11
p0dfzs7 1
p0sgtfr 1
p0339m9 53
p0qwcp6 6
p0v28d2 1
p03tqbz 2
p0ctc35 9
p02sht9 3
p044mt9 3
p0r2jnt 1
p0mwsd7 1
p0mn2nd 29
p0kh9ds 42
p0kgptq 4
p0pk6sc 1
p0z3skm 26
p08m57h 944
p0qp9rs 55
p0rrjd9 32
p02q72z 2
p0367jz 6
p03wskd 116
p0trc5r 5
p05n4hj 11
p0vhct4 13
p0cmdf9 4
p0j5frx 4
p0ms2ch 5
p0tns5v 7
p0vn2fr 32
p0pf7xr 10
p0dg76f 42
p07h9k6 30
p0pqptc 53
p08x7cz 2
p077fc5 4
p0rqpwq 4
p0s7jn8 2
p0cp447 11
p0m63nj 108
p0kc8t6 8
p0r8d9c 95
p0z5nvh 35
p0f65r2 27
p0wf3wd 13
p03tcss 6
p05xnzq 3
p08tf6p 19
p0d39r7 1
p0fp7wv 52
p0bd664 21
p0c3bh8 5
p0vm8tz 4
p0dhmkc 2
p0ff3dt 17
p0jrrjb 26
p0wnvm4 1
p0gjgrs 31
p0vrf7b 1
p06v8w4 212
p0323gx 5
p089gcf 11
p083p5r 63
p09xsbn 4
p0m9pcg 13
p0pv57g 18
p0fh3zc 2
p06xc6m 2
p03377f 2
p05hrsf 57
p06ptzs 1
p0hrtx5 1
p0k7ktv 1
p0zj6g8 48
p0vpm8v 13
p0wswdm 3
p044n25 6
p0hvcwr 1
p0jtbzw 31
p06c6g3 1463
p0dfxxp 5

In [8]:
# Inspect a single collection in full, looked up by its key.
rdata['periodCollections']['p0bsjms']

{'definitions': {'p0bsjms2dzb': {'id': 'p0bsjms2dzb',
   'label': 'Republican',
   'language': 'eng-latn',
   'localizedLabels': {'eng-latn': ['Republican']},
   'note': 'Beginning-date corresponds with the end of the Celtiberian wars; outside of the Celtiberian homeland, the Republican period in Spain begins 218 B.C. (Second Punic War).',
   'spatialCoverage': [{'id': 'http://dbpedia.org/resource/Spain',
     'label': 'Spain'}],
   'spatialCoverageDescription': 'Spain',
   'start': {'in': {'year': '-0132'}, 'label': '133 B.C.'},
   'stop': {'in': {'year': '-0026'}, 'label': '27 B.C.'},
   'type': 'PeriodDefinition'}},
 'id': 'p0bsjms',
 'source': {'locator': 'pages 3, 52',
  'partOf': {'creators': [{'id': 'http://viaf.org/viaf/17271321',
     'name': 'Curchin, Leonard A., 1950-'}],
   'id': 'http://www.worldcat.org/oclc/51937089',
   'title': 'The romanization of central Spain : complexity, diversity, and change in a provincial hinterland',
   'yearPublished': 2004}},
 'type': 'Period

In [9]:
def definitions_from_file(data_path):
    """Yield one flat summary dict per period definition in a dataset file.

    Parameters
    ----------
    data_path : str or path-like
        Path to a UTF-8 JSON file with a top-level ``periodCollections``
        mapping in which every collection has a ``definitions`` mapping.

    Yields
    ------
    dict
        Keys ``id``, ``language``, ``label``, ``start``, ``stop``,
        ``spatialCoverageDescription`` and ``len_spatialCoverage``.
        ``start`` and ``stop`` are the ``in.year`` values exactly as stored
        in the file, or ``None`` when that path is absent.
        ``spatialCoverageDescription`` is ``None`` when missing, and
        ``len_spatialCoverage`` is the number of ``spatialCoverage`` entries
        (0 when the key is missing).

    Raises
    ------
    KeyError
        If the file lacks ``periodCollections``, a collection lacks
        ``definitions``, or a definition lacks ``id``, ``language``,
        ``label``, ``start`` or ``stop``.
    """
    # Parse the whole file before the first yield so the handle is closed
    # deterministically rather than staying open while the generator is
    # suspended (or leaking if it is never exhausted).
    with open(data_path, encoding="utf-8") as data_file:
        dataset = json.load(data_file)

    for collection in dataset['periodCollections'].values():
        for definition in collection['definitions'].values():
            yield {
                'id': definition['id'],
                'language': definition['language'],
                'label': definition['label'],
                # Only a single "in" year is extracted; other shapes give None.
                'start': definition['start'].get('in', {}).get('year'),
                'stop': definition['stop'].get('in', {}).get('year'),
                'spatialCoverageDescription': definition.get('spatialCoverageDescription'),
                'len_spatialCoverage': len(definition.get('spatialCoverage', [])),
            }



In [10]:
# Count every definition yielded by the generator.
len(list(definitions_from_file(DATA_PATH)))

5466

In [11]:
# Preview the first 10 flattened definitions.
list(islice(definitions_from_file(DATA_PATH), 10))

[{'id': 'p0qhb668r3q',
  'label': 'ранна римска епоха',
  'language': 'bul-cyrl',
  'len_spatialCoverage': 1,
  'spatialCoverageDescription': 'Bulgaria',
  'start': '0015',
  'stop': '0283'},
 {'id': 'p0qhb66k7q6',
  'label': 'Μέση Μινωική',
  'language': 'ell-latn',
  'len_spatialCoverage': 0,
  'spatialCoverageDescription': 'Crete',
  'start': '-1700',
  'stop': '-1071'},
 {'id': 'p0qhb663g4t',
  'label': 'Mittleres Jungpaläolithikum',
  'language': 'deu-latn',
  'len_spatialCoverage': 4,
  'spatialCoverageDescription': 'Germany, Switzerland, Czech Republic, Liechtenstein',
  'start': '-32500',
  'stop': '-22501'},
 {'id': 'p0qhb66dbmx',
  'label': 'Geometric (Cypro-Geometric) II',
  'language': 'eng-latn',
  'len_spatialCoverage': 1,
  'spatialCoverageDescription': 'Cyprus',
  'start': '-0950',
  'stop': '-0851'},
 {'id': 'p0qhb66wkxj',
  'label': 'римскиот период',
  'language': 'mkd-cyrl',
  'len_spatialCoverage': 1,
  'spatialCoverageDescription': 'Macedonia',
  'start': '0030',


In [12]:
# Materialise every flattened definition into a DataFrame, one row each,
# then show the first few rows.
ddf = DataFrame(list(definitions_from_file(DATA_PATH)))
ddf.head()

Unnamed: 0,id,label,language,len_spatialCoverage,spatialCoverageDescription,start,stop
0,p0qhb668r3q,ранна римска епоха,bul-cyrl,1,Bulgaria,15,283
1,p0qhb66k7q6,Μέση Μινωική,ell-latn,0,Crete,-1700,-1071
2,p0qhb663g4t,Mittleres Jungpaläolithikum,deu-latn,4,"Germany, Switzerland, Czech Republic, Liechten...",-32500,-22501
3,p0qhb66dbmx,Geometric (Cypro-Geometric) II,eng-latn,1,Cyprus,-950,-851
4,p0qhb66wkxj,римскиот период,mkd-cyrl,1,Macedonia,30,299


In [13]:
# How often each free-text spatial coverage description occurs.
ddf['spatialCoverageDescription'].value_counts()

Cyprus                                                                                              147
Iowa                                                                                                140
France                                                                                              135
Denmark                                                                                             130
China                                                                                               125
Netherlands                                                                                         125
Florida                                                                                             123
Sweden                                                                                              106
Crete                                                                                                87
United Kingdom                                                  

In [14]:
# Rows whose spatial coverage description is exactly 'Cyprus'.
is_cyprus = ddf['spatialCoverageDescription'] == 'Cyprus'
ddf.loc[is_cyprus]

Unnamed: 0,id,label,language,len_spatialCoverage,spatialCoverageDescription,start,stop
3,p0qhb66dbmx,Geometric (Cypro-Geometric) II,eng-latn,1,Cyprus,-0950,-0851
20,p0qhb666hmw,Ottoman,eng-latn,1,Cyprus,1572,1913
23,p0qhb66xqgx,Bronze Age,eng-latn,1,Cyprus,-2500,-1801
26,p0qhb66n37f,Geometric (Cypro-Geometric) I,eng-latn,1,Cyprus,-1050,-0951
32,p0qhb66r5d4,Middle Bronze Age,eng-latn,1,Cyprus,-1800,-1651
58,p0qhb66t6qg,Modern,eng-latn,1,Cyprus,1914,2000
126,p0qhb66b5bt,Early Bronze Age,eng-latn,1,Cyprus,-2500,-1801
143,p0qhb66nkmb,Proto-Neolithic,eng-latn,1,Cyprus,-8800,-6001
167,p0qhb667z6x,Archaic (Cypro-Archaic) I,eng-latn,1,Cyprus,-0750,-0601
199,p0qhb665nn2,Akrotiri,eng-latn,1,Cyprus,-9300,-8801


In [15]:
# Three sample reconciliation queries, each given an explicit label:
#   - a bare text lookup,
#   - a text lookup capped with limit=1,
#   - a text lookup qualified by 'location', 'start' and 'end' properties.
queries = [
    RQuery("北宋", label="basic-query"),
    RQuery("bronze age", label="limited-query", limit=1),
    RQuery("Ранньоримський", label="additional-properties-query", properties=[
        RProperty('location', 'Ukraine'),
        RProperty('start', 200),
        RProperty('end', 600)
    ])
]

In [16]:
# Submit all sample queries in one call (method='post') and display the response.
r = p_recon.reconcile(queries, method='post')
r

{'additional-properties-query': {'result': [{'id': 'http://n2t.net/ark:/99152/p06v8w4dbcf',
    'match': True,
    'name': 'Ранньоримський період [Ukraine, Ukraine: -0049 to 0175]',
    'score': 0,
    'type': [{'id': 'http://www.w3.org/2004/02/skos/core#Concept',
      'name': 'Period definition'}]}]},
 'basic-query': {'result': [{'id': 'http://n2t.net/ark:/99152/p0fp7wvjvn8',
    'match': True,
    'name': 'Northern Song [China, China: 0960 to 1127]',
    'score': 0,
    'type': [{'id': 'http://www.w3.org/2004/02/skos/core#Concept',
      'name': 'Period definition'}]}]},
 'limited-query': {'result': [{'id': 'http://n2t.net/ark:/99152/p0z3skmnss7',
    'match': False,
    'name': 'Bronze [Palestine, Israel, Jordan: -3299 to -1199]',
    'score': 0,
    'type': [{'id': 'http://www.w3.org/2004/02/skos/core#Concept',
      'name': 'Period definition'}]}]}}

In [17]:
# Single query with no explicit label, narrowed by a 'location' property.
# Rebinds `r`, replacing the response from the previous reconcile call.
r = p_recon.reconcile([
    RQuery("Roman, Late", properties=[
        RProperty('location', 'Cyprus'),
    ])
], method='post')

r

{'1f30be64-02ca-4a9b-b52f-b350a37e7b02': {'result': []}}