In [1]:
import csv
import contextlib
import os, errno
from collections import OrderedDict, Counter

from IPython.core.display import display, HTML

from pandas import DataFrame
import pandas as pd
import numpy as np

from periodo_reconciler import (
    RProperty,
    RQuery,
    PeriodoReconciler,
    CsvReconciler
)

In [2]:
def output_path_name(inpath, test_data_dir="../test-data/",
                     test_output_dir="../test-output/"):
    """
    given a path in the test directory, return the path for corresponding output in the test output dir
    """
 
    return os.path.join(test_output_dir, os.path.relpath(inpath, start=test_data_dir))

def makedirs_for_path(fpath):
    """
    make sure that the directory for fpath exists
    """
    
    (fdir, fname) = os.path.split(fpath)
    try:
        os.makedirs(fdir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
            
    return True


In [3]:
def reconcile_to_csv_df(csv_path, kw, p_recon):
    """
    Inputs:  
    csv_path: the path of the csv file to reconcile
    kw: a dict holding the query, location, start, stop fields
    p_recon: a PeriodoReconciler object
    
    Side-effect:
    * a csv file with the matches in the output directory
    * returns a Pandas DataFrame holding the input data along with match data
    
    """

    output_path = output_path_name(csv_path)
    makedirs_for_path(output_path)
    
    # https://stackoverflow.com/a/19412700/7782
    with contextlib.ExitStack() as stack:
        csvfile = stack.enter_context(open(csv_path))
        outputfile = stack.enter_context(open(output_path, "w"))                  

        # p_recon = PeriodoReconciler(host='localhost:8142')
        c_recon = CsvReconciler(csvfile, p_recon, **kw)

        matches = list(c_recon.matches())

        c_recon.to_csv(outputfile, matches)
        df = DataFrame(matches)

        return df


# simple example of running the reconciler against a CSV file

In [4]:
# simple example

csv_path = "../test-data/periodo_simple_example.csv"
kw = {
   'location': 'location',
   'query': 'query',
   'start': 'start',
   'stop': 'end'
}

p_recon = PeriodoReconciler(host='localhost:8142')
df = reconcile_to_csv_df(csv_path, kw, p_recon)

df.match_num.value_counts()

 results_with_rows 0 0 results_with_rows 0 1 results_with_rows 0 2

1    2
0    1
Name: match_num, dtype: int64

# OpenContext examples

In [5]:
# the list of OpenContext CSV files along with the columns to be fed to the reconciler

OPENCONTEXT_TEST_FILES = [{'csv_path': '../test-data/OpenContext/Cyprus PKAP Survey.csv',
  'kw': {'location': 'Context (1)',
   'query': 'Period',
   'start': 'Early BCE/CE',
   'stop': 'Late BCE/CE'}},
 {'csv_path': '../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv',
  'kw': {'location': 'Context (1)',
   'query': 'periodo-pre-match',
   'start': 'Early BCE/CE',
   'stop': 'Late BCE/CE'}},
 {'csv_path': '../test-data/OpenContext/European Cattle with Periods.csv',
  'kw': {'location': 'Context (1)',
   'query': 'Period',
   'start': 'Early BCE/CE',
   'stop': 'Late BCE/CE',
   'ignored_queries':'Not determined'
  }},
 {'csv_path': '../test-data/OpenContext/Petra Artifacts.csv',
  'kw': {'location': 'Context (1)',
   'query': 'Culture',
   'start': 'Early BCE/CE',
   'stop': 'Late BCE/CE',
   'ignored_queries':'other,lb'
    }}]


kw_map = dict([(f['csv_path'], f['kw']) for f in OPENCONTEXT_TEST_FILES])

In [6]:
# write out incantation for the command line CSV reconciler

for f in OPENCONTEXT_TEST_FILES:
    options_from_kw = " ".join(['--{}="{}"'.format(k,v) for (k,v) in f['kw'].items()])
    print('periodo-reconciler-py {} "{}" -'.format(options_from_kw, f['csv_path'] ))

periodo-reconciler-py --location="Context (1)" --query="Period" --start="Early BCE/CE" --stop="Late BCE/CE" "../test-data/OpenContext/Cyprus PKAP Survey.csv" -
periodo-reconciler-py --location="Context (1)" --query="periodo-pre-match" --start="Early BCE/CE" --stop="Late BCE/CE" "../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv" -
periodo-reconciler-py --location="Context (1)" --query="Period" --start="Early BCE/CE" --stop="Late BCE/CE" --ignored_queries="Not determined" "../test-data/OpenContext/European Cattle with Periods.csv" -
periodo-reconciler-py --location="Context (1)" --query="Culture" --start="Early BCE/CE" --stop="Late BCE/CE" --ignored_queries="other,lb" "../test-data/OpenContext/Petra Artifacts.csv" -


In [7]:
p_recon = PeriodoReconciler(host='localhost:8142')
csv_path = '../test-data/OpenContext/Cyprus PKAP Survey.csv'
kw = {'location': 'Context (1)',
   'query': 'Period',
   'start': 'Early BCE/CE',
   'stop': 'Late BCE/CE'}

df = reconcile_to_csv_df(csv_path, kw, p_recon)

 results_with_rows 0 0 results_with_rows 0 1 results_with_rows 0 2 results_with_rows 0 3 results_with_rows 0 4 results_with_rows 0 5 results_with_rows 0 6 results_with_rows 0 7 results_with_rows 0 8 results_with_rows 0 9 results_with_rows 0 10 results_with_rows 0 11 results_with_rows 0 12 results_with_rows 0 13 results_with_rows 0 14 results_with_rows 0 15 results_with_rows 0 16 results_with_rows 0 17 results_with_rows 0 18 results_with_rows 0 19 results_with_rows 0 20 results_with_rows 0 21 results_with_rows 0 22 results_with_rows 0 23 results_with_rows 0 24 results_with_rows 0 25 results_with_rows 0 26 results_with_rows 0 27 results_with_rows 0 28 results_with_rows 0 29 results_with_rows 0 30 results_with_rows 0 31 results_with_rows 0 32 results_with_rows 0 33 results_with_rows 0 34 results_with_rows 0 35 results_with_rows 0 36 results_with_rows 0 37 results_with_rows 0 38 results_with_rows 0 39 results_with_rows 0 40 results_with_rows 0 41 

 646 results_with_rows 0 647 results_with_rows 0 648 results_with_rows 0 649 results_with_rows 0 650 results_with_rows 0 651 results_with_rows 0 652 results_with_rows 0 653 results_with_rows 0 654 results_with_rows 0 655 results_with_rows 0 656 results_with_rows 0 657 results_with_rows 0 658 results_with_rows 0 659 results_with_rows 0 660 results_with_rows 0 661 results_with_rows 0 662 results_with_rows 0 663 results_with_rows 0 664 results_with_rows 0 665 results_with_rows 0 666 results_with_rows 0 667 results_with_rows 0 668 results_with_rows 0 669 results_with_rows 0 670 results_with_rows 0 671 results_with_rows 0 672 results_with_rows 0 673 results_with_rows 0 674 results_with_rows 0 675 results_with_rows 0 676 results_with_rows 0 677 results_with_rows 0 678 results_with_rows 0 679 results_with_rows 0 680 results_with_rows 0 681 results_with_rows 0 682 results_with_rows 0 683 results_with_rows 0 684 results_with_rows 0 685 results_with_rows 0

 results_with_rows 1 0 results_with_rows 1 1 results_with_rows 1 2 results_with_rows 1 3 results_with_rows 1 4 results_with_rows 1 5 results_with_rows 1 6 results_with_rows 1 7 results_with_rows 1 8 results_with_rows 1 9 results_with_rows 1 10 results_with_rows 1 11 results_with_rows 1 12 results_with_rows 1 13 results_with_rows 1 14 results_with_rows 1 15 results_with_rows 1 16 results_with_rows 1 17 results_with_rows 1 18 results_with_rows 1 19 results_with_rows 1 20 results_with_rows 1 21 results_with_rows 1 22 results_with_rows 1 23 results_with_rows 1 24 results_with_rows 1 25 results_with_rows 1 26 results_with_rows 1 27 results_with_rows 1 28 results_with_rows 1 29 results_with_rows 1 30 results_with_rows 1 31 results_with_rows 1 32 results_with_rows 1 33 results_with_rows 1 34 results_with_rows 1 35 results_with_rows 1 36 results_with_rows 1 37 results_with_rows 1 38 results_with_rows 1 39 results_with_rows 1 40 results_with_rows 1 41 

 591 results_with_rows 1 592 results_with_rows 1 593 results_with_rows 1 594 results_with_rows 1 595 results_with_rows 1 596 results_with_rows 1 597 results_with_rows 1 598 results_with_rows 1 599 results_with_rows 1 600 results_with_rows 1 601 results_with_rows 1 602 results_with_rows 1 603 results_with_rows 1 604 results_with_rows 1 605 results_with_rows 1 606 results_with_rows 1 607 results_with_rows 1 608 results_with_rows 1 609 results_with_rows 1 610 results_with_rows 1 611 results_with_rows 1 612 results_with_rows 1 613 results_with_rows 1 614 results_with_rows 1 615 results_with_rows 1 616 results_with_rows 1 617 results_with_rows 1 618 results_with_rows 1 619 results_with_rows 1 620 results_with_rows 1 621 results_with_rows 1 622 results_with_rows 1 623 results_with_rows 1 624 results_with_rows 1 625 results_with_rows 1 626 results_with_rows 1 627 results_with_rows 1 628 results_with_rows 1 629 results_with_rows 1 630 results_with_rows 1

 results_with_rows 2 0 results_with_rows 2 1 results_with_rows 2 2 results_with_rows 2 3 results_with_rows 2 4 results_with_rows 2 5 results_with_rows 2 6 results_with_rows 2 7 results_with_rows 2 8 results_with_rows 2 9 results_with_rows 2 10 results_with_rows 2 11 results_with_rows 2 12 results_with_rows 2 13 results_with_rows 2 14 results_with_rows 2 15 results_with_rows 2 16 results_with_rows 2 17 results_with_rows 2 18 results_with_rows 2 19 results_with_rows 2 20 results_with_rows 2 21 results_with_rows 2 22 results_with_rows 2 23 results_with_rows 2 24 results_with_rows 2 25 results_with_rows 2 26 results_with_rows 2 27 results_with_rows 2 28 results_with_rows 2 29 results_with_rows 2 30 results_with_rows 2 31 results_with_rows 2 32 results_with_rows 2 33 results_with_rows 2 34 results_with_rows 2 35 results_with_rows 2 36 results_with_rows 2 37 results_with_rows 2 38 results_with_rows 2 39 results_with_rows 2 40 results_with_rows 2 41 

2 485 results_with_rows 2 486 results_with_rows 2 487 results_with_rows 2 488 results_with_rows 2 489 results_with_rows 2 490 results_with_rows 2 491 results_with_rows 2 492 results_with_rows 2 493 results_with_rows 2 494 results_with_rows 2 495 results_with_rows 2 496 results_with_rows 2 497 results_with_rows 2 498 results_with_rows 2 499 results_with_rows 2 500 results_with_rows 2 501 results_with_rows 2 502 results_with_rows 2 503 results_with_rows 2 504 results_with_rows 2 505 results_with_rows 2 506 results_with_rows 2 507 results_with_rows 2 508 results_with_rows 2 509 results_with_rows 2 510 results_with_rows 2 511 results_with_rows 2 512 results_with_rows 2 513 results_with_rows 2 514 results_with_rows 2 515 results_with_rows 2 516 results_with_rows 2 517 results_with_rows 2 518 results_with_rows 2 519 results_with_rows 2 520 results_with_rows 2 521 results_with_rows 2 522 results_with_rows 2 523 results_with_rows 2 524 results_with_rows 

 2 920 results_with_rows 2 921 results_with_rows 2 922 results_with_rows 2 923 results_with_rows 2 924 results_with_rows 2 925 results_with_rows 2 926 results_with_rows 2 927 results_with_rows 2 928 results_with_rows 2 929 results_with_rows 2 930 results_with_rows 2 931 results_with_rows 2 932 results_with_rows 2 933 results_with_rows 2 934 results_with_rows 2 935 results_with_rows 2 936 results_with_rows 2 937 results_with_rows 2 938 results_with_rows 2 939 results_with_rows 2 940 results_with_rows 2 941 results_with_rows 2 942 results_with_rows 2 943 results_with_rows 2 944 results_with_rows 2 945 results_with_rows 2 946 results_with_rows 2 947 results_with_rows 2 948 results_with_rows 2 949 results_with_rows 2 950 results_with_rows 2 951 results_with_rows 2 952 results_with_rows 2 953 results_with_rows 2 954 results_with_rows 2 955 results_with_rows 2 956 results_with_rows 2 957 results_with_rows 2 958 results_with_rows 2 959 results_with_rows

 results_with_rows 3 604 results_with_rows 3 605 results_with_rows 3 606 results_with_rows 3 607 results_with_rows 3 608 results_with_rows 3 609 results_with_rows 3 610 results_with_rows 3 611 results_with_rows 3 612 results_with_rows 3 613 results_with_rows 3 614 results_with_rows 3 615 results_with_rows 3 616 results_with_rows 3 617 results_with_rows 3 618 results_with_rows 3 619 results_with_rows 3 620 results_with_rows 3 621 results_with_rows 3 622 results_with_rows 3 623 results_with_rows 3 624 results_with_rows 3 625 results_with_rows 3 626 results_with_rows 3 627 results_with_rows 3 628 results_with_rows 3 629 results_with_rows 3 630 results_with_rows 3 631 results_with_rows 3 632 results_with_rows 3 633 results_with_rows 3 634 results_with_rows 3 635 results_with_rows 3 636 results_with_rows 3 637 results_with_rows 3 638 results_with_rows 3 639 results_with_rows 3 640 results_with_rows 3 641 results_with_rows 3 642 results_with_rows 3 643

 results_with_rows 4 0 results_with_rows 4 1 results_with_rows 4 2 results_with_rows 4 3 results_with_rows 4 4 results_with_rows 4 5 results_with_rows 4 6 results_with_rows 4 7 results_with_rows 4 8 results_with_rows 4 9 results_with_rows 4 10 results_with_rows 4 11 results_with_rows 4 12 results_with_rows 4 13 results_with_rows 4 14 results_with_rows 4 15 results_with_rows 4 16 results_with_rows 4 17 results_with_rows 4 18 results_with_rows 4 19 results_with_rows 4 20 results_with_rows 4 21 results_with_rows 4 22 results_with_rows 4 23 results_with_rows 4 24 results_with_rows 4 25 results_with_rows 4 26 results_with_rows 4 27 results_with_rows 4 28 results_with_rows 4 29 results_with_rows 4 30 results_with_rows 4 31 results_with_rows 4 32 results_with_rows 4 33 results_with_rows 4 34 results_with_rows 4 35 results_with_rows 4 36 results_with_rows 4 37 results_with_rows 4 38 results_with_rows 4 39 results_with_rows 4 40 results_with_rows 4 41 

4 509 results_with_rows 4 510 results_with_rows 4 511 results_with_rows 4 512 results_with_rows 4 513 results_with_rows 4 514 results_with_rows 4 515 results_with_rows 4 516 results_with_rows 4 517 results_with_rows 4 518 results_with_rows 4 519 results_with_rows 4 520 results_with_rows 4 521 results_with_rows 4 522 results_with_rows 4 523 results_with_rows 4 524 results_with_rows 4 525 results_with_rows 4 526 results_with_rows 4 527 results_with_rows 4 528 results_with_rows 4 529 results_with_rows 4 530 results_with_rows 4 531 results_with_rows 4 532 results_with_rows 4 533 results_with_rows 4 534 results_with_rows 4 535 results_with_rows 4 536 results_with_rows 4 537 results_with_rows 4 538 results_with_rows 4 539 results_with_rows 4 540 results_with_rows 4 541 results_with_rows 4 542 results_with_rows 4 543 results_with_rows 4 544 results_with_rows 4 545 results_with_rows 4 546 results_with_rows 4 547 results_with_rows 4 548 results_with_rows 

 982 results_with_rows 4 983 results_with_rows 4 984 results_with_rows 4 985 results_with_rows 4 986 results_with_rows 4 987 results_with_rows 4 988 results_with_rows 4 989 results_with_rows 4 990 results_with_rows 4 991 results_with_rows 4 992 results_with_rows 4 993 results_with_rows 4 994 results_with_rows 4 995 results_with_rows 4 996 results_with_rows 4 997 results_with_rows 4 998 results_with_rows 4 999 results_with_rows 5 0 results_with_rows 5 1 results_with_rows 5 2 results_with_rows 5 3 results_with_rows 5 4 results_with_rows 5 5 results_with_rows 5 6 results_with_rows 5 7 results_with_rows 5 8 results_with_rows 5 9 results_with_rows 5 10 results_with_rows 5 11 results_with_rows 5 12 results_with_rows 5 13 results_with_rows 5 14 results_with_rows 5 15 results_with_rows 5 16 results_with_rows 5 17 results_with_rows 5 18 results_with_rows 5 19 results_with_rows 5 20 results_with_rows 5 21 results_with_rows 5 22 results_with_rows 5 23 res

573 results_with_rows 5 574 results_with_rows 5 575 results_with_rows 5 576 results_with_rows 5 577 results_with_rows 5 578 results_with_rows 5 579 results_with_rows 5 580 results_with_rows 5 581 results_with_rows 5 582 results_with_rows 5 583 results_with_rows 5 584 results_with_rows 5 585 results_with_rows 5 586 results_with_rows 5 587 results_with_rows 5 588 results_with_rows 5 589 results_with_rows 5 590 results_with_rows 5 591 results_with_rows 5 592 results_with_rows 5 593 results_with_rows 5 594 results_with_rows 5 595 results_with_rows 5 596 results_with_rows 5 597 results_with_rows 5 598 results_with_rows 5 599 results_with_rows 5 600 results_with_rows 5 601 results_with_rows 5 602 results_with_rows 5 603 results_with_rows 5 604 results_with_rows 5 605 results_with_rows 5 606 results_with_rows 5 607 results_with_rows 5 608 results_with_rows 5 609 results_with_rows 5 610 results_with_rows 5 611 results_with_rows 5 612 results_with_rows 5 

 results_with_rows 6 0 results_with_rows 6 1 results_with_rows 6 2 results_with_rows 6 3 results_with_rows 6 4 results_with_rows 6 5 results_with_rows 6 6 results_with_rows 6 7 results_with_rows 6 8 results_with_rows 6 9 results_with_rows 6 10 results_with_rows 6 11 results_with_rows 6 12 results_with_rows 6 13 results_with_rows 6 14 results_with_rows 6 15 results_with_rows 6 16 results_with_rows 6 17 results_with_rows 6 18 results_with_rows 6 19 results_with_rows 6 20 results_with_rows 6 21 results_with_rows 6 22 results_with_rows 6 23 results_with_rows 6 24 results_with_rows 6 25 results_with_rows 6 26 results_with_rows 6 27 results_with_rows 6 28 results_with_rows 6 29 results_with_rows 6 30 results_with_rows 6 31 results_with_rows 6 32 results_with_rows 6 33 results_with_rows 6 34 results_with_rows 6 35 results_with_rows 6 36 results_with_rows 6 37 results_with_rows 6 38 results_with_rows 6 39 results_with_rows 6 40 results_with_rows 6 41 

 6 562 results_with_rows 6 563 results_with_rows 6 564 results_with_rows 6 565 results_with_rows 6 566 results_with_rows 6 567 results_with_rows 6 568 results_with_rows 6 569 results_with_rows 6 570 results_with_rows 6 571 results_with_rows 6 572 results_with_rows 6 573 results_with_rows 6 574 results_with_rows 6 575 results_with_rows 6 576 results_with_rows 6 577 results_with_rows 6 578 results_with_rows 6 579 results_with_rows 6 580 results_with_rows 6 581 results_with_rows 6 582 results_with_rows 6 583 results_with_rows 6 584 results_with_rows 6 585 results_with_rows 6 586 results_with_rows 6 587 results_with_rows 6 588 results_with_rows 6 589 results_with_rows 6 590 results_with_rows 6 591 results_with_rows 6 592 results_with_rows 6 593 results_with_rows 6 594 results_with_rows 6 595 results_with_rows 6 596 results_with_rows 6 597 results_with_rows 6 598 results_with_rows 6 599 results_with_rows 6 600 results_with_rows 6 601 results_with_rows

 results_with_rows 7 0 results_with_rows 7 1 results_with_rows 7 2 results_with_rows 7 3 results_with_rows 7 4 results_with_rows 7 5 results_with_rows 7 6 results_with_rows 7 7 results_with_rows 7 8 results_with_rows 7 9 results_with_rows 7 10 results_with_rows 7 11 results_with_rows 7 12 results_with_rows 7 13 results_with_rows 7 14 results_with_rows 7 15 results_with_rows 7 16 results_with_rows 7 17 results_with_rows 7 18 results_with_rows 7 19 results_with_rows 7 20 results_with_rows 7 21 results_with_rows 7 22 results_with_rows 7 23 results_with_rows 7 24 results_with_rows 7 25 results_with_rows 7 26 results_with_rows 7 27 results_with_rows 7 28 results_with_rows 7 29 results_with_rows 7 30 results_with_rows 7 31 results_with_rows 7 32 results_with_rows 7 33 results_with_rows 7 34 results_with_rows 7 35 results_with_rows 7 36 results_with_rows 7 37 results_with_rows 7 38 results_with_rows 7 39 results_with_rows 7 40 results_with_rows 7 41 

 577 results_with_rows 7 578 results_with_rows 7 579 results_with_rows 7 580 results_with_rows 7 581 results_with_rows 7 582 results_with_rows 7 583 results_with_rows 7 584 results_with_rows 7 585 results_with_rows 7 586 results_with_rows 7 587 results_with_rows 7 588 results_with_rows 7 589 results_with_rows 7 590 results_with_rows 7 591 results_with_rows 7 592 results_with_rows 7 593 results_with_rows 7 594 results_with_rows 7 595 results_with_rows 7 596 results_with_rows 7 597 results_with_rows 7 598 results_with_rows 7 599 results_with_rows 7 600 results_with_rows 7 601 results_with_rows 7 602 results_with_rows 7 603 results_with_rows 7 604 results_with_rows 7 605 results_with_rows 7 606 results_with_rows 7 607 results_with_rows 7 608 results_with_rows 7 609 results_with_rows 7 610 results_with_rows 7 611 results_with_rows 7 612 results_with_rows 7 613 results_with_rows 7 614 results_with_rows 7 615 results_with_rows 7 616 results_with_rows 7

 results_with_rows 8 0 results_with_rows 8 1 results_with_rows 8 2 results_with_rows 8 3 results_with_rows 8 4 results_with_rows 8 5 results_with_rows 8 6 results_with_rows 8 7 results_with_rows 8 8 results_with_rows 8 9 results_with_rows 8 10 results_with_rows 8 11 results_with_rows 8 12 results_with_rows 8 13 results_with_rows 8 14 results_with_rows 8 15 results_with_rows 8 16 results_with_rows 8 17 results_with_rows 8 18 results_with_rows 8 19 results_with_rows 8 20 results_with_rows 8 21 results_with_rows 8 22 results_with_rows 8 23 results_with_rows 8 24 results_with_rows 8 25 results_with_rows 8 26 results_with_rows 8 27 results_with_rows 8 28 results_with_rows 8 29 results_with_rows 8 30 results_with_rows 8 31 results_with_rows 8 32 results_with_rows 8 33 results_with_rows 8 34 results_with_rows 8 35 results_with_rows 8 36 results_with_rows 8 37 results_with_rows 8 38 results_with_rows 8 39 results_with_rows 8 40 results_with_rows 8 41 

Things are slow now.

In [8]:
df.head()

Unnamed: 0,URI,Item Label,Project Label,Project URI,Context (1),Context (2),Context (3),Context (4),Context (5),Context URI,...,Has type,Chronotype,Collection Type,Material,Period,Extant Part,Fabric group,match_num,match_name,match_id
0,http://opencontext.org/subjects/8C2A609B-B20B-...,Batch 131 (exp),Pyla-Koutsopetria Archaeological Project I: Pe...,http://opencontext.org/projects/3F6DCD13-A476-...,Cyprus,PKAP Survey Area,Vigla,Unit 501,,http://opencontext.org/subjects/C3EDBDC2-6B6B-...,...,,"Coarse Ware, Roman Late",Survey,pottery,"Roman, Late",rim,coarse,0,,
1,http://opencontext.org/subjects/636D6826-BCBD-...,Batch 17,Pyla-Koutsopetria Archaeological Project I: Pe...,http://opencontext.org/projects/3F6DCD13-A476-...,Cyprus,PKAP Survey Area,Dhekeleia,Unit 223,,http://opencontext.org/subjects/8E773523-4391-...,...,Late Roman 1 Amphora; amphora,"Amphora, Late Roman 1",Survey,pottery,"Roman, Late",handle,amphora,0,,
2,http://opencontext.org/subjects/CA8C3EE0-0F09-...,Batch 19,Pyla-Koutsopetria Archaeological Project I: Pe...,http://opencontext.org/projects/3F6DCD13-A476-...,Cyprus,PKAP Survey Area,West Pyla,Unit 546,,http://opencontext.org/subjects/34C7E1EC-F44D-...,...,Late Roman 1 Amphora; amphora,"Amphora, Late Roman 1",Survey,pottery,"Roman, Late",handle,amphora,0,,
3,http://opencontext.org/subjects/622BEE81-FC52-...,Batch 140 (exp),Pyla-Koutsopetria Archaeological Project I: Pe...,http://opencontext.org/projects/3F6DCD13-A476-...,Cyprus,PKAP Survey Area,Vigla,Unit 501,,http://opencontext.org/subjects/C3EDBDC2-6B6B-...,...,,"Black Glazed, Attic, Classical",Survey,pottery,Classical,handle,fine,0,,
4,http://opencontext.org/subjects/58920419-0245-...,Batch 7 (exp),Pyla-Koutsopetria Archaeological Project I: Pe...,http://opencontext.org/projects/3F6DCD13-A476-...,Cyprus,PKAP Survey Area,Vigla,Unit 501,,http://opencontext.org/subjects/C3EDBDC2-6B6B-...,...,,"Storage, Archaic Basket Handle",Survey,pottery,Archaic-Hellenistic,handle,coarse,0,,


In [9]:
matching_results = {}

p_recon = PeriodoReconciler(host='localhost:8142')

for file in OPENCONTEXT_TEST_FILES:
    csv_path = file['csv_path']
    kw = file['kw']
    print ("\r{}".format(csv_path), end='')
    df = reconcile_to_csv_df(csv_path, kw, p_recon)
    matching_results[csv_path] = df # df.match_num.value_counts()


 results_with_rows 18 999Petra Artifacts.csvh Periods.csvv4 3 6044 606567 979511616 422 6512 602 3 620 624 645 632 5261 933488904 5 615588 7 566 8 91756910 58711 598 12 559551 15 470 15 921 16 582616 538972

KeyboardInterrupt: 

In [None]:
from itertools import islice

for (csv_path, df) in islice(matching_results.items(),None):
    print (csv_path, df.match_num.value_counts().get(0), df.match_num.value_counts().get(1))

        

In [None]:
df = matching_results[OPENCONTEXT_TEST_FILES[1]['csv_path']]

df.head()

# PKAP specifically

In [10]:
# let's look at the PKAP dataset
# look for unique 4-tuples in the PKAP dataset

from collections import Counter

recon_inputs = ['query', 'location', 'start', 'stop']

#path = '../test-data/OpenContext/Cyprus PKAP Survey.csv'
path = '../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv'

#df = matching_results[path]
df = pd.read_csv(path)
df = df.fillna('')

_ = df[[kw_map[path][input_] for input_ in recon_inputs]]
pkap_counts = Counter([tuple(x) for x in _.values])
len(pkap_counts)

78

In [11]:
pkap_counts

Counter({('', 'Cyprus', '-1050', '-312'): 3,
         ('', 'Cyprus', '-1050', '-751'): 3,
         ('', 'Cyprus', '-2500', '-1000'): 1,
         ('', 'Cyprus', '-311', '-100'): 3,
         ('', 'Cyprus', '-311', '299'): 1,
         ('', 'Cyprus', '-474', '-100'): 3,
         ('', 'Cyprus', '-474', '-312'): 2,
         ('', 'Cyprus', '-474', '749'): 1,
         ('', 'Cyprus', '-750', '-100'): 15,
         ('', 'Cyprus', '-750', '-475'): 2,
         ('', 'Cyprus', '-750', '749'): 40,
         ('', 'Cyprus', '-9000', '-9000'): 1,
         ('', 'Cyprus', '-99', '299'): 15,
         ('', 'Cyprus', '-99', '749'): 25,
         ('', 'Cyprus', '-999', '-999'): 1,
         ('', 'Cyprus', '1191', '1191'): 12,
         ('', 'Cyprus', '1191', '1570'): 6,
         ('', 'Cyprus', '1571', '1877'): 1,
         ('', 'Cyprus', '1878', '1878'): 1,
         ('', 'Cyprus', '300', '749'): 216,
         ('', 'Cyprus', '750', '1570'): 1,
         ('', 'Cyprus', 'FALSE', 'FALSE'): 18,
         ('Ancient', 'Cypr

In [12]:
queries = []

# use str(key) as label

for key in pkap_counts.keys():

    q_dict = dict(zip(recon_inputs, key))
    queries.append(
        RQuery(q_dict['query'], label=str(key), 
        properties=[
          RProperty('location', q_dict['location']),
          RProperty('start', q_dict['start']),
          RProperty('stop', q_dict['stop'])
        ])
    )

recon_results = p_recon.reconcile(queries, method='post')

In [13]:
# compare with the hand-reconciled CSV file
# ../test-data/OpenContext/Petra Artifacts-reconiled.csv

import csv

#csv_path = "../test-data/OpenContext/Petra Artifacts-reconciled.csv"
csv_path = "../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv"

with contextlib.ExitStack() as stack:
    csvfile = stack.enter_context(open(csv_path))

    df = pd.read_csv(csvfile)
    df = df.fillna('')
    df['rquery'] = df.apply(lambda row: tuple([row[kw_map[path][input_]] for input_ in recon_inputs]), axis=1)

In [14]:
# establish that Adam was consistent in reconciliation judgments

k = df.groupby('rquery').apply(lambda rows: len(rows['recon-match-id'].unique()))
assert np.alltrue(k==1)

In [15]:
# mapping

reconciled_map = df.groupby('rquery').apply(lambda rows: rows['recon-match-id'].unique()[0])

In [16]:
# now compare mapping to recon_results

def id_loc_in_list(id_, list_):
    if len(id_) == 0:
        if len(list_) == 0:
            return -1
        else:
            return -1
        
    pos = -1
    for (i, item) in enumerate(list_):
        if id_ == item:
            pos = i
            break
            
    return pos


matched_results = []

for key in pkap_counts.keys():
    v = recon_results[str(key)]
    result_ids = [r['id'] for r in v['result']]
    print (key, reconciled_map[key], len(result_ids), id_loc_in_list(reconciled_map[key], result_ids))
    matched_results.append({'qt':key, 'candidate0': reconciled_map[key]})


('Late Roman', 'Cyprus', '300', '749') http://n2t.net/ark:/99152/p0dg76fbqff 1 0
('Classical', 'Cyprus', '-474', '-312') http://n2t.net/ark:/99152/p0dg76fk4nc 8 1
('Archaic-Hellenistic', 'Cyprus', '-750', '-100') http://n2t.net/ark:/99152/p0dg76f4rhx 19 0
('Classical-Hellenistic', 'Cyprus', '-474', '-100') http://n2t.net/ark:/99152/p0dg76fkv78 18 0
('Cypro-Archaic', 'Cyprus', '-750', '-312') http://n2t.net/ark:/99152/p08m57h2cv3 1 0
('Roman', 'Cyprus', '-99', '749') http://n2t.net/ark:/99152/p0dg76fx784 5 0
('Iron Age', 'Cyprus', '-1050', '-312') http://n2t.net/ark:/99152/p0dg76f6zd3 10 0
('Hellenistic', 'Cyprus', '-311', '-100') http://n2t.net/ark:/99152/p0dg76fqj57 11 0
('Hellenistic-Early Roman', 'Cyprus', '-311', '299') http://n2t.net/ark:/99152/p0dg76fk9wq 12 0
('Ancient Historic', 'Cyprus', '-750', '749') http://n2t.net/ark:/99152/p0dg76fxkgg 2 0
('Ancient', 'Cyprus', '-9000', '749') http://n2t.net/ark:/99152/p0dg76fj92d 1 0
('Cypro-Geometric', 'Cyprus', '-1050', '-751') http://n

In [17]:
# compare the sets of 4-tuples that feed into the reconcilers 

raw_tuples = set(pkap_counts.keys())
reconciled_tuples = set(df['rquery'])

raw_tuples == reconciled_tuples

True

In [18]:
# Check by hand

def reconcile(p_recon, qt, candidate0=None):
    """
    returns all candidates, position of candidate0 among the candidates
    """
    queries = [RQuery(qt[0], label='q0', properties=[
            RProperty('location', qt[1]),
            RProperty('start', qt[2]),
            RProperty('end', qt[3])
    ])]
    r = p_recon.reconcile(queries, method='post')['q0']
    
    # import pdb; pdb.set_trace()
    
    results = r['result']
    
    if candidate0 is not None:
        result_ids = [r['id'] for r in results]
        loc_id = id_loc_in_list(candidate0, result_ids)
        if loc_id > -1:
            matched = results[loc_id]['match']
        else:
            matched = False
    else:
        loc_id = -1
    
    return {
        'results': results,
        'candidate0': candidate0,
        'loc_id':loc_id,
        'matched':matched,
        'results_count': len(results)
    }


    


In [19]:
# r = reconcile(p_recon,('Late Roman', 'Cyprus', '300', '749'),  'http://n2t.net/ark:/99152/p0dg76fbqff')
r = reconcile(p_recon,('Classical', 'Cyprus', '-474', '-312'), 'http://n2t.net/ark:/99152/p0dg76fk4nc')

# r = reconcile(p_recon,('Ceramic Age', 'Cyprus', '-1700', '-1700'), 'http://n2t.net/ark:/99152/p0dg76fzg6j')

 
r['loc_id'], r['results_count'], r['results'][r['loc_id']]['name'] if r['loc_id'] > -1 else '', r['matched']

(1, 6, 'Archaic-Classical [Cyprus, Cyprus: -0749 to -0311]', False)

In [20]:
# Loop through the matches done in OpenReconcile
# and redo the reconcilation, calculating whether the results matched and which results matched

results = []

for match in matched_results:
    r = reconcile(p_recon, match['qt'], match['candidate0'])
    try:
        match_name = r['results'][r['loc_id']]['name'] if (r['loc_id'] > -1 and r['results_count'] > 0) else ''
        results.append({'query': match['qt'][0],
                'location': match['qt'][1],
                'start': match['qt'][2],
                'stop': match['qt'][3],
                'loc_id': r['loc_id'], 
                'results_count': r['results_count'], 
                'match_name': match_name,
                'match_id':match['candidate0'],
                'matched':r['matched']
            })
    except Exception as e:
        print (e)

In [21]:
df2 = DataFrame(results, columns=['query', 'location', 'start', 'stop', 'loc_id', 'results_count',
                                  'match_name', 'match_id', 'matched'])
df2.head()

Unnamed: 0,query,location,start,stop,loc_id,results_count,match_name,match_id,matched
0,Late Roman,Cyprus,300,749,0,1,"Late Roman [Cyprus, Cyprus: 0300 to 0749]",http://n2t.net/ark:/99152/p0dg76fbqff,True
1,Classical,Cyprus,-474,-312,1,6,"Archaic-Classical [Cyprus, Cyprus: -0749 to -0...",http://n2t.net/ark:/99152/p0dg76fk4nc,False
2,Archaic-Hellenistic,Cyprus,-750,-100,0,9,"Archaic-Hellenistic [Cyprus, Cyprus: -0749 to ...",http://n2t.net/ark:/99152/p0dg76f4rhx,False
3,Classical-Hellenistic,Cyprus,-474,-100,5,9,"Classical-Hellenistic [Cyprus, Cyprus: -0473 t...",http://n2t.net/ark:/99152/p0dg76fkv78,False
4,Cypro-Archaic,Cyprus,-750,-312,0,1,Cypro-Archaic [Cyprus: -0749 to -0449],http://n2t.net/ark:/99152/p08m57h2cv3,True


In [22]:
df2.matched.value_counts()

False    60
True     18
Name: matched, dtype: int64

In [23]:
df2[df2.matched]

Unnamed: 0,query,location,start,stop,loc_id,results_count,match_name,match_id,matched
0,Late Roman,Cyprus,300,749,0,1,"Late Roman [Cyprus, Cyprus: 0300 to 0749]",http://n2t.net/ark:/99152/p0dg76fbqff,True
4,Cypro-Archaic,Cyprus,-750,-312,0,1,Cypro-Archaic [Cyprus: -0749 to -0449],http://n2t.net/ark:/99152/p08m57h2cv3,True
10,Ancient,Cyprus,-9000,749,0,1,"Ancient [Cyprus, Cyprus: -8999 to 0749]",http://n2t.net/ark:/99152/p0dg76fj92d,True
14,Early Modern,Cyprus,1878,1949,0,1,"Early Modern [Cyprus, Cyprus: 1878 to 1949]",http://n2t.net/ark:/99152/p0dg76fcnx9,True
20,Classical,Cyprus,-750,-475,0,1,"Archaic-Classical [Cyprus, Cyprus: -0749 to -0...",http://n2t.net/ark:/99152/p0dg76fk4nc,True
22,Ceramic Age,Cyprus,-1700,-1700,0,1,"Ceramic Age [Cyprus, Cyprus: -3499 to 2008]",http://n2t.net/ark:/99152/p0dg76fzg6j,True
26,Post-Prehistoric,Cyprus,-999,-999,0,1,"Post-Prehistoric [Cyprus, Cyprus: -0998 to 2008]",http://n2t.net/ark:/99152/p0dg76f98q7,True
29,Unknown,Cyprus,-9000,-9000,0,1,"Unknown [Cyprus, Cyprus: -8999 to 2008]",http://n2t.net/ark:/99152/p0dg76f2smn,True
35,Cypro-Archaic,Cyprus,-750,-475,0,1,Cypro-Archaic [Cyprus: -0749 to -0449],http://n2t.net/ark:/99152/p08m57h2cv3,True
37,Ancient-Medieval,Cyprus,-3500,1570,0,1,"Ancient-Medieval [Cyprus, Cyprus: -3499 to 1570]",http://n2t.net/ark:/99152/p0dg76fjh5t,True


# sort these rows

* matched = True, False (matched desc)
* if loc_id > -1, ==-1
* loc_id ascending 



In [24]:
df2['sort_key'] = df2.apply(lambda row: (not row['matched'], 
                                         row['loc_id'] < 0, 
                                         row['loc_id'], 
                                         len(row['query']) == 0),
                             axis=1)
df2.head()

Unnamed: 0,query,location,start,stop,loc_id,results_count,match_name,match_id,matched,sort_key
0,Late Roman,Cyprus,300,749,0,1,"Late Roman [Cyprus, Cyprus: 0300 to 0749]",http://n2t.net/ark:/99152/p0dg76fbqff,True,"(False, False, 0, False)"
1,Classical,Cyprus,-474,-312,1,6,"Archaic-Classical [Cyprus, Cyprus: -0749 to -0...",http://n2t.net/ark:/99152/p0dg76fk4nc,False,"(True, False, 1, False)"
2,Archaic-Hellenistic,Cyprus,-750,-100,0,9,"Archaic-Hellenistic [Cyprus, Cyprus: -0749 to ...",http://n2t.net/ark:/99152/p0dg76f4rhx,False,"(True, False, 0, False)"
3,Classical-Hellenistic,Cyprus,-474,-100,5,9,"Classical-Hellenistic [Cyprus, Cyprus: -0473 t...",http://n2t.net/ark:/99152/p0dg76fkv78,False,"(True, False, 5, False)"
4,Cypro-Archaic,Cyprus,-750,-312,0,1,Cypro-Archaic [Cyprus: -0749 to -0449],http://n2t.net/ark:/99152/p08m57h2cv3,True,"(False, False, 0, False)"


In [25]:
df2.sort_values(by='sort_key')

Unnamed: 0,query,location,start,stop,loc_id,results_count,match_name,match_id,matched,sort_key
0,Late Roman,Cyprus,300,749,0,1,"Late Roman [Cyprus, Cyprus: 0300 to 0749]",http://n2t.net/ark:/99152/p0dg76fbqff,True,"(False, False, 0, False)"
59,Unknown,Cyprus,-311,-311,0,1,"Unknown [Cyprus, Cyprus: -8999 to 2008]",http://n2t.net/ark:/99152/p0dg76f2smn,True,"(False, False, 0, False)"
52,Ceramic Age,Cyprus,-3500,-3500,0,1,"Ceramic Age [Cyprus, Cyprus: -3499 to 2008]",http://n2t.net/ark:/99152/p0dg76fzg6j,True,"(False, False, 0, False)"
48,Early Medieval,Cyprus,750,1190,0,1,"Early Medieval [Cyprus, Cyprus: 0750 to 1190]",http://n2t.net/ark:/99152/p0dg76fq5c5,True,"(False, False, 0, False)"
47,Ancient,Cyprus,-3500,749,0,1,"Ancient [Cyprus, Cyprus: -8999 to 0749]",http://n2t.net/ark:/99152/p0dg76fj92d,True,"(False, False, 0, False)"
45,Unknown,Cyprus,-2500,-2500,0,1,"Unknown [Cyprus, Cyprus: -8999 to 2008]",http://n2t.net/ark:/99152/p0dg76f2smn,True,"(False, False, 0, False)"
43,Late Helladic-Modern Present,Cyprus,-1700,-1700,0,1,"Bronze Age-Modern Present [Cyprus, Cyprus: -24...",http://n2t.net/ark:/99152/p0dg76f6gfq,True,"(False, False, 0, False)"
41,Roman-Modern,Cyprus,-99,-99,0,1,"Roman-Modern [Cyprus, Cyprus: -0098 to 2008]",http://n2t.net/ark:/99152/p0dg76fd4wb,True,"(False, False, 0, False)"
37,Ancient-Medieval,Cyprus,-3500,1570,0,1,"Ancient-Medieval [Cyprus, Cyprus: -3499 to 1570]",http://n2t.net/ark:/99152/p0dg76fjh5t,True,"(False, False, 0, False)"
35,Cypro-Archaic,Cyprus,-750,-475,0,1,Cypro-Archaic [Cyprus: -0749 to -0449],http://n2t.net/ark:/99152/p08m57h2cv3,True,"(False, False, 0, False)"


In [26]:
df2.loc_id.value_counts()

 0    36
-1    27
 1     5
 2     3
 7     2
 3     2
 6     1
 5     1
 4     1
Name: loc_id, dtype: int64

In [27]:
!ls ../test-data/

[34mOpenContext[m[m                      periodo_simple_example-recon.csv
[34mperiodo_reconciler_testdata[m[m      periodo_simple_example.csv


In [28]:
df2.sort_values(by='sort_key').to_csv('../test-data/OpenContext/Cyprus PKAP Survey-2019.04.02.csv')

## rows matched vs not matched for each test file

In [29]:

rows_html = ("".join(["<tr><td>{}</td><td>{}</td><td>{}</td></tr>".format(
 csv_path, df.match_num.value_counts().get(1, 0), df.match_num.value_counts().get(0, 0))
for (csv_path, df) in matching_results.items()]))


html_ = """<table>
<tr>
    <th>path</th>
    <th>matches</th>
    <th>non-matches</th>
</tr>
{}
</table>""".format(rows_html)

display(HTML(html_))

path,matches,non-matches
../test-data/OpenContext/Cyprus PKAP Survey.csv,456,7981
../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv,3056,5381
../test-data/OpenContext/European Cattle with Periods.csv,398,5270


## which combination of query/location/start/stop were matched?

In [30]:
matching_results.keys()

dict_keys(['../test-data/OpenContext/Cyprus PKAP Survey.csv', '../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv', '../test-data/OpenContext/European Cattle with Periods.csv'])

In [31]:
rows_html = []

for (csv_path, df) in matching_results.items():
    kw = kw_map[csv_path]
    counter = Counter(list(df.apply(lambda row: (row[kw['query']],
                      row[kw['location']],
                      row[kw['start']],
                      row[kw['stop']],
                      row['match_num'],
                      row['match_id'],
                      row['match_name']
                     ), axis=1)))
    
    matching_items = [(k,c) for (k,c) in counter.items() if k[4] > 0]

    for (match, count) in matching_items:
        row_html = """<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td>
                      <td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>""".format(
             csv_path, match[0], match[1], match[2], match[3], count, match[5], match[6])
        rows_html.append(row_html)
        

rows_html = "".join(rows_html)

html_ = """<table>
<tr>
    <th>path</th>
    <th>query</th>
    <th>location</th>
    <th>start</th>
    <th>stop</th>
    <th>num of rows</th>
    <th>match_id</th>
    <th>match_name</th>
</tr>
{}
</table>""".format(rows_html)
        
display(HTML(html_))

path,query,location,start,stop,num of rows,match_id,match_name
../test-data/OpenContext/Cyprus PKAP Survey.csv,Cypro-Archaic,Cyprus,-750,-312,3,http://n2t.net/ark:/99152/p08m57h2cv3,Cypro-Archaic [Cyprus: -0749 to -0449]
../test-data/OpenContext/Cyprus PKAP Survey.csv,Ancient,Cyprus,-9000,749,28,http://n2t.net/ark:/99152/p0dg76fj92d,"Ancient [Cyprus, Cyprus: -8999 to 0749]"
../test-data/OpenContext/Cyprus PKAP Survey.csv,Ceramic Age,Cyprus,-1700,-1700,1,http://n2t.net/ark:/99152/p0dg76fzg6j,"Ceramic Age [Cyprus, Cyprus: -3499 to 2008]"
../test-data/OpenContext/Cyprus PKAP Survey.csv,Post-Prehistoric,Cyprus,-999,-999,60,http://n2t.net/ark:/99152/p0dg76f98q7,"Post-Prehistoric [Cyprus, Cyprus: -0998 to 2008]"
../test-data/OpenContext/Cyprus PKAP Survey.csv,Unknown,Cyprus,-9000,-9000,179,http://n2t.net/ark:/99152/p0dg76f2smn,"Unknown [Cyprus, Cyprus: -8999 to 2008]"
../test-data/OpenContext/Cyprus PKAP Survey.csv,Cypro-Archaic,Cyprus,-750,-475,1,http://n2t.net/ark:/99152/p08m57h2cv3,Cypro-Archaic [Cyprus: -0749 to -0449]
../test-data/OpenContext/Cyprus PKAP Survey.csv,Ancient-Medieval,Cyprus,-3500,1570,92,http://n2t.net/ark:/99152/p0dg76fjh5t,"Ancient-Medieval [Cyprus, Cyprus: -3499 to 1570]"
../test-data/OpenContext/Cyprus PKAP Survey.csv,Roman-Modern,Cyprus,-99,-99,3,http://n2t.net/ark:/99152/p0dg76fd4wb,"Roman-Modern [Cyprus, Cyprus: -0098 to 2008]"
../test-data/OpenContext/Cyprus PKAP Survey.csv,Late Helladic-Modern Present,Cyprus,-1700,-1700,5,http://n2t.net/ark:/99152/p0dg76f6gfq,"Bronze Age-Modern Present [Cyprus, Cyprus: -2499 to 2008]"
../test-data/OpenContext/Cyprus PKAP Survey.csv,Unknown,Cyprus,-2500,-2500,76,http://n2t.net/ark:/99152/p0dg76f2smn,"Unknown [Cyprus, Cyprus: -8999 to 2008]"


In [32]:
len(counter)

168

# studying non-matches vs matches

In [33]:
# how to merge two counters?

from collections import Counter

def recon_data(df, csv_path, kw):

    if len(df):
        k0 = df.apply(lambda row: (
                          csv_path,
                          row[kw['query']],
                          row[kw['location']],
                          row[kw['start']],
                          row[kw['stop']],
                          row['match_num'],
                          row['match_id'],
                          row['match_name']
                         ), axis=1)
        k = list(k0)
        return (k)
    else:
        return []
    

matches = Counter()
non_matches = Counter()

for (csv_path, df) in matching_results.items():
    kw = kw_map[csv_path]
    matches.update(recon_data(df[df.match_num > 0], csv_path, kw))
    non_matches.update(recon_data(df[df.match_num == 0], csv_path, kw))



len(matches), len(non_matches)

(52, 273)

In [34]:
(csv_path, df) = list(matching_results.items())[1]
kw = kw_map[csv_path]
df2 = df[df.match_num > 0]
k = recon_data(df2, csv_path, kw)
len(k)

3056

In [35]:
for row in matches.keys():
    print(row)

('../test-data/OpenContext/Cyprus PKAP Survey.csv', 'Cypro-Archaic', 'Cyprus', '-750', '-312', 1, 'http://n2t.net/ark:/99152/p08m57h2cv3', 'Cypro-Archaic [Cyprus: -0749 to -0449]')
('../test-data/OpenContext/Cyprus PKAP Survey.csv', 'Ancient', 'Cyprus', '-9000', '749', 1, 'http://n2t.net/ark:/99152/p0dg76fj92d', 'Ancient [Cyprus, Cyprus: -8999 to 0749]')
('../test-data/OpenContext/Cyprus PKAP Survey.csv', 'Ceramic Age', 'Cyprus', '-1700', '-1700', 1, 'http://n2t.net/ark:/99152/p0dg76fzg6j', 'Ceramic Age [Cyprus, Cyprus: -3499 to 2008]')
('../test-data/OpenContext/Cyprus PKAP Survey.csv', 'Post-Prehistoric', 'Cyprus', '-999', '-999', 1, 'http://n2t.net/ark:/99152/p0dg76f98q7', 'Post-Prehistoric [Cyprus, Cyprus: -0998 to 2008]')
('../test-data/OpenContext/Cyprus PKAP Survey.csv', 'Unknown', 'Cyprus', '-9000', '-9000', 1, 'http://n2t.net/ark:/99152/p0dg76f2smn', 'Unknown [Cyprus, Cyprus: -8999 to 2008]')
('../test-data/OpenContext/Cyprus PKAP Survey.csv', 'Cypro-Archaic', 'Cyprus', '-750'

In [36]:
[ "|{}|".format("|".join([str(col) for col in row]))
 for row in matches.keys()]

['|../test-data/OpenContext/Cyprus PKAP Survey.csv|Cypro-Archaic|Cyprus|-750|-312|1|http://n2t.net/ark:/99152/p08m57h2cv3|Cypro-Archaic [Cyprus: -0749 to -0449]|',
 '|../test-data/OpenContext/Cyprus PKAP Survey.csv|Ancient|Cyprus|-9000|749|1|http://n2t.net/ark:/99152/p0dg76fj92d|Ancient [Cyprus, Cyprus: -8999 to 0749]|',
 '|../test-data/OpenContext/Cyprus PKAP Survey.csv|Ceramic Age|Cyprus|-1700|-1700|1|http://n2t.net/ark:/99152/p0dg76fzg6j|Ceramic Age [Cyprus, Cyprus: -3499 to 2008]|',
 '|../test-data/OpenContext/Cyprus PKAP Survey.csv|Post-Prehistoric|Cyprus|-999|-999|1|http://n2t.net/ark:/99152/p0dg76f98q7|Post-Prehistoric [Cyprus, Cyprus: -0998 to 2008]|',
 '|../test-data/OpenContext/Cyprus PKAP Survey.csv|Unknown|Cyprus|-9000|-9000|1|http://n2t.net/ark:/99152/p0dg76f2smn|Unknown [Cyprus, Cyprus: -8999 to 2008]|',
 '|../test-data/OpenContext/Cyprus PKAP Survey.csv|Cypro-Archaic|Cyprus|-750|-475|1|http://n2t.net/ark:/99152/p08m57h2cv3|Cypro-Archaic [Cyprus: -0749 to -0449]|',
 '|../

In [37]:
from IPython.display import Markdown, HTML, display
from jinja2 import Template

matches_template = Template("""
|path|query|location|start|stop|match_id|match_name|
|--|--|--|--|--|--|--|
{% for item in items %}{{item}}\n{% endfor %}
""")

Markdown(matches_template.render(items=[ "|{}|".format("|".join([str(col) for col in row]))
 for row in matches.keys()]))


|path|query|location|start|stop|match_id|match_name|
|--|--|--|--|--|--|--|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Cypro-Archaic|Cyprus|-750|-312|1|http://n2t.net/ark:/99152/p08m57h2cv3|Cypro-Archaic [Cyprus: -0749 to -0449]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Ancient|Cyprus|-9000|749|1|http://n2t.net/ark:/99152/p0dg76fj92d|Ancient [Cyprus, Cyprus: -8999 to 0749]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Ceramic Age|Cyprus|-1700|-1700|1|http://n2t.net/ark:/99152/p0dg76fzg6j|Ceramic Age [Cyprus, Cyprus: -3499 to 2008]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Post-Prehistoric|Cyprus|-999|-999|1|http://n2t.net/ark:/99152/p0dg76f98q7|Post-Prehistoric [Cyprus, Cyprus: -0998 to 2008]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Unknown|Cyprus|-9000|-9000|1|http://n2t.net/ark:/99152/p0dg76f2smn|Unknown [Cyprus, Cyprus: -8999 to 2008]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Cypro-Archaic|Cyprus|-750|-475|1|http://n2t.net/ark:/99152/p08m57h2cv3|Cypro-Archaic [Cyprus: -0749 to -0449]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Ancient-Medieval|Cyprus|-3500|1570|1|http://n2t.net/ark:/99152/p0dg76fjh5t|Ancient-Medieval [Cyprus, Cyprus: -3499 to 1570]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Roman-Modern|Cyprus|-99|-99|1|http://n2t.net/ark:/99152/p0dg76fd4wb|Roman-Modern [Cyprus, Cyprus: -0098 to 2008]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Late Helladic-Modern Present|Cyprus|-1700|-1700|1|http://n2t.net/ark:/99152/p0dg76f6gfq|Bronze Age-Modern Present [Cyprus, Cyprus: -2499 to 2008]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Unknown|Cyprus|-2500|-2500|1|http://n2t.net/ark:/99152/p0dg76f2smn|Unknown [Cyprus, Cyprus: -8999 to 2008]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Late Roman|Cyprus|300|749|1|http://n2t.net/ark:/99152/p0dg76fbqff|Late Roman [Cyprus, Cyprus: 0300 to 0749]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Ancient|Cyprus|-3500|749|1|http://n2t.net/ark:/99152/p0dg76fj92d|Ancient [Cyprus, Cyprus: -8999 to 0749]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Ceramic Age|Cyprus|-3500|-3500|1|http://n2t.net/ark:/99152/p0dg76fzg6j|Ceramic Age [Cyprus, Cyprus: -3499 to 2008]|
|../test-data/OpenContext/Cyprus PKAP Survey.csv|Unknown|Cyprus|-311|-311|1|http://n2t.net/ark:/99152/p0dg76f2smn|Unknown [Cyprus, Cyprus: -8999 to 2008]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Late Roman|Cyprus|300|749|1|http://n2t.net/ark:/99152/p0dg76fbqff|Late Roman [Cyprus, Cyprus: 0300 to 0749]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Cypro-Archaic|Cyprus|-750|-312|1|http://n2t.net/ark:/99152/p08m57h2cv3|Cypro-Archaic [Cyprus: -0749 to -0449]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Ancient|Cyprus|-9000|749|1|http://n2t.net/ark:/99152/p0dg76fj92d|Ancient [Cyprus, Cyprus: -8999 to 0749]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Early Modern|Cyprus|1878|1949|1|http://n2t.net/ark:/99152/p0dg76fcnx9|Early Modern [Cyprus, Cyprus: 1878 to 1949]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Ceramic Age|Cyprus|-1700|-1700|1|http://n2t.net/ark:/99152/p0dg76fzg6j|Ceramic Age [Cyprus, Cyprus: -3499 to 2008]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Post-Prehistoric|Cyprus|-999|-999|1|http://n2t.net/ark:/99152/p0dg76f98q7|Post-Prehistoric [Cyprus, Cyprus: -0998 to 2008]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Unknown|Cyprus|-9000|-9000|1|http://n2t.net/ark:/99152/p0dg76f2smn|Unknown [Cyprus, Cyprus: -8999 to 2008]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Cypro-Archaic|Cyprus|-750|-475|1|http://n2t.net/ark:/99152/p08m57h2cv3|Cypro-Archaic [Cyprus: -0749 to -0449]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Ancient-Medieval|Cyprus|-3500|1570|1|http://n2t.net/ark:/99152/p0dg76fjh5t|Ancient-Medieval [Cyprus, Cyprus: -3499 to 1570]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Early Medieval|Cyprus|1191|1570|1|http://n2t.net/ark:/99152/p0dg76fq5c5|Early Medieval [Cyprus, Cyprus: 0750 to 1190]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Roman-Modern|Cyprus|-99|-99|1|http://n2t.net/ark:/99152/p0dg76fd4wb|Roman-Modern [Cyprus, Cyprus: -0098 to 2008]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Late Helladic-Modern Present|Cyprus|-1700|-1700|1|http://n2t.net/ark:/99152/p0dg76f6gfq|Bronze Age-Modern Present [Cyprus, Cyprus: -2499 to 2008]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Unknown|Cyprus|-2500|-2500|1|http://n2t.net/ark:/99152/p0dg76f2smn|Unknown [Cyprus, Cyprus: -8999 to 2008]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Ancient|Cyprus|-3500|749|1|http://n2t.net/ark:/99152/p0dg76fj92d|Ancient [Cyprus, Cyprus: -8999 to 0749]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Early Medieval|Cyprus|750|1190|1|http://n2t.net/ark:/99152/p0dg76fq5c5|Early Medieval [Cyprus, Cyprus: 0750 to 1190]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Ceramic Age|Cyprus|-3500|-3500|1|http://n2t.net/ark:/99152/p0dg76fzg6j|Ceramic Age [Cyprus, Cyprus: -3499 to 2008]|
|../test-data/OpenContext/Cyprus-PKAP-Survey-2019-03-29.csv|Unknown|Cyprus|-311|-311|1|http://n2t.net/ark:/99152/p0dg76f2smn|Unknown [Cyprus, Cyprus: -8999 to 2008]|
|../test-data/OpenContext/European Cattle with Periods.csv|Mesolithic|Sweden|-7500|-7400|1|http://n2t.net/ark:/99152/p0qhb66jfkb|Äldre stenålder [Sweden, Sweden: -8200 to -4200]|
|../test-data/OpenContext/European Cattle with Periods.csv|Mesolithic|Sweden|-7470|-7440|1|http://n2t.net/ark:/99152/p0qhb66jfkb|Äldre stenålder [Sweden, Sweden: -8200 to -4200]|
|../test-data/OpenContext/European Cattle with Periods.csv|Mesolithic|Sweden|-6206|-6017|1|http://n2t.net/ark:/99152/p0qhb66jfkb|Äldre stenålder [Sweden, Sweden: -8200 to -4200]|
|../test-data/OpenContext/European Cattle with Periods.csv|Mesolithic|Sweden|-6590|-6570|1|http://n2t.net/ark:/99152/p0qhb66jfkb|Äldre stenålder [Sweden, Sweden: -8200 to -4200]|
|../test-data/OpenContext/European Cattle with Periods.csv|Late Bronze-Early Iron Age|Germany|-1500|-500|1|http://n2t.net/ark:/99152/p0qhb66qqd2|Späte Bronzezeit [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: -1300 to -0800]|
|../test-data/OpenContext/European Cattle with Periods.csv|Post-Roman to Medieval|Germany|1300|1400|1|http://n2t.net/ark:/99152/p0qhb662qrr|Mittelalter [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: 0568 to 1499]|
|../test-data/OpenContext/European Cattle with Periods.csv|Post-Roman to Medieval|Germany|800|900|1|http://n2t.net/ark:/99152/p0qhb662qrr|Mittelalter [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: 0568 to 1499]|
|../test-data/OpenContext/European Cattle with Periods.csv|Post-Roman to Medieval|Germany|700|800|1|http://n2t.net/ark:/99152/p0qhb662qrr|Mittelalter [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: 0568 to 1499]|
|../test-data/OpenContext/European Cattle with Periods.csv|Middle Neolithic|Germany|-4927|-4370|1|http://n2t.net/ark:/99152/p0qhb66wng7|Mittel Neolithikum [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: -4800 to -4251]|
|../test-data/OpenContext/European Cattle with Periods.csv|Mesolithic|Germany|-7400|-7000|1|http://n2t.net/ark:/99152/p0qhb668f69|Mesolithikum [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: -9550 to -5601]|
|../test-data/OpenContext/European Cattle with Periods.csv|Mesolithic|Spain|-9390|-8350|1|http://n2t.net/ark:/99152/p06v8w4qsqx|Mesolítico [Spain, Spain: -9999 to -5000]|
|../test-data/OpenContext/European Cattle with Periods.csv|Post-Roman to Medieval|Germany|1400|1500|1|http://n2t.net/ark:/99152/p0qhb662qrr|Mittelalter [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: 0568 to 1499]|
|../test-data/OpenContext/European Cattle with Periods.csv|Mesolithic|Sweden|-7480|-7450|1|http://n2t.net/ark:/99152/p0qhb66jfkb|Äldre stenålder [Sweden, Sweden: -8200 to -4200]|
|../test-data/OpenContext/European Cattle with Periods.csv|Neolithic|Germany|-4245|-3371|1|http://n2t.net/ark:/99152/p0qhb66d5b9|Neolithikum [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: -5600 to -4001]|
|../test-data/OpenContext/European Cattle with Periods.csv|Mesolithic-Neolithic|Germany|-5200|-3980|1|http://n2t.net/ark:/99152/p0qhb66d5b9|Neolithikum [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: -5600 to -4001]|
|../test-data/OpenContext/European Cattle with Periods.csv|Mesolithic|Spain|-6000|-5600|1|http://n2t.net/ark:/99152/p06v8w4qsqx|Mesolítico [Spain, Spain: -9999 to -5000]|
|../test-data/OpenContext/European Cattle with Periods.csv|Mesolithic|Spain|-10000|-7503|1|http://n2t.net/ark:/99152/p06v8w4qsqx|Mesolítico [Spain, Spain: -9999 to -5000]|
|../test-data/OpenContext/European Cattle with Periods.csv|Early Neolithic|Germany|-5800|-3000|1|http://n2t.net/ark:/99152/p0qhb66q5vq|Frühes Neolithikum [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: -5600 to -4801]|
|../test-data/OpenContext/European Cattle with Periods.csv|Early Neolithic|Germany|-5611|-4749|1|http://n2t.net/ark:/99152/p0qhb66q5vq|Frühes Neolithikum [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: -5600 to -4801]|
|../test-data/OpenContext/European Cattle with Periods.csv|Middle Neolithic|Germany|-4800|-4200|1|http://n2t.net/ark:/99152/p0qhb66wng7|Mittel Neolithikum [Germany, Switzerland, Czech Republic, Liechtenstein, Germany, Switzerland, Czech Republic, Liechtenstein: -4800 to -4251]|
|../test-data/OpenContext/European Cattle with Periods.csv|Post-Roman to Medieval|Poland|1300|1400|1|http://n2t.net/ark:/99152/p0zj6g8q32k|Medieval [Poland, Poland: 0600 to 1250]|


In [38]:
# matches that are wrong

queries = [
    RQuery("Not determined", label="with query",  properties=[
        RProperty('location', 'Spain'),
        RProperty('start', -1500),
        RProperty('end', -714)
    ]),     
    RQuery("", label="empty query",  properties=[
        RProperty('location', 'Spain'),
        RProperty('start', -1500),
        RProperty('end', -714)
    ]),
    RQuery("Spain", label="use location for query",  properties=[
        RProperty('location', 'Spain'),
        RProperty('start', -1500),
        RProperty('end', -714)
    ]),
    RQuery("Bronze", label="use location for query",  properties=[
        RProperty('location', 'Spain'),
        RProperty('start', -1500),
        RProperty('end', -714)
    ])   
]

r = p_recon.reconcile(queries, method='post')

r

{'empty query': {'result': []},
 'use location for query': {'result': [{'id': 'http://n2t.net/ark:/99152/p06v8w4q5mm',
    'match': False,
    'name': 'Edad del Bronce [Spain, Spain: -2299 to -1000]',
    'score': 0,
    'type': [{'id': 'http://www.w3.org/2004/02/skos/core#Concept',
      'name': 'Period definition'}]},
   {'id': 'http://n2t.net/ark:/99152/p0qhb668ps5',
    'match': False,
    'name': 'Edad del Bronce [Spain, Spain: -2300 to -0801]',
    'score': 0,
    'type': [{'id': 'http://www.w3.org/2004/02/skos/core#Concept',
      'name': 'Period definition'}]},
   {'id': 'http://n2t.net/ark:/99152/p07h9k65xxf',
    'match': False,
    'name': 'Edad del Bronce [Galicia and neighbouring areas in NW Spain, Galicia, Asturias, Castilla y Leon: -2300 to -0800]',
    'score': 1,
    'type': [{'id': 'http://www.w3.org/2004/02/skos/core#Concept',
      'name': 'Period definition'}]}]},
 'with query': {'result': []}}