## Create new manifestation measurements

The following code is used to create new statistics. They can be saved to a CSV file and be taken into account in the section above about existing measurements.

In [1]:
import pandas as pd
import sys, os

sys.path.insert(1, os.pardir)
import utils
import utils_stats

## Redo manifestation measurements

In [2]:
# Corpus versions for which the manifestation measurements are redone.
# Resulting mapping: version date -> [version date, description of what changed in that version].
# The date is written once per entry and reused as both key and first list element.
# NOTE(review): the dates presumably name sub-folders of the corpus-versions directory — confirm in utils_stats.
# NOTE(review): descriptions are kept verbatim (including spelling) because they are runtime data
# handed to utils_stats.
manifestationFilesConfig = {
    versionDate: [versionDate, changeNote]
    for versionDate, changeNote in (
        ('2022-05-03', 'Data integrated from KBR, BnF and KB based on ISBN10 and ISBN13 identifiers.'),
        ('2022-05-09', 'Data integrated from KBR, BnF and KB based on ISBN10 and ISBN13 identifiers. Additionally KBR original title from field 246.'),
        ('2022-05-23', 'No changes to previous version.'),
        ('2022-06-07', 'Added publishing directors and included a collumn for source titles from KB'),
        ('2022-06-16', 'Removed duplicate manifestations with the same ISBN10/ISBN13'),
        ('2022-06-20', 'New KBR data dump with plenty of additions and refinements and ISBN fix for FR-NL'),
        ('2022-06-23', 'No ISBN duplicate removal anymore to prevent data loss'),
        ('2022-06-24', 'New KBR dump with old Dutch, old French and middle French as well as new data in general'),
        ('2022-07-18', 'Dataprofile query filtering also for Belgian organizations, not just Belgian persons (KB data was also updated from SPARQL endpoint, but did not cause more translations)'),
        ('2022-07-20', 'Fixed error in ISNI-SRU integration SPARQL query.'),
        ('2022-07-21', 'Manually curated Wikidata overlap used as bridge identifeir with adapted integration SPARQL queries.'),
        ('2022-07-25', 'Contributors integrated using also ISNI identifiers retrieved from Wikidata via QID'),
        ('2022-07-26', 'Added roughly 5,000 more contributors from BnF, providing additional nationality information'),
        ('2022-07-27', 'Added missing nationalities from Wikidata'),
        ('2022-08-08', "Added source links KBR identified via exact title match or 0.9 similarity"),
        ('2022-08-11', "New export from the KBR catalogue"),
        ('2022-08-18', "Strictly split translations and originals into different named graphs"),
        ('2022-09-05', "Integration changes: dynamic SPARQL instead of files"),
        ('2022-09-07', "Added a KBR export of Belgian person authorities"),
        ('2022-09-08', "fetched more KBR ISNIs because of fixed parsing and fixed Wikidata contributor update via ISNI"),
        ('2022-09-12', "New export from KBR"),
        ('2022-09-27', "Fixed contributor overlapping to reduce duplicates"),
        ('2022-11-30', "New export from KBR and removed Wikidata correlation list as source"),
        ('2022-12-09', "display integrted publisher information"),
        ('2022-12-15', "added available BnF source titles"),
    )
}

In [3]:
# Redo the manifestation measurements for every corpus version in manifestationFilesConfig.
# NOTE(review): the roles of the string arguments are inferred from their values only
# (base directory, per-version input CSV, output/statistics name) — confirm against utils_stats.
utils_stats.redoManifestationsCorpusMeasurements(
    manifestationFilesConfig,
    '../corpus-versions',
    'integrated-data-enriched.csv',
    'translation-stats',
)

## Redo contributor measurements

In [4]:
# Corpus versions for which the contributor measurements are redone.
# Resulting mapping: version date -> [version date, description of what changed in that version].
# The date is written once per entry and reused as both key and first list element.
# NOTE(review): the dates presumably name sub-folders of the corpus-versions directory — confirm in utils_stats.
# NOTE(review): descriptions are kept verbatim (including spelling) because they are runtime data
# handed to utils_stats.
contributorFilesConfig = {
    versionDate: [versionDate, changeNote]
    for versionDate, changeNote in (
        ('2022-05-03', 'Contributors integrated from ISNI SRU dump, KBR, BnF and NTA based on ISNI, VIAF and Wikidata identifiers.'),
        ('2022-05-09', 'Contributors integrated from ISNI SRU dump, KBR, BnF and NTA based on ISNI, VIAF and Wikidata identifiers.'),
        ('2022-05-23', 'Only count persons who are actually contributing to manifestations of the corpus (author, translator, illustrator, scenarist, publishing director)'),
        ('2022-06-07', 'Added columns with statistics about how many books a contributor authored, translated etc.'),
        ('2022-06-16', 'Added columns with statistics about how many books a contributor authored, translated etc.'),
        ('2022-06-20', 'New KBR data dump with plenty of additions and refinements and ISBN fix for FR-NL.'),
        ('2022-06-23', 'No ISBN duplicate removal anymore to prevent data loss'),
        ('2022-06-24', 'New KBR dump with old Dutch, old French and middle French as well as new data in general'),
        ('2022-07-18', 'Dataprofile query filtering also for Belgian organizations, not just Belgian persons (KB data was also updated from SPARQL endpoint, but did not cause more translations)'),
        ('2022-07-20', 'Fixed error in ISNI-SRU integration SPARQL query.'),
        ('2022-07-21', 'Manually curated Wikidata overlap used as bridge identifeir with adapted integration SPARQL queries.'),
        ('2022-07-25', 'Integrated using also ISNI identifiers retrieved from Wikidata via QID'),
        ('2022-07-26', 'Added roughly 5,000 more contributors from BnF, providing additional nationality information'),
        ('2022-07-27', 'Added missing nationalities from Wikidata'),
        ('2022-08-08', "Added source links KBR identified via exact title match or 0.9 similarity"),
        ('2022-08-11', "New export from the KBR catalogue"),
        ('2022-08-18', "Strictly split translations and originals into different named graphs"),
        ('2022-09-05', "Integration changes: dynamic SPARQL instead of files"),
        ('2022-09-07', "Added a KBR export of Belgian person authorities"),
        ('2022-09-08', "fetched more KBR ISNIs because of fixed parsing and fixed Wikidata contributor update via ISNI"),
        ('2022-09-12', "New export from KBR"),
        ('2022-09-27', "Fixed contributor overlapping to reduce duplicates"),
        ('2022-11-30', "New export from KBR and removed Wikidata correlation list as source"),
        ('2022-12-09', "display integrted publisher information"),
        ('2022-12-15', "added available BnF source titles"),
    )
}

In [5]:
# Redo the contributor measurements for every corpus version in contributorFilesConfig.
# NOTE(review): the roles of the string arguments are inferred from their values only
# (base directory, per-version input CSV, output/statistics name) — confirm against utils_stats.
utils_stats.redoContributorCorpusMeasurements(
    contributorFilesConfig,
    '../corpus-versions',
    'contributors-persons.csv',
    'person-contributor-stats',
)