## Create new manifestation measurements

The following cells compute new statistics for the current data export. The results can be saved to a CSV file so that they are taken into account by the section above on existing measurements.

In [1]:
import pandas as pd
import utils
from datetime import datetime

In [2]:
# Load the integrated corpus, using the 'targetIdentifier' column as the row index.
corpus = pd.read_csv(
    './2022-06-20_with-duplicate-removing/csv/integrated-data-enriched.csv',
    index_col='targetIdentifier',
)

# Boolean masks over the language columns, reused for the three subsets below.
dutchOrFrench = ['Dutch', 'French']
isDutchSource = corpus['sourceLanguage'].eq('Dutch')
isFrenchSource = corpus['sourceLanguage'].eq('French')
isDutchTarget = corpus['targetLanguage'].eq('Dutch')
isFrenchTarget = corpus['targetLanguage'].eq('French')

# Dutch -> French and French -> Dutch rows.
corpusNLFR = corpus[isDutchSource & isFrenchTarget]
corpusFRNL = corpus[isFrenchSource & isDutchTarget]

# Rows where the source OR the target language is neither Dutch nor French
# (a missing language value also counts as "neither").
# NOTE(review): Dutch -> Dutch and French -> French rows match none of the
# three subsets - confirm that this is intended.
corpusOther = corpus[
    ~corpus['sourceLanguage'].isin(dutchOrFrench)
    | ~corpus['targetLanguage'].isin(dutchOrFrench)
]

In [3]:
# Free-text note stored with every measurement row created in this cell.
comment = "New KBR data dump with plenty of additions and refinements and ISBN fix for FR-NL."

# (subset, label) pairs; the order fixes the row order of the resulting table.
labelledCorpora = [
    (corpusFRNL, 'FR-NL'),
    (corpusNLFR, 'NL-FR'),
    (corpusOther, 'OTHER'),
]

# One utils.createCorpusMeasurements call per subset; each result becomes one row.
measurements = pd.DataFrame([
    utils.createCorpusMeasurements(subset, label, comment)
    for subset, label in labelledCorpora
])

In [4]:
# Copy of the FR-NL subset in which every missing value is replaced by an empty string,
# so that "no value" can be tested with a plain comparison against ''.
convertedFRNL = corpusFRNL.fillna(value='')

In [5]:
# Number of FR-NL rows whose 'targetISBN13' is not the empty string
# (i.e. rows that had a value before the NaN -> '' conversion).
targetISBN13Values = convertedFRNL['targetISBN13'].values
(targetISBN13Values != '').sum()

6663

In [6]:
# Show the freshly computed measurements table (one row per corpus subset).
measurements

Unnamed: 0,date,corpus,numberTranslations,withTargetISBN10,withTargetISBN13,withKBRIdentifier,withBnFIdentifier,withKBIdentifier,withBBThesaurusID,withSourceKBRIdentifier,withKBRSourceTitle,withKBSourceTitle,withSourceISBN10,withSourceISBN13,comment
0,2022-06-21 12:55:37.541880,FR-NL,7350,6659,6663,6669,51,1890,6524,127,2399,1890,93,111,New KBR data dump with plenty of additions and...
1,2022-06-21 12:55:37.657670,NL-FR,3724,3170,3194,3247,1136,632,2717,182,1106,632,161,163,New KBR data dump with plenty of additions and...
2,2022-06-21 12:55:37.717150,OTHER,268,214,214,262,69,11,231,3,10,11,2,2,New KBR data dump with plenty of additions and...


In [7]:
# Persist the manifestation measurements; index=False keeps the row index out of the file.
measurements.to_csv(
    path_or_buf='2022-06-20-translation-stats.csv',
    index=False,
)

## Create new contributor measurements

In [8]:
# Load the person contributors, using the 'contributorID' column as the row index.
personContributors = pd.read_csv(
    './2022-06-20_with-duplicate-removing/csv/contributors-persons.csv',
    index_col='contributorID',
)

In [9]:
# Free-text note stored with the contributor measurement row.
personComment = "New KBR data dump with plenty of additions and refinements."

# A single utils.createContributorCorpusMeasurements result, wrapped in a one-row table.
personMeasurementRow = utils.createContributorCorpusMeasurements(personContributors, personComment)
personContributorsMeasurements = pd.DataFrame([personMeasurementRow])

In [10]:
# Show the freshly computed person-contributor measurements (a single row).
personContributorsMeasurements

Unnamed: 0,date,numberContributors,withKBRIdentifier,withBnFIdentifier,withKBIdentifier,withISNIIdentifier,withVIAFIdentifier,withWikidataIdentifier,withBirthDate,withDeathDate,...,withMultipleKBRIdentifiers,withMultipleBnFIdentifiers,withMultipleNTAIdentifiers,withMultipleISNIIdentifiers,withMultipleVIAFIdentifiers,withMultipleWikidataIdentifiers,withMultipleBirthDates,withMultipleDeathDates,withMultipleNationalities,comment
0,2022-06-21 12:55:37.962589,5843,5214,1231,1063,4486,2216,956,2441,603,...,27,8,4,9,23,0,44,10,105,New KBR data dump with plenty of additions and...


In [11]:
# Persist the person-contributor measurements; index=False keeps the row index out of the file.
personContributorsMeasurements.to_csv(
    path_or_buf='2022-06-20-person-contributor-stats.csv',
    index=False,
)