In [1]:
from stcn import *
import json
from collections import Counter
from os.path import join

We find all authors in the STCN

In [5]:
query = """

PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX schema: <http://schema.org/>
PREFIX kb: <http://data.bibliotheken.nl/def#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>

SELECT * WHERE {
  

?resource schema:mainEntityOfPage ?mainEntity .
?mainEntity schema:isPartOf <http://data.bibliotheken.nl/id/dataset/stcn>  . 

OPTIONAL {
?resource schema:author ?author_node .
?author_node schema:author ?author .
?author schema:sameAs ?identifier .
}

}
"""
authors = run_query(query)


Number of unique authors

In [6]:
print(len(authors['author.value'].unique()))

37890


In [7]:
authors['author.value']

0         http://data.bibliotheken.nl/id/thes/p068304579
1         http://data.bibliotheken.nl/id/thes/p068304579
2         http://data.bibliotheken.nl/id/thes/p068304579
3         http://data.bibliotheken.nl/id/thes/p074092863
4         http://data.bibliotheken.nl/id/thes/p074092863
                               ...                      
399601                                               NaN
399602                                               NaN
399603                                               NaN
399604                                               NaN
399605                                               NaN
Name: author.value, Length: 399606, dtype: object

Find all wikidata and VIAF identifiers

In [None]:
wikidata = []
viaf = []

for wd_id in authors['identifier.value'].unique():
    if re.search( r'wikidata' , str(wd_id) ):
        wikidata.append( 'wd:' + os.path.basename(wd_id))
    elif re.search( r'viaf' , str(wd_id) ):
        viaf.append( os.path.basename(wd_id))

Find all nationalities associated with Wikidata identifiers.

Full list is chopped up in batches of 300 to reduce the processing time

In [None]:
print(len(wikidata))

start = 0
step = 300

for i in range(start,len(wikidata)+1,step):
    print(i)
    slice = wikidata[i:i+step]
    list_ids = ' '.join(slice)


    endpoint = 'https://query.wikidata.org/sparql'
    sparql = SPARQLWrapper(endpoint)

    query = '''
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>

    SELECT ?item ?nationalityLabel
    WHERE {
    VALUES ?item {'''+list_ids+'''} . 
    ?item wdt:P27 ?nationality .                                                                      
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".}
    }
    '''
    #print(query)

    df = run_query(query)
    
    out_file = f'wd{i}.csv' 
    path = os.path.join('Country_csv',out_file)
    
    df.to_csv(path,index=False)
    time.sleep(30)


In [None]:
# Find all nationalities associated with Wikidata identifiers

In [None]:
# VIAF

print(len(viaf))

start = 0
step = 300

for i in range(start,len(viaf)+1,step):
    print(i)
    slice = viaf[i:i+step]
    list_ids = ''
    for viaf_id in slice:
        list_ids += "\'" + viaf_id + "\' "


    endpoint = 'https://query.wikidata.org/sparql'
    sparql = SPARQLWrapper(endpoint)

    query = '''
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX viaf: <https://viaf.org/viaf/>

    SELECT ?person ?viaf_id ?nationalityLabel
    WHERE {
    VALUES ?viaf_id {'''+list_ids+'''} . 
    ?person wdt:P27 ?nationality .  
    ?person wdt:P214 ?viaf_id .
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".}
    }
    '''

    df = run_query(query)
    
    out_file = f'viaf{i}.csv' 
    path = os.path.join('Country_csv',out_file)
    
    df.to_csv(path,index=False)
    time.sleep(30)


identifiers_countries creates link between ID and country

In [3]:
identifiers_countries = dict()
dir = 'Country_csv'

for file in os.listdir(dir):
    if re.search( r'^viaf' , file ):
        print(file)
        df = pd.read_csv(os.path.join(dir,file))
        for i,row in df.iterrows():
            identifiers_countries[ row['viaf_id.value'] ] = row['nationalityLabel.value']
            
         
    elif re.search( r'^wd' , file ):
        print(file)
        df = pd.read_csv(os.path.join(dir,file))
        for i,row in df.iterrows():
            identifiers_countries[ row['item.value'] ] = row['nationalityLabel.value']
            

viaf19200.csv
viaf33600.csv
viaf5100.csv
wd300.csv
viaf11700.csv
viaf21600.csv
viaf4800.csv
viaf31200.csv
viaf3000.csv
viaf35700.csv
viaf7500.csv
wd5100.csv
viaf17400.csv
viaf28800.csv
viaf18900.csv
viaf9300.csv
wd4800.csv
viaf30900.csv
viaf15000.csv
viaf29100.csv
wd3000.csv
wd7500.csv
viaf11100.csv
viaf25500.csv
viaf21000.csv
viaf34800.csv
viaf33000.csv
viaf1200.csv
viaf37500.csv
viaf5700.csv
viaf3600.csv
viaf35100.csv
viaf0.csv
viaf10800.csv
viaf23400.csv
viaf13500.csv
viaf27300.csv
wd1200.csv
wd5700.csv
viaf300.csv
wd3600.csv
viaf29700.csv
viaf15600.csv
viaf23100.csv
viaf36900.csv
wd1500.csv
viaf35400.csv
viaf3300.csv
viaf24900.csv
viaf12900.csv
viaf22800.csv
viaf11400.csv
viaf15300.csv
viaf25200.csv
viaf9000.csv
wd3300.csv
viaf37200.csv
viaf1500.csv
viaf600.csv
viaf27600.csv
viaf13200.csv
viaf17700.csv
viaf23700.csv
viaf6900.csv
viaf21300.csv
wd600.csv
wd0.csv
viaf5400.csv
viaf33300.csv
wd7200.csv
viaf19500.csv
viaf29400.csv
viaf16800.csv
viaf9600.csv
wd6900.csv
viaf27000.csv
viaf1

'Nationalities' referring to British Isles

In [4]:
british_isles = ['Kingdom of England',
            'Commonwealth of England',
                 'Jersey',
                 'Kingdom of Northumbria',
                 'British North America',
                 'United Kingdom',
                 'Kingdom of Scotland',
                 'Great Britain',
                 'Scotland',
                 'Wales',
                 'Republic of Ireland',
                 'Kingdom of Ireland',
                 'United Kingdom of Great Britain and Ireland',
                 'British America',
                 'England']

In [8]:
authors.columns

Index(['resource.type', 'resource.value', 'mainEntity.type',
       'mainEntity.value', 'author_node.type', 'author_node.value',
       'author.type', 'author.value', 'identifier.type', 'identifier.value'],
      dtype='object')

Find authors from the british isles

In [9]:
british_authors = []
books_by_british_authors = []
for i,row in authors.iterrows():
    country = identifiers_countries.get(row['identifier.value'])
    if country in british_isles:
        print(f"{row['author.value']} ({country})")
        british_authors.append(row['author.value'])
        books_by_british_authors.append(row['resource.value'])

http://data.bibliotheken.nl/id/thes/p070089825 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070089825 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068824319 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068421443 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069036489 (Commonwealth of England)
http://data.bibliotheken.nl/id/thes/p069856249 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p069856249 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p069397805 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069911517 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p069199337 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p067888364 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p071731466 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068824319 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p067883001 (United Kingdom)
http://data.bibliotheken.nl/id

http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p069804184 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p069797374 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p071468080 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p074304739 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069868174 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p072886315 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069036489 (Commonwealth of England)
http://data.bibliotheken.nl/id/thes/p069036489 (Commonwealth of England)
http://data.bibliotheken.nl/id/thes/p068482876 (England)
http://data.bibliotheken.nl/id/thes/p114266395 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p070039127 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069967792 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p069653348 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p068482817

http://data.bibliotheken.nl/id/thes/p070707677 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p071933654 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068469063 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068469063 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069557934 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070838755 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p067857043 (England)
http://data.bibliotheken.nl/id/thes/p068421583 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068457995 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070957703 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p069634823 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p070242496 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p186331886 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p067888747 (Wales)
http://data.bibliotheken.nl/id/thes/p067888747 (Wales)
http://data.bibliot

http://data.bibliotheken.nl/id/thes/p070139911 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p072050691 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p071661328 (Scotland)
http://data.bibliotheken.nl/id/thes/p068482876 (England)
http://data.bibliotheken.nl/id/thes/p070074062 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068482817 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p073535117 (Scotland)
http://data.bibliotheken.nl/id/thes/p069557934 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p24177277X (Scotland)
http://data.bibliotheken.nl/id/thes/p072483733 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068421583 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068894635 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p068894635 (United Kingdom of Great Britai

http://data.bibliotheken.nl/id/thes/p075140969 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068421443 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068907117 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p068907117 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p072910828 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p067883001 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p067883001 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p07033546X (Kingdom of Ireland)
http://data.bibliotheken.nl/id/thes/p070130221 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p070130221 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p075182076 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069036489 (Commonwealth of England)
http://data.bibliotheken.nl/id/thes/p068143303 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068894635 (United Kingdom of Great Britain and Ireland)
http://

http://data.bibliotheken.nl/id/thes/p087122928 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p067857043 (England)
http://data.bibliotheken.nl/id/thes/p068478275 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068478275 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068478275 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068478267 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068421796 (England)
http://data.bibliotheken.nl/id/thes/p068894635 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p068894635 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p068894635 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p068894635 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p068894635 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p070440743 (Kingdom of England)
http://data.b

http://data.bibliotheken.nl/id/thes/p073482870 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p09749769X (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p09749769X (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p097478741 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p097478741 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069634823 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p068484461 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p073201170 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068421443 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p095769692 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p089250044 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p100129188 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p357757807 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://

http://data.bibliotheken.nl/id/thes/p068482817 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068350635 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p068387172 (Scotland)
http://data.bibliotheken.nl/id/thes/p068387172 (Scotland)
http://data.bibliotheken.nl/id/thes/p068387172 (Scotland)
http://data.bibliotheken.nl/id/thes/p074519638 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070094314 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p070094314 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p068482817 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068482817 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068482817 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068703953 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p357757807 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068468881 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068469063 (Kingdom of England)
http://data.

http://data.bibliotheken.nl/id/thes/p070706964 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070706964 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070706964 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p070527423 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068369123 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068369123 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070593973 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069911517 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p069905673 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069385

http://data.bibliotheken.nl/id/thes/p073799270 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073799270 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068157177 (Great Britain)
http://data.bibliotheken.nl/id/thes/p141317531 (Great Britain)
http://data.bibliotheken.nl/id/thes/p141317531 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069122652 (Great Britain)
http://data.bibliotheken.nl/id/thes/p152517324 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p06842423X (England)
http://data.bibliotheken.nl/id/thes/p141317531 (Great Britain)
http://data.bibliotheken.nl/id/thes/p141317531 (Great Britain)
http://data.bibliotheken.nl/id/thes/p141317531 (Great Britain)
http://data.bibliotheken.nl/id/thes/p141317531 (Great Britain)
http://data.bibliotheken.nl/id/thes/p141317531 (Great Britain)
http://data.bibliotheken.nl/id/thes/p141317531 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068421435 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.n

http://data.bibliotheken.nl/id/thes/p069316082 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069590850 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p06842423X (England)
http://data.bibliotheken.nl/id/thes/p068482876 (England)
http://data.bibliotheken.nl/id/thes/p068922396 (Kingdom of Ireland)
http://data.bibliotheken.nl/id/thes/p071030689 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069868174 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p075055562 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p067570224 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p068824319 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068294387 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069044813 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p069557934 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070017883 (Great Britain)


http://data.bibliotheken.nl/id/thes/p070251002 (Great Britain)
http://data.bibliotheken.nl/id/thes/p07025320X (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p071441115 (Great Britain)
http://data.bibliotheken.nl/id/thes/p071323163 (England)
http://data.bibliotheken.nl/id/thes/p074786822 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p06862722X (Great Britain)
http://data.bibliotheken.nl/id/thes/p070534462 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069943591 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p160540127 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p073017302 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p074786822 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p067888747 (Wales)
http://data.bibliotheken.nl/id/thes/p069695776 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069695776 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069695776 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069695

http://data.bibliotheken.nl/id/thes/p070850550 (Scotland)
http://data.bibliotheken.nl/id/thes/p069385874 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068478321 (Great Britain)
http://data.bibliotheken.nl/id/thes/p072100168 (Great Britain)
http://data.bibliotheken.nl/id/thes/p203114655 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068421443 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068421443 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p116675535 (England)
http://data.bibliotheken.nl/id/thes/p068894635 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p068894635 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p070234922 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p191883522 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069634823 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p070273545 (United Kingdom of Great Britain and Ireland)
http://data.b

http://data.bibliotheken.nl/id/thes/p068478445 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068478445 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068742002 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069557934 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069387850 (Republic of Ireland)
http://data.bibliotheken.nl/id/thes/p06865734X (Scotland)
http://data.bibliotheken.nl/id/thes/p085601527 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p085601527 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p085601527 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p085601527 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p069892180 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069892180 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069892180 (Great Britain)
http://data.bibliotheken.nl/id/thes/p07145359

http://data.bibliotheken.nl/id/thes/p072768673 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069911517 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p069911517 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p071846271 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p120540320 (Great Britain)
http://data.bibliotheken.nl/id/thes/p070508852 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p072504897 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p234832029 (Great Britain)
http://data.bibliotheken.nl/id/thes/p06862722X (Great Britain)
http://data.bibliotheken.nl/id/thes/p06862722X (Great Britain)
http://data.bibliotheken.nl/id/thes/p06862722X (Great Britain)
http://data.bibliotheken.nl/id/thes/p06862722X (Great Britain)
http://data.bibliotheken.nl/id/thes/p06862722X (Great Britain)
http://data.bibliotheken.nl/id/thes/p06862722X (Great B

http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068411480 (Kingdom of Ireland)
http://data.bibliotheken.nl/id/thes/p068457502 (England)
http://data.bibliotheken.nl/id/thes/p068457502 (England)
http://data.bibliotheken.nl/id/thes/p069818673 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p070440743 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069868174 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p072390522 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069892180 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069892180 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069892180 (Great Britain)
http://data.bibl

http://data.bibliotheken.nl/id/thes/p264084330 (England)
http://data.bibliotheken.nl/id/thes/p072910828 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p072910828 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p115580387 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p266561187 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p101828837 (England)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p070130221 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p070130221 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p070574278 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068468881 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068468881 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069557934 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069557934 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069557

http://data.bibliotheken.nl/id/thes/p072519061 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069343233 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p071180842 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p070707677 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p101828837 (England)
http://data.bibliotheken.nl/id/thes/p070527423 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069911029 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068421796 (England)
http://data.bibliotheken.nl/id/thes/p07145652X (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p292787634 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p073929964 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p121177211 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p071432302 (Great Britain)
http://data.bibliotheken.nl/id/thes/p070421250 (Great Britain)
http://data.bibliotheken.nl/id/thes/p0682496

http://data.bibliotheken.nl/id/thes/p070963789 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069390096 (Great Britain)
http://data.bibliotheken.nl/id/thes/p06867788X (United Kingdom)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p297675427 (Great Britain)
http://data.bibliotheken.nl/id/thes/p071441115 (Great Britain)
http://data.bibliotheken.nl/id/thes/p297830325 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068239971 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070296715 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073143049 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069899908 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073127965 (Great Britain)
http://data.bibliotheken.nl/id/thes/p298010291 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p072870443 (Great Britain)
http://data.bibliotheken.nl/id/the

http://data.bibliotheken.nl/id/thes/p068351771 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p072685832 (Great Britain)
http://data.bibliotheken.nl/id/thes/p126871302 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p09113417X (United Kingdom)
http://data.bibliotheken.nl/id/thes/p234832029 (Great Britain)
http://data.bibliotheken.nl/id/thes/p072870443 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073286648 (England)
http://data.bibliotheken.nl/id/thes/p074485733 (Kingdom of Ireland)
http://data.bibliotheken.nl/id/thes/p06842423X (England)
http://data.bibliotheken.nl/id/thes/p303077174 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069913110 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p073350680 (Great Britain)
http://data.bibliotheken.nl/id/thes/p075242664 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p068369123 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068469063 (

http://data.bibliotheken.nl/id/thes/p067913636 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068369123 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068369123 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069804184 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p06842423X (England)
http://data.bibliotheken.nl/id/thes/p068482876 (England)
http://data.bibliotheken.nl/id/thes/p068403356 (Great Britain)
http://data.bibliotheken.nl/id/thes/p071136398 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068457634 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068457634 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073523054 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073523054 (Great Britain)
http://data.bibliotheken.nl/id/thes/p075046563 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068482817 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068482817 (Kingdom of England)
http://data.

http://data.bibliotheken.nl/id/thes/p068375018 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068482876 (England)
http://data.bibliotheken.nl/id/thes/p073523054 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073523054 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073523054 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068239971 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068249691 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p312818904 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p081500653 (Great Britain)
http://data.bibliotheken.nl/id/thes/p071249907 (Republic of Ireland)
http://data.bibliotheken.nl/id/thes/p068432488 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069045429 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073523054 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068421443 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p06862722

http://data.bibliotheken.nl/id/thes/p069044813 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p069633452 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069856249 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p069558310 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069558310 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073350680 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068421443 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p069456046 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p305943189 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068335628 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070094314 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p072100168 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073143049 (Great Britain)
http://data.bibliotheken.nl/id/thes/p070856419 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/

http://data.bibliotheken.nl/id/thes/p069911517 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p069868174 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p097254525 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p154121789 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068387148 (Kingdom of Ireland)
http://data.bibliotheken.nl/id/thes/p071452710 (Great Britain)
http://data.bibliotheken.nl/id/thes/p06867788X (United Kingdom)
http://data.bibliotheken.nl/id/thes/p073143049 (Great Britain)
http://data.bibliotheken.nl/id/thes/p116675535 (England)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p069385785 (Scotland)
http://data.bibliotheken.nl/id/thes/p068369123 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p073210374 (Kingdom of England)
http://data.bi

http://data.bibliotheken.nl/id/thes/p070273545 (United Kingdom of Great Britain and Ireland)
http://data.bibliotheken.nl/id/thes/p068907117 (Kingdom of Scotland)
http://data.bibliotheken.nl/id/thes/p068425147 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068536283 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p293903026 (Great Britain)
http://data.bibliotheken.nl/id/thes/p068482876 (England)
http://data.bibliotheken.nl/id/thes/p068482876 (England)
http://data.bibliotheken.nl/id/thes/p069429421 (Great Britain)
http://data.bibliotheken.nl/id/thes/p073491322 (Great Britain)
http://data.bibliotheken.nl/id/thes/p071991913 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p072024917 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068211872 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p068211872 (United Kingdom)
http://data.bibliotheken.nl/id/thes/p069316082 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p070761817 (Kingdom of England)
http://d

http://data.bibliotheken.nl/id/thes/p068423578 (Kingdom of Ireland)
http://data.bibliotheken.nl/id/thes/p069343233 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p068468881 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p172028310 (Great Britain)
http://data.bibliotheken.nl/id/thes/p069122652 (Great Britain)
http://data.bibliotheken.nl/id/thes/p072463295 (Kingdom of England)
http://data.bibliotheken.nl/id/thes/p152506764 (Great Britain)


In [10]:
british_authors = list(set(british_authors))
books_by_british_authors = list(set(books_by_british_authors))

print(len(british_authors))
print(len(books_by_british_authors))

782
4290


Find books by British authors

In [11]:
books_by_british_authors[0]

'http://data.bibliotheken.nl/id/nbt/p057720770'

In [13]:
import time

endpoint = 'http://data.bibliotheken.nl/sparql'
sparql = SPARQLWrapper(endpoint)

start = 0
step = 300

for i in range(start,len(books_by_british_authors)+1,step):
    print(i)
    slice = books_by_british_authors[i:i+step]
    list_ids = ''
    for resource in slice:
        ppn = os.path.basename(resource)
        ppn = re.sub( r'^p' , '' , ppn )
        list_ids += "\'" + ppn + "\' "

    query = '''
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX schema: <http://schema.org/>
    PREFIX kb: <http://data.bibliotheken.nl/def#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT * WHERE {
    VALUES ?book_id {'''+list_ids+'''} . 
    ?resource schema:mainEntityOfPage ?mainEntity .
    ?mainEntity kb:ppn ?book_id . 

    OPTIONAL {
    ?resource schema:name ?title . }

    OPTIONAL {
    ?resource schema:author ?author_node . 
    ?author_node schema:author ?author_info .
    ?author_info  rdfs:label ?author_name . }

    OPTIONAL {
    ?resource schema:publication ?publ .
    ?publ schema:startDate ?publ_year . 
    ?publ schema:publishedBy ?publisher . 
    ?publisher schema:name ?publ_name .
    }
    
    OPTIONAL {
        ?resource schema:inLanguage ?language . }

    OPTIONAL { 
    ?resource schema:about ?subject .
    ?subject skos:prefLabel ?subject_label .
    } 


    }


    '''
    
    #print(query)
    

    df = run_query(query)
    

    out_file = f'book_{i}.csv' 
    

    df.to_csv(join('Books',out_file),index=False)
    time.sleep(30)


0
300
600
900
1200
1500
1800
2100
2400
2700
3000
3300
3600
3900
4200


In [30]:
with open('wd_resources.csv','w') as out:
    out.write('id\n')
    for identifier in books_by_british_authors:
        out.write(f'{identifier}\n')
        

In [31]:
import pandas as pd
file1 = pd.read_csv('wd_resources.csv')
ids1 = file1['id'].unique()

file2 = pd.read_csv('translations_ids.csv')
ids2 = file2['id'].unique()

In [32]:
for resource in ids1:
    if resource not in ids2:
        print(resource)

http://data.bibliotheken.nl/id/nbt/p057720770
http://data.bibliotheken.nl/id/nbt/p057436789
http://data.bibliotheken.nl/id/nbt/p115653562
http://data.bibliotheken.nl/id/nbt/p180934309
http://data.bibliotheken.nl/id/nbt/p10410306X
http://data.bibliotheken.nl/id/nbt/p310744431
http://data.bibliotheken.nl/id/nbt/p217618715
http://data.bibliotheken.nl/id/nbt/p094954666
http://data.bibliotheken.nl/id/nbt/p862896444
http://data.bibliotheken.nl/id/nbt/p296028231
http://data.bibliotheken.nl/id/nbt/p314594973
http://data.bibliotheken.nl/id/nbt/p324432992
http://data.bibliotheken.nl/id/nbt/p29701465X
http://data.bibliotheken.nl/id/nbt/p302242805
http://data.bibliotheken.nl/id/nbt/p190397497
http://data.bibliotheken.nl/id/nbt/p266959083
http://data.bibliotheken.nl/id/nbt/p310266955
http://data.bibliotheken.nl/id/nbt/p252296850
http://data.bibliotheken.nl/id/nbt/p250398613
http://data.bibliotheken.nl/id/nbt/p298853817
http://data.bibliotheken.nl/id/nbt/p336162715
http://data.bibliotheken.nl/id/nbt

http://data.bibliotheken.nl/id/nbt/p238236013
http://data.bibliotheken.nl/id/nbt/p09578313X
http://data.bibliotheken.nl/id/nbt/p154202193
http://data.bibliotheken.nl/id/nbt/p315640480
http://data.bibliotheken.nl/id/nbt/p173493009
http://data.bibliotheken.nl/id/nbt/p844168785
http://data.bibliotheken.nl/id/nbt/p212686364
http://data.bibliotheken.nl/id/nbt/p084635339
http://data.bibliotheken.nl/id/nbt/p833437976
http://data.bibliotheken.nl/id/nbt/p081092555
http://data.bibliotheken.nl/id/nbt/p822695472
http://data.bibliotheken.nl/id/nbt/p256350353
http://data.bibliotheken.nl/id/nbt/p087918757
http://data.bibliotheken.nl/id/nbt/p16607196X
http://data.bibliotheken.nl/id/nbt/p105073091
http://data.bibliotheken.nl/id/nbt/p158738586
http://data.bibliotheken.nl/id/nbt/p216917727
http://data.bibliotheken.nl/id/nbt/p297724029
http://data.bibliotheken.nl/id/nbt/p059595582
http://data.bibliotheken.nl/id/nbt/p326480021
http://data.bibliotheken.nl/id/nbt/p115181415
http://data.bibliotheken.nl/id/nbt

http://data.bibliotheken.nl/id/nbt/p234940409
http://data.bibliotheken.nl/id/nbt/p30623341X
http://data.bibliotheken.nl/id/nbt/p21690658X
http://data.bibliotheken.nl/id/nbt/p113613105
http://data.bibliotheken.nl/id/nbt/p840203225
http://data.bibliotheken.nl/id/nbt/p301236658
http://data.bibliotheken.nl/id/nbt/p181682869
http://data.bibliotheken.nl/id/nbt/p101339747
http://data.bibliotheken.nl/id/nbt/p844226068
http://data.bibliotheken.nl/id/nbt/p217042848
http://data.bibliotheken.nl/id/nbt/p103772472
http://data.bibliotheken.nl/id/nbt/p32699095X
http://data.bibliotheken.nl/id/nbt/p260118222
http://data.bibliotheken.nl/id/nbt/p33061178X
http://data.bibliotheken.nl/id/nbt/p85200351X
http://data.bibliotheken.nl/id/nbt/p15121137X
http://data.bibliotheken.nl/id/nbt/p216905109
http://data.bibliotheken.nl/id/nbt/p192787322
http://data.bibliotheken.nl/id/nbt/p333052544
http://data.bibliotheken.nl/id/nbt/p267787766
http://data.bibliotheken.nl/id/nbt/p093155026
http://data.bibliotheken.nl/id/nbt

## Make JSON

In [33]:
def print_value(value):
    if pd.isna(value):
        return ''
    else:
        return value
    
def get_values(df,field,fields_dict):
    rows = df.drop_duplicates(field)
    all_rows = []
    for i,row in rows.iterrows():
        values = dict()
        for f in fields_dict:
            values[fields_dict[f]] = print_value(row[f])
        all_rows.append(values)
    return all_rows   

In [34]:
import numpy as np

topics_dict = dict()


def read_topics():
    topics_dict = dict()
    topics = pd.read_excel('Topics.xlsx')
    for i,row in topics.iterrows():
        topics_dict[ row['topic'] ] = row['group']
    return topics_dict
    

def cluster_subject(subject):
    
    global topics_dict
    
    if not topics_dict:
        topics_dict = read_topics()
    
    if not(pd.isna(subject)):    
        return topics_dict[subject]
    else:
        return np.nan

topics_dict = read_topics()
    


In [35]:
transl = pd.read_csv('translations.csv')
translations_ids = transl['resource.value'].unique()
print(len(translations_ids))

6103


In [39]:
path = join('Books','book_300.csv')
df = pd.read_csv(path)
df['author_info.value']

0      http://data.bibliotheken.nl/id/thes/p101187718
1      http://data.bibliotheken.nl/id/thes/p067852068
2      http://data.bibliotheken.nl/id/thes/p071323163
3      http://data.bibliotheken.nl/id/thes/p071323163
4      http://data.bibliotheken.nl/id/thes/p068421443
                            ...                      
560    http://data.bibliotheken.nl/id/thes/p070706964
561    http://data.bibliotheken.nl/id/thes/p070761817
562    http://data.bibliotheken.nl/id/thes/p069385785
563    http://data.bibliotheken.nl/id/thes/p069385785
564    http://data.bibliotheken.nl/id/thes/p071943986
Name: author_info.value, Length: 565, dtype: object

In [40]:


count = 0
data = []

dir = 'Books'
for csv_file in os.listdir(dir):
    print(csv_file)
    if re.search('csv$',csv_file):
        path = join(dir,csv_file)
        df = pd.read_csv(path)
        df['subject_cluster'] = df['subject_label.value'].apply(cluster_subject)
        

        unique_ids = df['resource.value'].unique()
        


        for resource in unique_ids:
            if resource not in translations_ids:
                
                record = dict()
                record['ppn'] = resource
                #print(resource)

                df_resource = df[ df['resource.value'] == resource ]
                lang = print_value(df_resource.iloc[0]['language.value'])
                if lang != 'en':

                    record['title'] = print_value(df_resource.iloc[0]['title.value'])
                    record['year'] = print_value(df_resource.iloc[0]['publ_year.value'])

                    ## Publishers
                    unique_id = 'publisher.value'
                    fields_dict = { 'publisher.value':'ppn',
                        'publ_name.value':'name'
                    }
                    all_publishers = get_values(df_resource,unique_id,fields_dict)
                    record['publishers'] = all_publishers

                    ## Subjects
                    unique_id = 'subject.value'
                    fields_dict = {'subject.value':'ppn',
                              'subject_label.value':'label',
                               'subject_cluster':'group' }
                    all_subjects = get_values(df_resource,unique_id,fields_dict)
                    record['subjects'] = all_subjects

                    ## Authors
                    unique_id = 'author_info.value'
                    fields_dict = {'author_info.value':'ppn',
                              'author_name.value':'name' }
                    all_authors = get_values(df_resource,unique_id,fields_dict)
                    record['authors'] = all_authors

                    ## Languages
                    unique_id = 'language.value'
                    fields_dict = {'language.value':'code' }
                    all_languages = get_values(df_resource,unique_id,fields_dict)
                    record['languages'] = all_languages

                    count += 1
                    data.append(record)

print(len(data))
with open('british_authors.json','w',encoding='utf-8') as out:
    out.write(json.dumps(data,indent=4))
    
print(count)

book_300.csv
book_3300.csv
book_1500.csv
book_1200.csv
book_3600.csv
book_600.csv
book_3000.csv
book_2700.csv
book_0.csv
book_1800.csv
book_2100.csv
book_4200.csv
book_2400.csv
book_900.csv
book_3900.csv
1112
1112


In [41]:
f = open('british_authors.json')
json_data = json.load(f)
f.close()

In [42]:
len(json_data)

1112

## Languages

In [52]:
language_freq = Counter()

for book in json_data:
    for language in book['languages']:

        language_freq.update([language['code']])
        
for language,count in language_freq.most_common(len(language_freq)):
    print(f'{Lang(language).name},{count}')
        

Latin,686
Dutch,318
French,103
German,15
Hebrew,3
Old English (ca. 450-1100),3
English,2
Italian,2
Gothic,2
Multiple languages,1
Old High German (ca. 750-1050),1
Ancient Greek (to 1453),1


## Authors

In [44]:
author_count = Counter()

for book in json_data:
    for author in book['authors']:
        author_count.update( [author['name'] ] )

        
for author,count in author_count.most_common():
    print(f"{author} {count}")

Ames, William (1576-1633) 75
Johannes de Garlandia (ca.1195-ca.1272) 48
Barclay, John (1582-1621) 38
Bacon, Francis (1561-1626) 32
Jacchaeus, Gilb. (ca. 1578-1628) 30
Buchanan, George (1506-1582) 29
Downing, George (ca. 1623-1684) 27
Willis, Thomas (1621-1675 ; medicus) 22
Freind, Johannes (1675-1728) 22
Owen, John (1560-1622) 19
Harvey, William (1578-1657) 18
Verstegen, Richard (ca. 1548-1640) 17
Perkins, William (1558-1602) 16
Milton, John (1608-1674) 15
Sydenham, Thomas (1624-1689) 15
Hobbes, Thomas (1588-1679) 14
Burnet, Thomas (1635?-1715) 13
Mandeville, Jean de (-1372) 13
Ames, William (-1662) 13
Selden, John (1584-1654) 12
More, Thomas (1478-1535) 12
Cartwright, Thomas (1535-1603) 12
Junius, Franciscus (F.F. ; 1589/91-1677 ; taalkundige) 12
Morus, Alexander (1616-1670) 11
Yorke, Joseph (sir ; baron Dover ; 1724-1795) 11
Digby, Kenelm (Sir ; 1603-1665) 10
Glissonius, Franciscus (1597-1677) 10
Colomiés, Paul (1638-1692) 9
Stanyhurst, Guillaume (1601-1663) 9
James I (koning van Gro

In [None]:
Works by specific author

In [64]:
regex_author_name = 'milton'
languages = dict()
all_publ = Counter()
years = []
ppn = dict()
titles = []

count = 0

for book in json_data:
    author_name = ''
    for author in book['authors']:
        author_name += author['name'] + '; '

    if re.search(regex_author_name,author_name,re.IGNORECASE):
        count += 1
        print( f"{count}. " , end = '')
        print( book['ppn'])
        ppn[book['ppn']] = ppn.get(book['ppn'],0)+1
        print( book['title'] , end = '')
        titles.append(book['title'])
        print( f" ({book['year']})" )
        years.append(book['year'])
        
        authors = ''
        for author in book['authors']:
            authors += author['name'].strip() + '; ' 
        print(authors)
        

        for publisher in book['publishers']:
            print(publisher['name'])
            all_publ.update([publisher['name']])
    
        subjects = ''
        for subject in book['subjects']:
            subjects += subject['group'] + ' -- '
        print(subjects)
        
        all_languages = ''
        for lang in book['languages']:
            all_languages += lang['code']
            languages[lang['code']] = languages.get(lang['code'],0)+1
        print(all_languages)
        print('\n')
        
print(languages)
print(len(ppn))
print(sorted(years))
for t in sorted(titles):
    print(t)
    
for p,i in all_publ.most_common():
    print( f"{p} {i}")

1. http://data.bibliotheken.nl/id/nbt/p11266606X
Verdedigingh des gemeene volcks van Engelandt, tegens Claudius sonder naem, alias Salmasius Konincklijcke verdedigingh. (1651)
Milton, John (1608-1674); 
Janssonius, Johannes
Dugard, William
History -- 
nl


2. http://data.bibliotheken.nl/id/nbt/p036788880
Die somma ende dat begrijp des menschen salicheyts. ()
Hamilton, Patrick (-1528); 

Theology -- 
nl


3. http://data.bibliotheken.nl/id/nbt/p240219805
Literæ pseudo-senatûs Anglicani, Cromwellii, reliquorumque perduellium. (1676)
Milton, John (1608-1674); 
Fricx, Eugene Henri (II)
History -- 
la


4. http://data.bibliotheken.nl/id/nbt/p078412838
Ioannis MiltonI Angli pro se defensio contra Alexandrum Morum (1655)
Milton, John (1608-1674); 
Vlacq, Adriaen
History -- Theology -- 
la


5. http://data.bibliotheken.nl/id/nbt/p204351855
Pro populo Anglicano defensio, contra Claudii anonymi, aliàs Salmasii, Defensionem regiam. (1652)
Milton, John (1608-1674); 
Ackersdijck, Dirck van
Zijll, 

In [55]:
## Works in Dutch

In [65]:
authors_freq = Counter()


for book in json_data:
    language = book['languages'][0]['code']
    if language == 'nl':
        print( book['ppn'])
        print( book['title'] )
        
        authors = ''
        for author in book['authors']:
            authors += author['name'].strip() + '; ' 
        print(authors)
        authors_freq.update([authors])
        
        all_publ = ''
        for publisher in book['publishers']:
            print(publisher['name'])
            all_publ += publisher['name'] + '; '
        print(all_publ)

            
        subjects = ''
        for subject in book['subjects']:
            subjects += subject['group'] + ' -- '
        print(subjects)
        
        all_languages = ''
        for lang in book['languages']:
            all_languages += lang['code']
            languages[lang['code']] = languages.get(lang['code'],0)+1
        print(all_languages)
        print('\n')
        

for author,count in authors_freq.most_common():
    print(f"{author},{count}")

http://data.bibliotheken.nl/id/nbt/p11266606X
Verdedigingh des gemeene volcks van Engelandt, tegens Claudius sonder naem, alias Salmasius Konincklijcke verdedigingh.
Milton, John (1608-1674); 
Janssonius, Johannes
Dugard, William
Janssonius, Johannes; Dugard, William; 
History -- 
nl


http://data.bibliotheken.nl/id/nbt/p170991482
Uitgezochte gedachten, over verscheide godsdienstige en zedekundige onderwerpen. Uit de Nachtbedenkingen.
Young, Edward (1683-1765); 
Capel, Anthonie
Kroe, Albert van der
Capel, Anthonie; Kroe, Albert van der; 
Language and literature -- 
nl


http://data.bibliotheken.nl/id/nbt/p171399315
Nodige aanmerkinge omtrent eenige woorden gesprooken van D. Johannes Visscherus.
Smith, Thomas (1638-1710); 
Dalen, Daniel van den
Dalen, Daniel van den; 
Theology -- 
nl


http://data.bibliotheken.nl/id/nbt/p184039967
Honderdt geestige caracteren, ofte Uitbeeldingen van honderdt verscheidene personen.
Verstegen, Richard (ca. 1548-1640); 
Mourik, Bernardus
Mourik, Bernardus;

Woons, Cornelis
Woons, Cornelis; 
Theology -- 
nl


http://data.bibliotheken.nl/id/nbt/p401600661
Historie van den lydende Christus, vertoont in CXIX. meditatien.
Stanyhurst, Guillaume (1601-1663); 
Jouret, Petrus
Jouret, Petrus; 
Theology -- 
nl


http://data.bibliotheken.nl/id/nbt/p850526876
Mergh der ghódtgheleerdtheidt.
Ames, William (1576-1633); 
Hoopwater, Abraham
Hoopwater, Abraham; 
Theology -- 
nl


http://data.bibliotheken.nl/id/nbt/p057128308
D'algemeene en bysondere wercking der genees-middelen in s'menschen lichaam.
Willis, Thomas (1621-1675 ; medicus); 
Goeree, Wilhelmus (I)
Janssonius van Waesberge, Johannes (I)
Goeree, Wilhelmus (I)
Goeree, Wilhelmus (I); Janssonius van Waesberge, Johannes (I); Goeree, Wilhelmus (I); 
Medicine -- 
nl


http://data.bibliotheken.nl/id/nbt/p092849792
De heerlickheyt van een kindt Gods. Ofte, Gods vriendt.
Dyke, Jeremiah (1584-1639); 
Kuypen, Jan Pietersz
Kuypen, Jan Pietersz; 
Theology -- 
nl


http://data.bibliotheken.nl/id/nbt/p09339648

## Subjects

In [None]:
subjects_freq = Counter()

for book in json_data:
    languages = ''
    for lang in book['languages']:
        languages += lang['code'] + ' '
        
    if re.search(r'\bnl\b' , languages):
    #if 2<3:
        for subject in book['subjects']:
            subjects_freq.update([subject['group']])
            

In [None]:
fig = plt.figure( figsize=( 7 , 7 ) )

x_axis = []
y_axis = []

total = sum(subjects_freq.values())
for subject, count in subjects_freq.most_common():
    y_axis.append(subject)
    percentage = round(((count/total)*100),2)
    x_axis.append( percentage  )
    print(subject , percentage)

graph = sns.barplot( y= y_axis , x= x_axis ,  dodge=False , color = '#e63c30' )

graph.set_title('Translations of books in England' , size = 20) 
graph.set_xlabel('Subject' , size = 14) 
graph.set_ylabel('Number of books' , size = 14 )

plt.show()

## Social Network Analysis

In [51]:
edges = open('britishauthors_edges.csv','w')
nodes = open('britishauthors_nodes.csv','w')

all_authors = dict()
all_publishers = dict()


edges.write('Source,Target,Type\n')
nodes.write('Id,Label,Type\n')

for book in json_data:
    authors = []
    publishers = []
    for author in book['authors']:
        authors.append(os.path.basename(author['ppn']))
        all_authors[author['ppn']] = author['name']
    for publisher in book['publishers']:
        #print(os.path.basename(author['ppn']))
        publishers.append(os.path.basename(publisher['ppn']))
        all_publishers[publisher['ppn']] = publisher['name']
    for author in authors:
        for publisher in publishers:
            if re.search( r'\d' , publisher):
                edges.write( f'{author},{publisher},Directed\n')
    
for node in all_authors:
    name = all_authors[node]
    if re.search( '\(' , name):
        name = name[:name.index('(')].strip()        
    nodes.write(f'{os.path.basename(node)},"{name}",Author\n')
    
    
for node in all_publishers:
    name = all_publishers[node]
    if re.search( '\(' , name):
        name = name[:name.index('(')].strip()  
    nodes.write(f'{os.path.basename(node)},"{name}",Publisher\n')