# Analyses of places of publication

In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
from IPython.core.display import display, HTML
import pandas as pd
import numpy as np
import json
import re
from stcn import *
import folium
from tqdm import tqdm
from collections import Counter

## Place of creation

The following SPARQL query requests data about the titles published in the fifteenth or sixteenth century from the [Short Title Catalogue of the Netherlands](https://data.cerl.org/stcn/)

In [2]:
# SPARQL query for the STCN dataset. For every resource whose main entity page
# is part of <http://data.bibliotheken.nl/id/dataset/stcn> it selects the
# publication node and the title, optionally an alternative title, and
# optionally the publication details (start date, imprint description,
# publisher, publisher name and the locality of the publisher's address).
# The FILTER keeps only rows whose ?publ_year starts with "14" or "15".
query = """

PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX schema: <http://schema.org/>
PREFIX kb: <http://data.bibliotheken.nl/def#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT * WHERE {

?resource schema:mainEntityOfPage ?mainEntity .
?mainEntity schema:isPartOf <http://data.bibliotheken.nl/id/dataset/stcn>  . 
  
?resource schema:publication ?publ .
?resource schema:name ?title . 

OPTIONAL {
?resource schema:alternateName ?alt_title . }

OPTIONAL {
?publ schema:startDate ?publ_year . 
?publ schema:description ?imprint . 
?publ schema:publishedBy ?publisher . 
?publisher schema:name ?publ_name .
?publisher schema:location ?location_node .
?location_node schema:address ?address .
?address schema:addressLocality ?place . 
} .

FILTER( regex(?publ_year, "^1[45]", "i") ) .

} 

"""
# run_query is not defined in this notebook (presumably it is provided by
# `from stcn import *` -- verify). The cells below use its result as a pandas
# DataFrame with columns such as 'resource.value' and 'publ_year.value'.
df = run_query(query)


In [3]:
# Count every resource once, in case it occurs in several result rows.
nr_results = len(df.drop_duplicates(subset=['resource.value']))
print(f'The query returns information about {nr_results} titles.')

The query returns information about 8332 titles.


In some records, the year of publication is truncated, with an 'x' in place of each missing digit. The cell below replaces these characters with the digit '5', to make it easier to process those values. 

In [4]:
def standardise_date(row):
    """Return the publication year of a result row as an integer.

    Every 'x' or 'X' in the value of 'publ_year.value' is replaced by the
    digit '5' before the value is converted.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the key 'publ_year.value'.

    Returns
    -------
    int or float
        The year as an int, or ``np.nan`` when the value is not a string,
        contains no run of four digits, or cannot be converted to an int.
    """
    raw_year = row['publ_year.value']

    # Missing values (e.g. NaN) cannot be searched with a regular expression.
    if not isinstance(raw_year, str):
        return np.nan

    # `flags` must be passed by keyword: the fourth positional argument of
    # re.sub is `count`, so a positional re.IGNORECASE would be read as
    # "replace at most 2 occurrences, case-sensitively".
    year = re.sub(r'x', '5', raw_year, flags=re.IGNORECASE)
    if year != raw_year:
        # Store the substituted value in the row as well.
        # NOTE(review): whether this reaches the underlying DataFrame depends
        # on how pandas builds the row object -- verify before relying on it.
        row['publ_year.value'] = year

    if re.search(r'\d{4}', year):
        try:
            return int(year)
        except ValueError:
            # Four digits are present, but the value as a whole is not a number.
            return np.nan
    return np.nan
    
    
# Add the standardised year as the column 'datetime' and keep only the rows
# for which a year could be derived.
df = (
    df
    .assign(datetime=df.apply(standardise_date, axis=1))
    .dropna(subset=['datetime'])
)

The file {Download}`stcn_gis.tsv<stcn_gis.tsv>` contains a list of all the cities that are mentioned in the STCN. Additionally, it contains the latitude and the longitude of these locations, and information about the countries these cities are in. The assignment of the country is based on today's political borders. The information was created via [geocoding](https://developers.arcgis.com/documentation/mapping-apis-and-services/geocoding/). 


The file 'stcn_gis.tsv' is read using the code below. 

In [5]:
# Tab-separated list of places with their coordinates and display names.
locations = pd.read_csv('stcn_gis.tsv', sep='\t')

# Lookup tables keyed by place name.
gis = {}        # place name -> [latitude, longitude]
country = {}    # place name -> country name

for _, place_row in locations.iterrows():
    place_name = place_row['name']
    gis[place_name] = [place_row['latitude'], place_row['longitude']]

    # The country is the last comma-separated element of the display name;
    # a display name without a comma is used as a whole.
    country_name = re.split(r',', place_row['display_name'])[-1]

    # When that element contains slashes, only the text after the last slash is kept.
    if '/' in country_name:
        country_name = country_name.rsplit('/', 1)[-1]

    country[place_name] = country_name.strip()


The following code converts the STCN data to the JSON format, to ease the processing of these data. The place names are also enriched with the geographic coordinates. Running the cell may take time. The data set is written to disk under the file name 'stcn_locations.json'.

In [6]:
# Identifiers of all resources, in the order in which they first occur once
# the rows are sorted on the standardised year.
df_by_year = df.sort_values('datetime')
unique_ids = df_by_year['resource.value'].unique()

# Receives one dictionary per resource.
data = list()

def print_value(value):
    """Return `value` unchanged, or an empty string when pandas treats it as missing."""
    return '' if pd.isna(value) else value
    
def get_values(df,field,fields_dict):
    """Collect selected columns of the rows that are distinct on `field`.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to read from.
    field : str
        Column used to drop duplicate rows.
    fields_dict : dict
        Maps a column name in `df` to the key under which its value is stored.

    Returns
    -------
    list of dict
        One dictionary per distinct row; missing values are stored as ''.
    """
    distinct_rows = df.drop_duplicates(field)
    return [
        {
            target_key: print_value(source_row[column])
            for column, target_key in fields_dict.items()
        }
        for _, source_row in distinct_rows.iterrows()
    ]


# Group the rows per resource once, instead of filtering the whole frame again
# for every resource. `sort=False` leaves the groups unsorted; the rows inside
# a group keep the order they have in `df`.
rows_by_resource = df.groupby('resource.value', sort=False)

# Columns copied into every publisher entry: column in `df` -> key in the output.
publisher_fields = {
    'publisher.value': 'ppn',
    'publ_name.value': 'name',
    'place.value': 'place',
}

for resource in tqdm(unique_ids):

    df_resource = rows_by_resource.get_group(resource)
    # Title, year and place are taken from the first row of the resource.
    first_row = df_resource.iloc[0]

    record = dict()
    record['ppn'] = resource
    record['title'] = first_row['title.value']
    if not pd.isna(first_row['alt_title.value']):
        record['alternative_title'] = first_row['alt_title.value']
    # The year is stored as found in 'publ_year.value', not as the numeric 'datetime' column.
    record['year'] = first_row['publ_year.value']

    ## Publishers: one entry per distinct publisher of this resource
    record['publishers'] = get_values(df_resource, 'publisher.value', publisher_fields)

    # A location is only added when the place name has known coordinates.
    place = first_row['place.value']
    coordinates = gis.get(place)

    if coordinates:
        record['location'] = {
            'place_name': place,
            'latitude': coordinates[0],
            'longitude': coordinates[1],
            'country': country.get(place),
        }

    data.append(record)

with open('stcn_locations.json', 'w', encoding='utf-8') as out:
    json.dump(data, out, indent=4)

# Read the file back with the encoding it was written in; the context manager
# closes the handle even when parsing fails.
with open('stcn_locations.json', encoding='utf-8') as f:
    json_data = json.load(f)



100%|██████████████████████████████████████| 6442/6442 [00:11<00:00, 545.01it/s]


In [7]:
def singular_plural(noun,count):
    """Return `noun` in the form that matches `count`.

    The noun is returned unchanged when `count` is exactly 1; in every other
    case (including 0) an 's' is appended.
    """
    if count == 1:
        return noun
    return noun + 's'
        
def _print_place_map(place_count, out_file, center, zoom_start, radius_per_title):
    """Draw one circle per place on a folium map, save the map and return it.

    Parameters
    ----------
    place_count : collections.Counter
        Number of titles per place name. Every place name must be a key of
        the module-level `gis` dictionary.
    out_file : str
        Path of the HTML file the map is saved to.
    center : list of float
        Latitude and longitude on which the map is centred.
    zoom_start : int
        Initial zoom level of the map.
    radius_per_title : int
        Factor by which a place's count is multiplied to get its circle radius.
    """
    # Percentage sizes are written as "100%"; the percent sign follows the number.
    folium_map = folium.Map(
        location=center,
        width="100%",
        height="100%",
        zoom_start=zoom_start)

    for place, count in place_count.most_common():
        coordinates = gis[place]

        folium.Circle(
            location=[coordinates[0], coordinates[1]],
            radius=count * radius_per_title,
            color='#1827c9',
            fill=False,
            fill_opacity=0.5,
            fill_color='#c72d22',
            weight=2,
            popup=f'{place}: {count} {singular_plural("title",count)}.').add_to(folium_map)

    folium_map.save(out_file)
    return folium_map


def print_map_nl(place_count,out_file='map.html'):
    """Map the places in `place_count` at zoom level 8.

    The map is centred on [51.875127716984274, 5.360011275145361], saved to
    `out_file` and returned. The circle radius is 25 times the number of titles.
    """
    return _print_place_map(
        place_count,
        out_file,
        center=[51.875127716984274, 5.360011275145361],
        zoom_start=8,
        radius_per_title=25)


def print_map_world(place_count,out_file='map.html'):
    """Map the places in `place_count` at zoom level 3.

    The map is centred on [52.157004081232024, 4.495349166689632], saved to
    `out_file` and returned. The circle radius is 500 times the number of titles.
    """
    return _print_place_map(
        place_count,
        out_file,
        center=[52.157004081232024, 4.495349166689632],
        zoom_start=3,
        radius_per_title=500)


How did the art of printing spread across the Low Countries? The code below identifies the cities that are mentioned in the export from the STCN. The analysis is broken down by periods of 25 years. 

In [8]:
from IPython.display import display

# NOTE(review): `cities` is created once, before the period loop, so every map
# also contains the titles of all earlier periods. Confirm that cumulative maps
# are intended; otherwise reset the list at the start of each period.
cities = []

period_length = 25

# Country names (as stored in the 'country' field) that count as the Low Countries.
low_countries = ('Nederland', 'Belgien')

# Replace the placeholder characters in truncated years once, before the period
# loop. `flags` must be passed by keyword: the fourth positional argument of
# re.sub is `count`, not `flags`.
for title in json_data:
    title['year'] = re.sub(r'x', '5', title['year'], flags=re.IGNORECASE)

for year in range( 1450 , 1600, period_length ):
    end = year+period_length
    out_file= f'map_{year}-{end}.html'

    print(f'\n{year}-{end}\n\n')

    for title in json_data:
        # Titles without known coordinates have no 'location' entry.
        if 'location' not in title:
            continue
        location = title['location']
        if year <= int(title['year']) < end and location['country'] in low_countries:
            cities.append(location['place_name'])

    place_count = Counter(cities)
    folium_map = print_map_nl(place_count,out_file= out_file)
    display(folium_map)


1450-1475





1475-1500





1500-1525





1525-1550





1550-1575





1575-1600




These maps can also be viewed in a separate window:

* {Download}`1450-1475<map_1450-1475.html>`
* {Download}`1475-1500<map_1475-1500.html>`
* {Download}`1500-1525<map_1500-1525.html>`
* {Download}`1525-1550<map_1525-1550.html>`
* {Download}`1550-1575<map_1550-1575.html>`
* {Download}`1575-1600<map_1575-1600.html>`



## Printing in Dutch outside of the Low Countries

In [9]:
# NOTE(review): `cities` is created once, before the period loop, so every map
# also contains the titles of all earlier periods. Confirm that cumulative maps
# are intended; otherwise reset the list at the start of each period.
cities = []

period_length = 25

# Country names (as stored in the 'country' field) that count as the Low Countries.
low_countries = ('Nederland', 'Belgien')

# Replace the placeholder characters in truncated years once, before the period
# loop. `flags` must be passed by keyword: the fourth positional argument of
# re.sub is `count`, not `flags`.
for title in json_data:
    title['year'] = re.sub(r'x', '5', title['year'], flags=re.IGNORECASE)

for year in range( 1450 , 1600, period_length ):
    end = year+period_length
    print(f'{year}-{end}')

    for title in json_data:
        # Titles without known coordinates have no 'location' entry.
        if 'location' not in title:
            continue
        location = title['location']
        if not (year <= int(title['year']) < end):
            continue
        # Only places outside the Low Countries are of interest here.
        if location['country'] in low_countries:
            continue
        if location['place_name'] != 'Place unknown':
            cities.append(location['place_name'])

    place_count = Counter(cities)

    folium_map = print_map_world(place_count,out_file= f'map_europe_{year}-{end}.html')
    display(folium_map)

1450-1475


1475-1500


1500-1525


1525-1550


1550-1575


1575-1600


These maps can also be viewed in a separate window:

* {Download}`1450-1475<map_europe_1450-1475.html>`
* {Download}`1475-1500<map_europe_1475-1500.html>`
* {Download}`1500-1525<map_europe_1500-1525.html>`
* {Download}`1525-1550<map_europe_1525-1550.html>`
* {Download}`1550-1575<map_europe_1550-1575.html>`
* {Download}`1575-1600<map_europe_1575-1600.html>`


