In [1]:
import pandas as pd
import json
import re
from os.path import join

In [2]:
# Location of the JSON export, relative to the notebook's working directory.
data_file = join('Data', 'arma_ubl.json')

# Parse the whole file in one go; `data` is indexed by position and iterated
# record by record in the cells that follow.
with open(data_file, encoding='utf-8') as file:
    data = json.load(file)

In [3]:
# Position of the record to inspect.
record_nr = 0

# Print every field of that record as a tab-separated "name<TAB>value" line.
for field, value in data[record_nr].items():
    print(f'{field}\t{value}')

id	990026841800302711_item_3218582
title	Warlicher bericht: wie von den dreyen Churfürsten und Fürsten, Namlich Tryer, Pfaltz unnd Hessen, weylandt Frantz von Sickingen uberzogen: Auch was sich im selbigen mit eroberung seiner und anderer Schlösser, unnd sunst von tag zu tag begeben, durch den Erenhalt verzeychet : Anno M.D.XXIII
language	German
latitude	52.15274
longitude	4.4836
concepts	['http://id.loc.gov/authorities/subjects/sh85085001', 'http://www.wikidata.org/entity/Q107274053', 'http://vocab.getty.edu/aat/300404588', 'http://data.europeana.eu/concept/2847', 'http://data.europeana.eu/concept/2967', 'http://www.wikidata.org/entity/Q107274057']
preview	https://api.europeana.eu/thumbnail/v2/url.json?uri=https%3A%2F%2Fiiif.universiteitleiden.nl%2Fiiif%2F2%2Fhdl%253A1887.1%252Fitem%253A3218597%2Ffull%2F800%2C%2F0%2Fdefault.jpg&type=TEXT
concepts_labels	{'http://id.loc.gov/authorities/subjects/sh85085001': 'Middle Ages', 'http://www.wikidata.org/entity/Q107274053': 'Reading culture

## Map

In [4]:
# Maps a (latitude, longitude) pair to a string of HTML links, one per record
# at that coordinate, each link followed by ' ; '.
gis_data = {}

for ms in data:
    # Coordinates are rounded to five decimals; the rounded pair is the key.
    key = (round(float(ms['latitude']), 5), round(float(ms['longitude']), 5))
    link = f'''<a href="{ms['europeana_landingpage']}" target="_blank">{ms['shelfmark']}</a> ; '''
    gis_data[key] = gis_data.get(key, '') + link


In [5]:
def scale_number(unscaled, to_min, to_max, from_min, from_max):
    """Linearly map `unscaled` from [from_min, from_max] onto [to_min, to_max].

    Parameters:
        unscaled: the value to rescale.
        to_min, to_max: bounds of the target range.
        from_min, from_max: bounds of the source range.

    Returns:
        The rescaled value. When the source range is empty
        (`from_min == from_max`) there is no slope to apply, so `to_min`
        is returned instead of dividing by zero.
    """
    source_span = from_max - from_min
    if source_span == 0:
        # Degenerate source range (e.g. a list with a single distinct value).
        return to_min
    return (to_max - to_min) * (unscaled - from_min) / source_span + to_min

def scale_list(l, to_min, to_max):
    """Rescale every value of `l` onto [to_min, to_max] with `scale_number`.

    The smallest and largest values of `l` serve as the source range.

    Parameters:
        l: an iterable of numbers (a list or a range, for instance).
        to_min, to_max: bounds of the target range.

    Returns:
        A list with one rescaled value per input value; an empty list when
        `l` is empty.
    """
    values = list(l)
    if not values:
        return []
    # Compute the source bounds once instead of once per element.
    lowest, highest = min(values), max(values)
    return [scale_number(value, to_min, to_max, lowest, highest) for value in values]

# Largest number of ';'-separated pieces found in any location's link string
# (0 when there are no locations).
max_nr = max(
    (len(re.split(';', links)) for links in gis_data.values()),
    default=0,
)

# Every possible count from 1 up to max_nr, rescaled onto the range 10-20.
nr_list = range(1, max_nr + 1)
new_list = scale_list(nr_list, 10, 20)

# Lookup table: piece count -> rescaled value.
rescaled_dict = dict(zip(nr_list, new_list))
    


In [6]:
# Not used in this cell; kept so the name stays defined.
geo = dict()

# Build a GeoJSON FeatureCollection holding one Point feature per coordinate
# pair in gis_data.
features = []
for (lat, long), links in gis_data.items():
    # Count the ';'-separated pieces exactly as was done when rescaled_dict
    # was built, so the lookup key below is guaranteed to exist.
    nr_mss = re.split(';', links)

    # Each link was stored followed by ' ; ', so the trailing separator has
    # whitespace after the semicolon; strip both. The result goes into a local
    # name so gis_data is not altered and the cell can be re-run safely.
    description = re.sub(r'\s*;\s*$', '', links)

    features.append({
        'type': 'Feature',
        'properties': {
            'description': description,
            'number': rescaled_dict[len(nr_mss)],
        },
        # Coordinates are written longitude first, then latitude.
        'geometry': {'type': 'Point', 'coordinates': [long, lat]},
    })

feature_collection = {'type': 'FeatureCollection', 'features': features}

# Write the collection as a JavaScript variable assignment.
json_out = json.dumps(feature_collection, indent=4)
with open(join('Data', 'arma_map.js'), 'w', encoding='utf-8') as out:
    out.write(f'''var arma_data = {json_out}''')


The code above creates <a href="https://bookandbyte.universiteitleiden.nl/DACH/arma_ubl.php?v=map" target="_blank">an interactive map</a> which indicates the locations where the manuscripts were created. 

## Network

In [7]:
# (shelfmark, creator name) pairs: one edge per manuscript/creator link.
edges = []
# Labels of all agents seen so far, keyed by their identifier.
wikidata = dict()

for ms in data:
    # Remember the labels of this record's agents before resolving creators.
    if 'agents' in ms:
        wikidata.update(ms['agents'])

    msid = ms['shelfmark']

    for creator in ms['creator']:
        if not creator.startswith('http'):
            # Not a URI: the value is used directly as the creator's name.
            edges.append((msid, creator))
            continue

        # Of the URI creators, only those containing 'wikidata' are looked up;
        # all other URIs are skipped.
        if 'wikidata' not in creator:
            continue

        creator_name = wikidata.get(creator)
        if creator_name:
            edges.append((msid, creator_name))

In [8]:
# The first element of every edge is tagged 'Book', the second 'Person'.
nodes_type = dict()
for e1, e2 in edges:
    nodes_type[e1] = 'Book'
    nodes_type[e2] = 'Person'

# Give each node a sequential id, starting at 1, in first-seen order.
nodes = {node: node_id for node_id, node in enumerate(nodes_type, start=1)}

# Write both CSV files inside `with` blocks so the handles are closed even if
# a write fails part-way through.
with open('edges.csv', 'w', encoding='utf-8') as edges_file:
    edges_file.write('Source,Target\n')
    for e1, e2 in edges:
        edges_file.write(f'{int(nodes[e1])},{int(nodes[e2])}\n')

with open('nodes.csv', 'w', encoding='utf-8') as nodes_file:
    nodes_file.write('Id,Label,Type\n')
    for node, node_id in nodes.items():
        # The label is wrapped in double quotes, so any double quote inside it
        # must be doubled to keep the row well-formed CSV.
        label = str(node).replace('"', '""')
        nodes_file.write(f'{node_id},"{label}",{nodes_type[node]}\n')

In [9]:


# Writes edges.csv and nodes.csv from the `edges`, `nodes` and `nodes_type`
# objects built in the earlier cells. `with` blocks guarantee the handles are
# closed even if a write fails.
with open('edges.csv', 'w', encoding='utf-8') as edges_file:
    edges_file.write('Source,Target\n')
    for e1, e2 in edges:
        edges_file.write(f'{int(nodes[e1])},{int(nodes[e2])}\n')

with open('nodes.csv', 'w', encoding='utf-8') as nodes_file:
    nodes_file.write('Id,Label,Type\n')
    for node in nodes:
        # The label is wrapped in double quotes, so any double quote inside it
        # must be doubled to keep the row well-formed CSV.
        label = str(node).replace('"', '""')
        nodes_file.write(f'{nodes[node]},"{label}",{nodes_type[node]}\n')

In [12]:
import networkx as nx
from networkx.algorithms import community
from pyvis.network import Network

# Read back the node and edge tables written by the earlier cells.
nodes_df = pd.read_csv('nodes.csv')
edges_df = pd.read_csv('edges.csv')

G = nx.Graph()

# One graph node per row; 'Book' rows get one colour, all other rows another.
for _, row in nodes_df.iterrows():
    label = row['Label']
    colour = '#EE7733' if row['Type'] == 'Book' else '#007788'
    G.add_node(row['Id'], label=label, title=label, color=colour, strokeWidth=200)

# One undirected edge per row of the edge table.
for _, row in edges_df.iterrows():
    G.add_edge(int(row['Source']), int(row['Target']))

# Node size is the node's degree multiplied by a fixed factor.
scale = 10
d = {node_id: scale * degree for node_id, degree in dict(G.degree).items()}
nx.set_node_attributes(G, d, 'size')

# Render the graph with pyvis and save it as an HTML page.
nt = Network('100%', '100%', bgcolor="#dce5f2")
nt.force_atlas_2based(
    gravity=-60,
    central_gravity=0.01,
    spring_length=100,
    spring_strength=0.08,
    damping=0.4,
    overlap=0,
)
nt.from_nx(G)
nt.show('arma_network.html')

The code in this section creates <a href="https://bookandbyte.universiteitleiden.nl/DACH/arma_ubl.php?v=network" target="_blank">a network visualisation</a> which indicates the relations between authors and manuscripts. 

## Timeline

In [None]:

import os
import re
import json

# Not referenced by any other code visible in this notebook.
lines = []
# Collects the timeline event dicts that the loop further down appends.
events = []



def create_event( start_date , shelfmark , img , title ):
    """Build one timeline event as a plain dict.

    Parameters:
        start_date: the year, stored under start_date -> year.
        shelfmark: used as the event headline (converted to a string).
        img: URL of an image for the event; when empty or None the event
            gets no 'media' entry.
        title: used as the event's body text (converted to a string).

    Returns:
        A dict with the keys 'media' (only when `img` is given),
        'start_date' and 'text', in that order.
    """
    new_event = dict()
    # A truthiness test covers both an empty string and None, whereas
    # len(None) would raise a TypeError.
    if img:
        new_event["media"] = { 'url': img }
    new_event["start_date"] = { 'year': start_date }
    new_event["text"] = { "headline": f'{ shelfmark }', "text": f'{ title }' }
    return new_event



for ms in data:

    # Prefer the record's explicit 'year'.
    start_date = ms.get('year')
    if start_date is None:
        # No year: build "<leading digits>50" from 'timespan' instead. Two
        # leading characters are kept when the value starts with '1',
        # otherwise one.
        # NOTE(review): whether those leading digits denote the century number
        # or the first digits of the year is not visible here - confirm that
        # the resulting year falls in the intended century.
        century = ms.get('timespan')
        if century is not None:
            if re.search( '^1' , century ):
                start_date = century[:2] + '50'
            else:
                start_date = century[:1] + '50'

    # A value that is not a bare 3- or 4-digit year is cut down to its first
    # four characters and converted to an integer.
    if start_date is not None and not re.search( r'^\d{3,4}$' , str(start_date) ):
        start_date = int(str(start_date)[:4])

    # Records without any date are left off the timeline. The append must stay
    # behind this guard: otherwise an undated record would re-append the
    # previous record's event, or fail if it is the very first record.
    if start_date is None:
        continue

    new_event = create_event( start_date , f'''<a href="{ms['europeana_landingpage']}" target="_blank">{ms['shelfmark']}</a>''' , ms['preview'] , ms['title'] )
    events.append( new_event )



# Title slide of the timeline.
title = dict()
title['text'] = { 'headline': 'Medieval Manuscripts from the collections of Leiden University Libraries' , 'text': '<p>ARMA</p>'}

# Complete timeline document: the title slide plus all collected events.
json_file = { 'title': title , 'events': events }

json_out = json.dumps( json_file , indent = 4 )

# The `with` block closes the file even if the write fails.
with open( 'arma_bnf_timeline.json' , 'w' , encoding='utf-8' ) as out:
    out.write( json_out )


The code in this section creates <a href="https://bookandbyte.universiteitleiden.nl/DACH/arma_ubl.php?v=timeline" target="_blank">a timeline</a> which indicates the years in which the manuscripts were produced.  