In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
from os.path import join
import json
import re
from collections import Counter
from tqdm import tqdm
import seqsim

In [2]:
# Load the book records used by every analysis cell below.
path = 'production_layer.json'
# The context manager closes the file handle as soon as parsing finishes,
# instead of leaving it open for the lifetime of the kernel.
with open(path) as json_file:
    json_data = json.load(json_file)

## Number of lines

In [3]:
# Collect `nr_lines` for every book whose date starts with a year before 1450.
data = []
for book in json_data:
    if 'date' not in book:
        continue
    # The year is read from the first four characters of the date value.
    year = int(str(book['date'])[:4])
    if year >= 1450:
        continue
    if 'nr_lines' in book:
        data.append(book['nr_lines'])

# Count how many books share each value, then lay the counts out in
# ascending order of the value for plotting.
lines_freq = Counter(data)
x_axis = sorted(lines_freq)
y_axis = [lines_freq[value] for value in x_axis]

fig = px.bar(
    x=x_axis,
    y=y_axis,
    height=350,
    labels={"x": "Number of lines", "y": "Frequency"},
)

fig.write_image('number_of_lines.png')

## Book Block Height

In [4]:
# Collect `book_block_height` (as int) for every book whose date starts with
# a year before 1450.
data = []
for book in json_data:
    if 'date' not in book:
        continue
    # The year is read from the first four characters of the date value.
    year = int(str(book['date'])[:4])
    if year >= 1450:
        continue
    if 'book_block_height' in book:
        data.append(int(book['book_block_height']))

# Count how many books share each height, then lay the counts out in
# ascending order of the height for plotting.
lines_freq = Counter(data)
x_axis = sorted(lines_freq)
y_axis = [lines_freq[value] for value in x_axis]

fig = px.bar(
    x=x_axis,
    y=y_axis,
    height=350,
    width=1000,
    labels={"x": "Book block Height", "y": "Frequency"},
)

fig.write_image('book_block_height.png')

In [5]:
# Box plot of the per-book heights collected in `data` by the previous cell.
# `x_axis` holds only the distinct heights (one entry per unique value), so
# plotting it would ignore how often each height occurs and distort the
# median and quartiles.
fig = px.box(y=data, height=450)
fig.write_image('book_block_height_boxplot.png')

## Text Surface Height

In [6]:
# Collect `text_surface_height` (as float) for every book whose date starts
# with a year before 1450.
data = []
for book in json_data:
    if 'date' not in book:
        continue
    # The year is read from the first four characters of the date value.
    year = int(str(book['date'])[:4])
    if year >= 1450:
        continue
    if 'text_surface_height' in book:
        data.append(float(book['text_surface_height']))

# Count how many books share each height, then lay the counts out in
# ascending order of the height for plotting.
lines_freq = Counter(data)
x_axis = sorted(lines_freq)
y_axis = [lines_freq[value] for value in x_axis]

fig = px.bar(
    x=x_axis,
    y=y_axis,
    height=350,
    labels={"x": "Text surface height", "y": "Frequency"},
)

fig.write_image('text_surface.png')

In [7]:
# Box plot of the per-book text surface heights collected in `data` by the
# previous cell. `x_axis` holds only the distinct heights (one entry per
# unique value), so plotting it would ignore how often each height occurs
# and distort the median and quartiles.
fig = px.box(y=data, height=450)
fig.write_image('text_surface_boxplot.png')