In [1]:
# This notebook provides an overview of the BookSampo data as it is in the summer 2022.


In [2]:
# packages

import matplotlib.pyplot as plt
import numpy    as np
#import networkx as nx
from operator import itemgetter
import pandas   as pd
import rdflib as rdflib
import re
from scipy.signal import convolve2d
import seaborn as sns
from SPARQLWrapper import SPARQLWrapper, JSON, POST
import sys
from rdflib.namespace import XSD, Namespace
from rdflib.term import URIRef

import IPython
import pprint
pp = pprint.PrettyPrinter(indent=4)

import glob

In [3]:
from helpers import *
from constants import *

## A comparison to 2013 

In [4]:
# As of 2022

litworks ="""
SELECT   (COUNT(DISTINCT ?teos) as ?literaryWorks) (COUNT(DISTINCT ?edition) as ?editions) (COUNT(DISTINCT ?editionpart) as ?editionParts) (COUNT(DISTINCT ?alleditions) as ?allEditions) 
WHERE {
  ?teos a/rdfs:subClassOf* <http://www.yso.fi/onto/kaunokki#teos> . # class teos or subclass of teos

  
  OPTIONAL { ?teos <http://www.yso.fi/onto/kaunokki#manifests_in> ?edition }
  OPTIONAL {?teos <http://www.yso.fi/onto/kaunokki#manifests_in_part> ?editionpart }
  BIND( COALESCE(?edition, ?editionpart) as ?alleditions)
  
  
}

"""

# Query works
sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
sparql.setQuery(PREFIXES + litworks)
sparql.setReturnFormat(JSON)
sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
results = sparql.query().convert()

vars = results['head']['vars']

#print("Query returned {} results".format(len(results["results"]["bindings"])))
lit = JSON2Pandas(results)


PICS =""" 
SELECT  (COUNT(DISTINCT ?kuva) as ?bookCovers) 
WHERE {
  ?kuva a/rdfs:subClassOf* <http://www.yso.fi/onto/kaunokki#kansi> .
  
}
"""

# Query cover pictures
sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
sparql.setQuery(PREFIXES + PICS)
sparql.setReturnFormat(JSON)
sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
results = sparql.query().convert()

vars = results['head']['vars']

#print("Query returned {} results".format(len(results["results"]["bindings"])))
covers = JSON2Pandas(results)

In [5]:
# other features

Q =""" 
SELECT (COUNT(DISTINCT ?actor) as ?mainChars)  (COUNT(DISTINCT ?review) as ?contemporaryReviews)   (COUNT(DISTINCT ?link) AS ?weblinks) (COUNT(DISTINCT ?sarja) AS ?LitSeries) (COUNT(DISTINCT ?palkinto) AS ?litAwards) (COUNT(DISTINCT ?palkintosarja) AS ?litAwardSeries) 
(COUNT(DISTINCT ?film) AS ?movies)  (COUNT(DISTINCT ?person) AS ?people) (COUNT(DISTINCT ?authorpic) AS ?authorPics) (COUNT(DISTINCT ?publisher) AS ?publishers)

WHERE {

  { ?review a kaunokki:review . }
  
  UNION
  
  {
    ?sarja a <http://www.yso.fi/onto/kaunokki#sarja> . }
  
   UNION
  
  {
    ?teos a/rdfs:subClassOf* <http://www.yso.fi/onto/kaunokki#teos> . # class teos or subclass of teos
  	?teos kaunokki:paahenkilo ?actor . 
  }
  
  UNION {
   ?palkinto a 	kaunokki:palkintoInstanssi . 
  }
  
   UNION {
   ?s	kaunokki:palkintosarja ?palkintosarja . 
  }
  UNION {
  ?film a <http://www.yso.fi/onto/kaunokki#film> . #==> elokuvat
  }
   UNION {
  ?person a foaf:Person .
  }
   UNION {
  ?authorpic a <http://seco.tkk.fi/saha3/kirjasampo/KirjailijanKuva> .
  }
  UNION
  
  {
    ?publisher a kaunokki:kustantaja .
    
  }
  
  UNION {
    ?link a kaunokki:Link .
}
  
}
"""

# Query cover pictures
sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
sparql.setQuery(PREFIXES + Q)
sparql.setReturnFormat(JSON)
sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
results = sparql.query().convert()

vars = results['head']['vars']

#print("Query returned {} results".format(len(results["results"]["bindings"])))
numbers = JSON2Pandas(results)

In [6]:
# join these queries
allnumbers = lit.join(covers).join(numbers)

print(allnumbers.T.style.to_latex())

\begin{tabular}{lr}
 & 0 \\
literaryWorks & 207771 \\
editions & 213161 \\
editionParts & 79447 \\
allEditions & 285518 \\
bookCovers & 112971 \\
mainChars & 45397 \\
contemporaryReviews & 14644 \\
weblinks & 25017 \\
LitSeries & 8374 \\
litAwards & 6310 \\
litAwardSeries & 290 \\
movies & 2010 \\
people & 62207 \\
authorPics & 4140 \\
publishers & 5455 \\
\end{tabular}





*As of 2013* 
(done by Eetu Mäkelä)

- Literary Works 93,000
- Editions 127,000
- Book Covers 27,000
- Fictional Characters 19,000
- Contemporary Reviews 15,000
- Weblinks 10,000
- Literary Series 2,900
- Literary Awards 2,700
- Literary Award Series 200
- Movies 1,100
- People (e.g. Authors) 29,000
- Author’s Pictures 2,600
- Publishers 2,600

## Literary works per class

In [7]:
QUERY = """
SELECT  ?class_label ?bookclass (COUNT(DISTINCT ?id) as ?instanceCount) WHERE {
 ?bookclass rdfs:subClassOf <http://www.yso.fi/onto/kaunokki#teos> ; skos:prefLabel ?class_label .
  FILTER(LANG(?class_label)='fi')
  
  ?id a ?bookclass .
  ?id kaunokki:alkukieli <http://lexvo.org/id/iso639-3/fin> .
} 

GROUP BY ?class_label ?bookclass
ORDER BY ?class_label

"""

# Query works
sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
sparql.setQuery(PREFIXES + QUERY)
sparql.setReturnFormat(JSON)
sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
results = sparql.query().convert()

vars = results['head']['vars']

print("Query returned {} results".format(len(results["results"]["bindings"])))


Query returned 24 results


In [8]:
data = JSON2Pandas(results)
data.reset_index()#set_index('id', inplace=True)
print(len(data))
display(data)

24


Unnamed: 0,class_label,bookclass,instanceCount
0,aforismikokoelmat,http://seco.tkk.fi/saha3/kirjasampo/Aforismiko...,385
1,esseekokoelmat,http://www.yso.fi/onto/kaunokki#essee,376
2,kansanrunokokoelmat,http://www.yso.fi/onto/kaunokki#kansanruno,166
3,kansanrunot,http://seco.tkk.fi/saha3/kirjasampo/Kansanrunot,43
4,kokoomateokset,http://www.yso.fi/onto/kaunokki#kokoomateos,1759
5,kuunnelmat,http://seco.tkk.fi/saha3/kirjasampo/Kuunnelmat,59
6,"kuunnelmat, kokoelmat",http://www.yso.fi/onto/kaunokki#kuunnelma,144
7,kuvakirjat,http://www.yso.fi/onto/kaunokki#kuvakirjat,3370
8,käsikirjoitukset,http://www.yso.fi/onto/kaunokki#kasikirjoitus,47
9,novellikokoelmat,http://www.yso.fi/onto/kaunokki#novellikokoelma,2794


In [9]:
class_count=data["class_label"]+" ("+data["instanceCount"].astype(str) +")"
class_count

0         aforismikokoelmat (385)
1            esseekokoelmat (376)
2       kansanrunokokoelmat (166)
3                kansanrunot (43)
4           kokoomateokset (1759)
5                 kuunnelmat (59)
6     kuunnelmat, kokoelmat (144)
7               kuvakirjat (3370)
8           käsikirjoitukset (47)
9         novellikokoelmat (2794)
10               novellit (18693)
11               näytelmät (2164)
12     näytelmät, kokoelmat (144)
13          pakinakokoelmat (820)
14                   pakinat (52)
15               romaanit (23294)
16          runokokoelmat (10698)
17                    runot (353)
18                   sadut (8287)
19       sarjakuva-albumit (1758)
20                sarjakuvat (35)
21           satukokoelmat (1463)
22             tietokirjat (1159)
23        vitsit ja kaskut  (485)
dtype: object

## see annotations per class



In [10]:
# loop through all classes
Q = """
SELECT  ((?novalue/?total) AS ?result) WHERE {
SELECT (COUNT(DISTINCT ?teos) AS ?total) (COUNT(DISTINCT ?teemateos) AS ?novalue) 
    WHERE

{
    
    ?teos a ?class . # teos is an abstract work
    #?teos kaunokki:alkukieli <http://lexvo.org/id/iso639-3/fin> . # if you want only literature originally written in Finnish
  
   OPTIONAL { 
    
    ?teos a ?class .
    FILTER NOT EXISTS  { ?teos kaunokki:teema ?teema }
    BIND(?teos AS ?teemateos) 
    
  } 
} 
}
"""

classes = list(data["bookclass"])
classes = [c.n3() for c in classes]
print(classes)
variables = ["kaunokki:teema", "kaunokki:asiasana", "kaunokki:toimija" , "kaunokki:paahenkilo", "kaunokki:alkukieli", "kaunokki:genre", 
             "kaunokki:tekija", "kaunokki:paikka", "kaunokki:worldPlace", "kaunokki:manifests_in", "kaunokki:hasTimeOfStory", "skos:prefLabel", "sch:isbn", "dce:description"]

['<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat>', '<http://www.yso.fi/onto/kaunokki#essee>', '<http://www.yso.fi/onto/kaunokki#kansanruno>', '<http://seco.tkk.fi/saha3/kirjasampo/Kansanrunot>', '<http://www.yso.fi/onto/kaunokki#kokoomateos>', '<http://seco.tkk.fi/saha3/kirjasampo/Kuunnelmat>', '<http://www.yso.fi/onto/kaunokki#kuunnelma>', '<http://www.yso.fi/onto/kaunokki#kuvakirjat>', '<http://www.yso.fi/onto/kaunokki#kasikirjoitus>', '<http://www.yso.fi/onto/kaunokki#novellikokoelma>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1237530959951>', '<http://www.yso.fi/onto/kaunokki#naytelma>', '<http://seco.tkk.fi/saha3/u0a6a963d-2b8c-40ee-b213-560653d9806b>', '<http://www.yso.fi/onto/kaunokki#pakina>', '<http://seco.tkk.fi/saha3/kirjasampo/Pakinat>', '<http://www.yso.fi/onto/kaunokki#romaani>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1238070215614>', '<http://www.yso.fi/onto/kaunokki#runo>', '<http://seco.tkk.fi/saha3/kirjasampo/Sadut>', '<http://www.

In [11]:
## loop and save results into a dictionary
stats2 = {c:{v:"" for v in variables} for c in classes}


for c in classes:
    print (c)
    for v in variables:
        Q_ = Q.replace("class", c) # set the wanted class
        Q_ = Q_.replace("kaunokki:teema", v)  # loop through all variables
        sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
        sparql.setQuery(PREFIXES + Q_)
        sparql.setReturnFormat(JSON)
        sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
        results3 = sparql.query().convert()

        vars = results3['head']['vars']
        result = results3["results"]["bindings"][0]["result"]["value"] 
        print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)
        stats2[c][v] = result

<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat>
Query returned 1 result: 0.581436077057793345008757
Query returned 1 result: 0.987740805604203152364273
Query returned 1 result: 0.8756567425569176882662
Query returned 1 result: 0.978984238178633975481611
Query returned 1 result: 0.033274956217162872154116
Query returned 1 result: 0.059544658493870402802102
Query returned 1 result: 0.171628721541155866900175
Query returned 1 result: 0.996497373029772329246935
Query returned 1 result: 0.970227670753064798598949
Query returned 1 result: 0.014010507880910683012259
Query returned 1 result: 0.996497373029772329246935
Query returned 1 result: 0.014010507880910683012259
Query returned 1 result: 0.297723292469352014010508
Query returned 1 result: 0.362521891418563922942207
<http://www.yso.fi/onto/kaunokki#essee>
Query returned 1 result: 0.452950558213716108452951
Query returned 1 result: 0.972886762360446570972887
Query returned 1 result: 0.824561403508771929824561
Query returned 1 resul

Query returned 1 result: 0.956461644782308223911541
Query returned 1 result: 0.872840359364201796821009
Query returned 1 result: 0.912923289564616447823082
Query returned 1 result: 0.041465100207325501036628
Query returned 1 result: 0.247408431237042156185211
Query returned 1 result: 0.01451278507256392536282
Query returned 1 result: 0.928472702142363510711818
Query returned 1 result: 0.95680718728403593642018
Query returned 1 result: 0.072909467864547339322737
Query returned 1 result: 0.958880442294402211472011
Query returned 1 result: 0.008293020041465100207326
Query returned 1 result: 0.859364201796821008984105
Query returned 1 result: 0.7885279889426399447132
<http://seco.tkk.fi/saha3/u0a6a963d-2b8c-40ee-b213-560653d9806b>
Query returned 1 result: 0.807962529274004683840749
Query returned 1 result: 0.976580796252927400468384
Query returned 1 result: 0.892271662763466042154567
Query returned 1 result: 0.918032786885245901639344
Query returned 1 result: 0.053864168618266978922717
Que

Query returned 1 result: 0.308058483509010540632438
Query returned 1 result: 0.562393743624617477048623
<http://www.seco.tkk.fi/applications/saha#Instance_ID1237984819752>
Query returned 1 result: 0.288196218702095043433827
Query returned 1 result: 0.968829841594276954522228
Query returned 1 result: 0.734287174246295350025549
Query returned 1 result: 0.798671435871231476750128
Query returned 1 result: 0.124680633622892181911088
Query returned 1 result: 0.101686254471129279509453
Query returned 1 result: 0.062851303014818599897803
Query returned 1 result: 0.954522227899846704138988
Query returned 1 result: 0.727133367399080224833929
Query returned 1 result: 0.014818599897802759325498
Query returned 1 result: 0.910066428206438426162494
Query returned 1 result: 0.01430761369443025038324
Query returned 1 result: 0.451711803781297904956566
Query returned 1 result: 0.428206438426162493612672
<http://seco.tkk.fi/saha3/u578baac2-f84b-4906-af9e-99f85ce645a4>
Query returned 1 result: 0.535616438

In [12]:
# to dataframe
statdf = pd.DataFrame()
classlabels = list(data["class_label"])
n=0
for k,v in stats2.items():
    #print(v)
    ix =classes.index(k)
    key = classlabels[ix]
    temp = pd.DataFrame(data=v, index= [key])
    statdf=pd.concat([statdf,temp])

In [13]:
# change column names to remove the "kaunokki:" part
columnnames = list(statdf.columns)
columnnames = [c.split(":")[1] for c in columnnames]

statdf.columns= columnnames
statdf.head()

Unnamed: 0,teema,asiasana,toimija,paahenkilo,alkukieli,genre,tekija,paikka,worldPlace,manifests_in,hasTimeOfStory,prefLabel,isbn,description
aforismikokoelmat,0.5814360770577933,0.9877408056042032,0.8756567425569176,0.978984238178634,0.0332749562171628,0.0595446584938704,0.1716287215411558,0.9964973730297724,0.9702276707530648,0.0140105078809106,0.9964973730297724,0.0140105078809106,0.297723292469352,0.3625218914185639
esseekokoelmat,0.4529505582137161,0.9728867623604464,0.8245614035087719,0.8421052631578947,0.036682615629984,0.1881977671451355,0.0558213716108452,0.9617224880382776,0.909090909090909,0.0207336523125996,0.9760765550239234,0.0191387559808612,0.4035087719298245,0.3349282296650717
kansanrunokokoelmat,0.6666666666666666,0.8902439024390243,0.8699186991869918,0.9390243902439024,0.0853658536585365,0.089430894308943,0.2520325203252032,0.983739837398374,0.8495934959349593,0.0121951219512195,0.9959349593495934,0.0040650406504065,0.646341463414634,0.6260162601626016
kansanrunot,0.9772727272727272,1.0,0.9318181818181818,1.0,0.0227272727272727,0.5,0.1136363636363636,1.0,1.0,1.0,1.0,0.0,1.0,1.0
kokoomateokset,0.6940577249575551,0.933106960950764,0.8516129032258064,0.9056027164685908,0.0621392190152801,0.1144312393887945,0.2797962648556876,0.9405772495755516,0.9120543293718166,0.0044142614601018,0.9714770797962649,0.0033955857385398,0.6241086587436332,0.4522920203735144


In [14]:
statdf=statdf.astype(float)
statdf = 1-statdf
#statdf.to_csv("../../output/results/annotation_stats.csv", sep="\t")
statdf.head()

Unnamed: 0,teema,asiasana,toimija,paahenkilo,alkukieli,genre,tekija,paikka,worldPlace,manifests_in,hasTimeOfStory,prefLabel,isbn,description
aforismikokoelmat,0.418564,0.012259,0.124343,0.021016,0.966725,0.940455,0.828371,0.003503,0.029772,0.985989,0.003503,0.985989,0.702277,0.637478
esseekokoelmat,0.547049,0.027113,0.175439,0.157895,0.963317,0.811802,0.944179,0.038278,0.090909,0.979266,0.023923,0.980861,0.596491,0.665072
kansanrunokokoelmat,0.333333,0.109756,0.130081,0.060976,0.914634,0.910569,0.747967,0.01626,0.150407,0.987805,0.004065,0.995935,0.353659,0.373984
kansanrunot,0.022727,0.0,0.068182,0.0,0.977273,0.5,0.886364,0.0,0.0,0.0,0.0,1.0,0.0,0.0
kokoomateokset,0.305942,0.066893,0.148387,0.094397,0.937861,0.885569,0.720204,0.059423,0.087946,0.995586,0.028523,0.996604,0.375891,0.547708


In [15]:
#statdf["class_count"]=class_count
statdf=statdf.set_index(class_count)

In [16]:
# add background gradient for visualization


neworder= ["tekija","prefLabel", "genre","teema", "asiasana", "paahenkilo","toimija","paikka", "worldPlace","isbn","hasTimeOfStory"]

statdf[neworder].style.background_gradient(axis=None)

Unnamed: 0,tekija,prefLabel,genre,teema,asiasana,paahenkilo,toimija,paikka,worldPlace,isbn,hasTimeOfStory
aforismikokoelmat (385),0.828371,0.985989,0.940455,0.418564,0.012259,0.021016,0.124343,0.003503,0.029772,0.702277,0.003503
esseekokoelmat (376),0.944179,0.980861,0.811802,0.547049,0.027113,0.157895,0.175439,0.038278,0.090909,0.596491,0.023923
kansanrunokokoelmat (166),0.747967,0.995935,0.910569,0.333333,0.109756,0.060976,0.130081,0.01626,0.150407,0.353659,0.004065
kansanrunot (43),0.886364,1.0,0.5,0.022727,0.0,0.0,0.068182,0.0,0.0,0.0,0.0
kokoomateokset (1759),0.720204,0.996604,0.885569,0.305942,0.066893,0.094397,0.148387,0.059423,0.087946,0.375891,0.028523
kuunnelmat (59),0.924051,1.0,0.64557,0.139241,0.0,0.151899,0.050633,0.0,0.0,0.101266,0.0
"kuunnelmat, kokoelmat (144)",0.942308,0.99359,0.961538,0.032051,0.0,0.025641,0.00641,0.00641,0.00641,0.044872,0.0
kuvakirjat (3370),0.895284,0.999688,0.99465,0.731664,0.060773,0.311396,0.617338,0.188967,0.05054,0.957147,0.002441
käsikirjoitukset (47),0.881579,0.907895,0.657895,0.092105,0.039474,0.039474,0.013158,0.0,0.013158,0.026316,0.039474
novellikokoelmat (2794),0.884604,0.999133,0.911612,0.304593,0.036973,0.040006,0.120451,0.055893,0.084778,0.411901,0.048382


In [17]:
# the annotations show which aspects are important for each class
## e.g. worldplace usually not relevant for poems
## has time of story for novels might indicate historical writing (or sci-fi)
## worldplace is still surprisingly low for novels -- does this reduce my analysis too much?

In [56]:
# Finnish novels
# Translated novels
# Authors
decs=[(1970,1981),(1980,1991),(1990,2001),(2000,2011),(2010,2021)]
decs=[(str(t[0]),str(t[1])) for t in decs]

# loop through all classes
Q_fi = """
SELECT  ((?novalue/?total) AS ?result) WHERE {
SELECT (COUNT(DISTINCT ?teos) AS ?total) (COUNT(DISTINCT ?teemateos) AS ?novalue) 
    WHERE

{
    
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    ?teos kaunokki:alkukieli <http://lexvo.org/id/iso639-3/fin> . # if you want only literature originally written in Finnish
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
   
   
   OPTIONAL { 
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    ?teos kaunokki:alkukieli <http://lexvo.org/id/iso639-3/fin> . # if you want only literature originally written in Finnish
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year .
    ?year skos:prefLabel ?yearuri
    FILTER(xsd:integer(?yearuri) > 1970)   
    FILTER(xsd:integer(?yearuri) < 1981)  
    FILTER NOT EXISTS  { ?teos kaunokki:teema ?teema }
    BIND(?teos AS ?teemateos) 
    
  } 
} 
}
"""

classes = list(data["bookclass"])
classes = [c.n3() for c in classes]
print(classes)
variables = ["kaunokki:teema", "kaunokki:asiasana", "kaunokki:toimija" , "kaunokki:paahenkilo", "kaunokki:alkukieli", "kaunokki:genre", 
             "kaunokki:tekija", "kaunokki:paikka", "kaunokki:worldPlace", "kaunokki:manifests_in", "kaunokki:hasTimeOfStory", "skos:prefLabel", "sch:isbn", "dce:description"]

## loop and save results into a dictionary
stats2 = {c:{v:"" for v in variables} for c in ["fi","tr","au"]}


#for c in classes:
   # print (c)
for v in variables:
    stats2["fi"][v]={}
    for dec in decs:
        print(dec)
        #Q_ = Q.replace("class", c) # set the wanted class
        Q_ = Q_fi.replace("kaunokki:teema", v)  # loop through all variables
        #Q_ = Q_.replace("197", dec)  # loop through all variables
        Q_ = Q_.replace("1970", dec[0])  # loop through all variables
        Q_ = Q_.replace("1981", dec[1])  # loop through all variables
        sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
        sparql.setQuery(PREFIXES + Q_)
        sparql.setReturnFormat(JSON)
        sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
        results3 = sparql.query().convert()

        vars = results3['head']['vars']
        result = results3["results"]["bindings"][0]["result"]["value"] 
        print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)
        
        stats2["fi"][v][dec] = result

['<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat>', '<http://www.yso.fi/onto/kaunokki#essee>', '<http://www.yso.fi/onto/kaunokki#kansanruno>', '<http://seco.tkk.fi/saha3/kirjasampo/Kansanrunot>', '<http://www.yso.fi/onto/kaunokki#kokoomateos>', '<http://seco.tkk.fi/saha3/kirjasampo/Kuunnelmat>', '<http://www.yso.fi/onto/kaunokki#kuunnelma>', '<http://www.yso.fi/onto/kaunokki#kuvakirjat>', '<http://www.yso.fi/onto/kaunokki#kasikirjoitus>', '<http://www.yso.fi/onto/kaunokki#novellikokoelma>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1237530959951>', '<http://www.yso.fi/onto/kaunokki#naytelma>', '<http://seco.tkk.fi/saha3/u0a6a963d-2b8c-40ee-b213-560653d9806b>', '<http://www.yso.fi/onto/kaunokki#pakina>', '<http://seco.tkk.fi/saha3/kirjasampo/Pakinat>', '<http://www.yso.fi/onto/kaunokki#romaani>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1238070215614>', '<http://www.yso.fi/onto/kaunokki#runo>', '<http://seco.tkk.fi/saha3/kirjasampo/Sadut>', '<http://www.

In [58]:
stats2
df_stats_fi=stats2["fi"]

In [59]:
# Translated novels
decs=[(1970,1981),(1980,1991),(1990,2001),(2000,2011),(2010,2021)]
decs=[(str(t[0]),str(t[1])) for t in decs]
# loop through all classes
Q_fi = """
SELECT  ((?novalue/?total) AS ?result) WHERE {
SELECT (COUNT(DISTINCT ?teos) AS ?total) (COUNT(DISTINCT ?teemateos) AS ?novalue) 
    WHERE

{
    
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    ?teos kaunokki:alkukieli ?alkukieli . # original language not Finnish
    FILTER(?alkukieli != <http://lexvo.org/id/iso639-3/fin>)
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
   
   
   OPTIONAL { 
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    ?teos kaunokki:alkukieli ?alkukieli . # original language not Finnish
    FILTER(?alkukieli != <http://lexvo.org/id/iso639-3/fin>)
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
    FILTER NOT EXISTS  { ?teos kaunokki:teema ?teema }
    BIND(?teos AS ?teemateos) 
    
  } 
} 
}
"""

classes = list(data["bookclass"])
classes = [c.n3() for c in classes]
print(classes)
variables = ["kaunokki:teema", "kaunokki:asiasana", "kaunokki:toimija" , "kaunokki:paahenkilo", "kaunokki:alkukieli", "kaunokki:genre", 
             "kaunokki:tekija", "kaunokki:paikka", "kaunokki:worldPlace", "kaunokki:manifests_in", "kaunokki:hasTimeOfStory", "skos:prefLabel", "sch:isbn", "dce:description"]

## loop and save results into a dictionary
#stats2 = {c:{v:"" for v in variables} for c in ["fi","tr"]}


#for c in classes:
   # print (c)
for v in variables:
    stats2["tr"][v]={}
    for dec in decs:
        #Q_ = Q.replace("class", c) # set the wanted class
        Q_ = Q_fi.replace("kaunokki:teema", v)  # loop through all variables
        #Q_ = Q_.replace("197", dec)  # loop through all variables
        Q_ = Q_.replace("1970", dec[0])  # loop through all variables
        Q_ = Q_.replace("1981", dec[1])  # loop through all variables
        sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
        sparql.setQuery(PREFIXES + Q_)
        sparql.setReturnFormat(JSON)
        sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
        results3 = sparql.query().convert()

        vars = results3['head']['vars']
        result = results3["results"]["bindings"][0]["result"]["value"] 
        print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)
        
        stats2["tr"][v][dec] = result

['<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat>', '<http://www.yso.fi/onto/kaunokki#essee>', '<http://www.yso.fi/onto/kaunokki#kansanruno>', '<http://seco.tkk.fi/saha3/kirjasampo/Kansanrunot>', '<http://www.yso.fi/onto/kaunokki#kokoomateos>', '<http://seco.tkk.fi/saha3/kirjasampo/Kuunnelmat>', '<http://www.yso.fi/onto/kaunokki#kuunnelma>', '<http://www.yso.fi/onto/kaunokki#kuvakirjat>', '<http://www.yso.fi/onto/kaunokki#kasikirjoitus>', '<http://www.yso.fi/onto/kaunokki#novellikokoelma>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1237530959951>', '<http://www.yso.fi/onto/kaunokki#naytelma>', '<http://seco.tkk.fi/saha3/u0a6a963d-2b8c-40ee-b213-560653d9806b>', '<http://www.yso.fi/onto/kaunokki#pakina>', '<http://seco.tkk.fi/saha3/kirjasampo/Pakinat>', '<http://www.yso.fi/onto/kaunokki#romaani>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1238070215614>', '<http://www.yso.fi/onto/kaunokki#runo>', '<http://seco.tkk.fi/saha3/kirjasampo/Sadut>', '<http://www.

In [60]:
df_stats_tr=stats2["tr"]

In [80]:
# translated authors

# loop through all classes
Q_fi = """
SELECT  ((?novalue/?total) AS ?result) WHERE {
SELECT (COUNT(DISTINCT ?teos) AS ?total) (COUNT(DISTINCT ?teemateos) AS ?novalue) 
    WHERE

{
    
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    ?teos kaunokki:tekija ?author .
    
    ?teos kaunokki:alkukieli ?alkukieli . # original language not Finnish
    FILTER(?alkukieli != <http://lexvo.org/id/iso639-3/fin>)
    
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
   
   
   OPTIONAL { 
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    ?teos kaunokki:alkukieli ?alkukieli . # original language not Finnish
    FILTER(?alkukieli != <http://lexvo.org/id/iso639-3/fin>)
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
    ?teos kaunokki:tekija ?author .
    FILTER NOT EXISTS  { ?author kaunokki:teema ?teema }
    BIND(?teos AS ?teemateos) 
    
  } 
} 
}
"""

classes = list(data["bookclass"])
classes = [c.n3() for c in classes]
print(classes)
variables = ["foaf:gender", "kaunokki:kansallisuus", "kaunokki:aidinkieli" , "kaunokki:timeOfBirth", "kaunokki:hasLivedIn", "kaunokki:placeOfBirth", 
             ]

## loop and save results into a dictionary
#stats2 = {c:{v:"" for v in variables} for c in ["fi","tr","au"]}


#for c in classes:
   # print (c)
for v in variables:
    stats2["au"][v]={}
    for dec in decs:
        #Q_ = Q.replace("class", c) # set the wanted class
        Q_ = Q_fi.replace("kaunokki:teema", v)  # loop through all variables
        #Q_ = Q_.replace("197", dec)  # loop through all variables
        Q_ = Q_.replace("1970", dec[0])  # loop through all variables
        Q_ = Q_.replace("1981", dec[1])  # loop through all variables
        sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
        sparql.setQuery(PREFIXES + Q_)
        sparql.setReturnFormat(JSON)
        sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
        results3 = sparql.query().convert()

        vars = results3['head']['vars']
        result = results3["results"]["bindings"][0]["result"]["value"] 
        print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)
        
        stats2["au"][v][dec] = result

['<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat>', '<http://www.yso.fi/onto/kaunokki#essee>', '<http://www.yso.fi/onto/kaunokki#kansanruno>', '<http://seco.tkk.fi/saha3/kirjasampo/Kansanrunot>', '<http://www.yso.fi/onto/kaunokki#kokoomateos>', '<http://seco.tkk.fi/saha3/kirjasampo/Kuunnelmat>', '<http://www.yso.fi/onto/kaunokki#kuunnelma>', '<http://www.yso.fi/onto/kaunokki#kuvakirjat>', '<http://www.yso.fi/onto/kaunokki#kasikirjoitus>', '<http://www.yso.fi/onto/kaunokki#novellikokoelma>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1237530959951>', '<http://www.yso.fi/onto/kaunokki#naytelma>', '<http://seco.tkk.fi/saha3/u0a6a963d-2b8c-40ee-b213-560653d9806b>', '<http://www.yso.fi/onto/kaunokki#pakina>', '<http://seco.tkk.fi/saha3/kirjasampo/Pakinat>', '<http://www.yso.fi/onto/kaunokki#romaani>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1238070215614>', '<http://www.yso.fi/onto/kaunokki#runo>', '<http://seco.tkk.fi/saha3/kirjasampo/Sadut>', '<http://www.

In [79]:
# Finnish authors

# loop through all classes
Q_fi = """
SELECT  ((?novalue/?total) AS ?result) WHERE {
SELECT (COUNT(DISTINCT ?teos) AS ?total) (COUNT(DISTINCT ?teemateos) AS ?novalue) 
    WHERE

{
    
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    ?teos kaunokki:tekija ?author .
    
    ?teos kaunokki:alkukieli ?alkukieli . # original language Finnish
    FILTER(?alkukieli = <http://lexvo.org/id/iso639-3/fin>)
    
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
   
   
   OPTIONAL { 
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    ?teos kaunokki:alkukieli ?alkukieli . # original language not Finnish
    FILTER(?alkukieli = <http://lexvo.org/id/iso639-3/fin>)
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
    ?teos kaunokki:tekija ?author .
    FILTER NOT EXISTS  { ?author kaunokki:teema ?teema }
    BIND(?teos AS ?teemateos) 
    
  } 
} 
}
"""

classes = list(data["bookclass"])
classes = [c.n3() for c in classes]
print(classes)
variables = ["foaf:gender", "kaunokki:kansallisuus", "kaunokki:aidinkieli" , "kaunokki:timeOfBirth", "kaunokki:hasLivedIn", "kaunokki:placeOfBirth", 
             ]

## loop and save results into a dictionary
#stats2 = {c:{v:"" for v in variables} for c in ["fi","tr","au","au_fi"]}

stats2["au_fi"]={}

decs=[(1970,1981),(1980,1991),(1990,2001),(2000,2011),(2010,2021)]
decs=[(str(t[0]),str(t[1])) for t in decs]

#for c in classes:
   # print (c)
for v in variables:
    stats2["au_fi"][v]={}
    for dec in decs:
        #Q_ = Q.replace("class", c) # set the wanted class
        Q_ = Q_fi.replace("kaunokki:teema", v)  # loop through all variables
        Q_ = Q_.replace("1970", dec[0])  # loop through all variables
        Q_ = Q_.replace("1981", dec[1])  # loop through all variables
        sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
        sparql.setQuery(PREFIXES + Q_)
        sparql.setReturnFormat(JSON)
        sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
        results3 = sparql.query().convert()

        vars = results3['head']['vars']
        result = results3["results"]["bindings"][0]["result"]["value"] 
        print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)
        
        stats2["au_fi"][v][dec] = result

['<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat>', '<http://www.yso.fi/onto/kaunokki#essee>', '<http://www.yso.fi/onto/kaunokki#kansanruno>', '<http://seco.tkk.fi/saha3/kirjasampo/Kansanrunot>', '<http://www.yso.fi/onto/kaunokki#kokoomateos>', '<http://seco.tkk.fi/saha3/kirjasampo/Kuunnelmat>', '<http://www.yso.fi/onto/kaunokki#kuunnelma>', '<http://www.yso.fi/onto/kaunokki#kuvakirjat>', '<http://www.yso.fi/onto/kaunokki#kasikirjoitus>', '<http://www.yso.fi/onto/kaunokki#novellikokoelma>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1237530959951>', '<http://www.yso.fi/onto/kaunokki#naytelma>', '<http://seco.tkk.fi/saha3/u0a6a963d-2b8c-40ee-b213-560653d9806b>', '<http://www.yso.fi/onto/kaunokki#pakina>', '<http://seco.tkk.fi/saha3/kirjasampo/Pakinat>', '<http://www.yso.fi/onto/kaunokki#romaani>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1238070215614>', '<http://www.yso.fi/onto/kaunokki#runo>', '<http://seco.tkk.fi/saha3/kirjasampo/Sadut>', '<http://www.

In [55]:
stats2["tr"]

KeyError: 'tr'

In [26]:
print(len(variables))

6


In [61]:
# translated authors

# loop through all classes
Q_fi = """
SELECT  ((?novalue/?total) AS ?result) WHERE {
SELECT (COUNT(DISTINCT ?teos) AS ?total) (COUNT(DISTINCT ?teemateos) AS ?novalue) 
    WHERE

{
    
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    #?teos kaunokki:tekija ?author .
    
    ?teos kaunokki:alkukieli ?alkukieli . # original language not Finnish
    FILTER(?alkukieli != <http://lexvo.org/id/iso639-3/fin>)
    
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
   
   
   OPTIONAL { 
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    ?teos kaunokki:alkukieli ?alkukieli . # original language not Finnish
    FILTER(?alkukieli != <http://lexvo.org/id/iso639-3/fin>)
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
    #?teos kaunokki:tekija ?author .
    FILTER EXISTS  { ?julkaisu kaunokki:hasPublisher ?publisher }
    BIND(?teos AS ?teemateos) 
    
  } 
} 
}
"""

classes = list(data["bookclass"])
classes = [c.n3() for c in classes]
print(classes)
pubvariables = ["kaunokki:julkaisija"
             ]

## loop and save results into a dictionary
#stats2 = {c:{v:"" for v in variables} for c in ["fi","tr","au"]}

pubstats={"fi":{},"tr":{}}
#for c in classes:
   # print (c)
#for v in variables:
#    stats2["au"][v]={}
for dec in decs:
    #Q_ = Q.replace("class", c) # set the wanted class
    #Q_ = Q_fi.replace("kaunokki:teema", v)  # loop through all variables
    Q_ = Q_fi.replace("1970", dec[0])  # loop through all variables
    Q_ = Q_.replace("1981", dec[1])  # loop through all variables
    sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
    sparql.setQuery(PREFIXES + Q_)
    sparql.setReturnFormat(JSON)
    sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
    results3 = sparql.query().convert()

    vars = results3['head']['vars']
    result = results3["results"]["bindings"][0]["result"]["value"] 
    print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)

    pubstats["tr"][dec] = result
    
    


['<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat>', '<http://www.yso.fi/onto/kaunokki#essee>', '<http://www.yso.fi/onto/kaunokki#kansanruno>', '<http://seco.tkk.fi/saha3/kirjasampo/Kansanrunot>', '<http://www.yso.fi/onto/kaunokki#kokoomateos>', '<http://seco.tkk.fi/saha3/kirjasampo/Kuunnelmat>', '<http://www.yso.fi/onto/kaunokki#kuunnelma>', '<http://www.yso.fi/onto/kaunokki#kuvakirjat>', '<http://www.yso.fi/onto/kaunokki#kasikirjoitus>', '<http://www.yso.fi/onto/kaunokki#novellikokoelma>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1237530959951>', '<http://www.yso.fi/onto/kaunokki#naytelma>', '<http://seco.tkk.fi/saha3/u0a6a963d-2b8c-40ee-b213-560653d9806b>', '<http://www.yso.fi/onto/kaunokki#pakina>', '<http://seco.tkk.fi/saha3/kirjasampo/Pakinat>', '<http://www.yso.fi/onto/kaunokki#romaani>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1238070215614>', '<http://www.yso.fi/onto/kaunokki#runo>', '<http://seco.tkk.fi/saha3/kirjasampo/Sadut>', '<http://www.

In [62]:
# Finnish publications
Q_fi = """
SELECT  ((?novalue/?total) AS ?result) WHERE {
SELECT (COUNT(DISTINCT ?teos) AS ?total) (COUNT(DISTINCT ?teemateos) AS ?novalue) 
    WHERE

{
    
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    #?teos kaunokki:tekija ?author .
    
    ?teos kaunokki:alkukieli <http://lexvo.org/id/iso639-3/fin> .
    
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
   
   
   OPTIONAL { 
    ?teos a kaunokki:romaani ; kaunokki:manifests_in ?julkaisu. # teos is a novel
    ?teos kaunokki:alkukieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:kieli <http://lexvo.org/id/iso639-3/fin> .
    ?julkaisu kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearuri .
    FILTER(xsd:integer(?yearuri) > 1970)     FILTER(xsd:integer(?yearuri) < 1981)
    #?teos kaunokki:tekija ?author .
    FILTER EXISTS  { ?julkaisu kaunokki:hasPublisher ?publisher }
    BIND(?teos AS ?teemateos) 
    
  } 
} 
}
"""

classes = list(data["bookclass"])
classes = [c.n3() for c in classes]
print(classes)
pubvariables = ["kaunokki:julkaisija"
             ]

## loop and save results into a dictionary
#stats2 = {c:{v:"" for v in variables} for c in ["fi","tr","au"]}

#pubstats={"fi":{},"tr":{}}
#for c in classes:
   # print (c)
#for v in variables:
#    stats2["au"][v]={}
#    stats2["au"][v]={}
for dec in decs:
    #Q_ = Q.replace("class", c) # set the wanted class
    #Q_ = Q_fi.replace("kaunokki:teema", v)  # loop through all variables
    Q_ = Q_fi.replace("1970", dec[0])  # loop through all variables
    Q_ = Q_.replace("1981", dec[1])  # loop through all variables
    sparql.setReturnFormat(JSON)
    sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
    results3 = sparql.query().convert()

    vars = results3['head']['vars']
    result = results3["results"]["bindings"][0]["result"]["value"] 
    print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)

    pubstats["fi"][dec] = result
    
    

['<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat>', '<http://www.yso.fi/onto/kaunokki#essee>', '<http://www.yso.fi/onto/kaunokki#kansanruno>', '<http://seco.tkk.fi/saha3/kirjasampo/Kansanrunot>', '<http://www.yso.fi/onto/kaunokki#kokoomateos>', '<http://seco.tkk.fi/saha3/kirjasampo/Kuunnelmat>', '<http://www.yso.fi/onto/kaunokki#kuunnelma>', '<http://www.yso.fi/onto/kaunokki#kuvakirjat>', '<http://www.yso.fi/onto/kaunokki#kasikirjoitus>', '<http://www.yso.fi/onto/kaunokki#novellikokoelma>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1237530959951>', '<http://www.yso.fi/onto/kaunokki#naytelma>', '<http://seco.tkk.fi/saha3/u0a6a963d-2b8c-40ee-b213-560653d9806b>', '<http://www.yso.fi/onto/kaunokki#pakina>', '<http://seco.tkk.fi/saha3/kirjasampo/Pakinat>', '<http://www.yso.fi/onto/kaunokki#romaani>', '<http://www.seco.tkk.fi/applications/saha#Instance_ID1238070215614>', '<http://www.yso.fi/onto/kaunokki#runo>', '<http://seco.tkk.fi/saha3/kirjasampo/Sadut>', '<http://www.

In [29]:
# make df


In [63]:
novelvariables=  ["kaunokki:teema",   "kaunokki:genre", 
             "kaunokki:tekija", "kaunokki:paikka", "kaunokki:worldPlace"]

In [64]:
tr_df=pd.DataFrame(stats2["tr"])
fi_df=pd.DataFrame(stats2["fi"])

In [65]:
fi_df[novelvariables]=fi_df[novelvariables].apply(pd.to_numeric)

In [66]:
tr_df[novelvariables]=tr_df[novelvariables].apply(pd.to_numeric)

In [67]:
fi_df[novelvariables]=1-fi_df[novelvariables]

In [68]:
tr_df[novelvariables]=1-tr_df[novelvariables]

In [69]:
fi_df2=fi_df[novelvariables].T
tr_df2=tr_df[novelvariables].T

In [70]:
#fi_df2.values==tr_df2.values
fi_df2

Unnamed: 0_level_0,1970,1980,1990,2000,2010
Unnamed: 0_level_1,1981,1991,2001,2011,2021
kaunokki:teema,0.739583,0.752308,0.793161,0.879876,0.85304
kaunokki:genre,0.908203,0.935897,0.959133,0.962025,0.944322
kaunokki:tekija,0.998047,0.998974,0.997498,0.997417,0.997656
kaunokki:paikka,0.300781,0.250256,0.289825,0.380522,0.40674
kaunokki:worldPlace,0.470052,0.215385,0.379483,0.253165,0.350037


In [71]:
pub_fi=pd.DataFrame(pubstats)
#pubstats
collist=list(pub_fi.columns)
pub_fi[collist]=pub_fi[collist].apply(pd.to_numeric )

In [88]:
pub_fi

Unnamed: 0,Unnamed: 1,fi,tr
1970,1981,0.998927,0.992656
1980,1991,0.998927,0.986942
1990,2001,0.998927,0.987718
2000,2011,0.998927,0.992279
2010,2021,0.998927,0.998927


In [72]:
fi_df2=pd.concat([fi_df2,pub_fi.T.head(1)])
tr_df2=pd.concat([tr_df2,pub_fi.T.tail(1)])

In [81]:
tr_au=pd.DataFrame(stats2["au"])
fi_au=pd.DataFrame(stats2["au_fi"])

In [82]:
fi_au[variables]=fi_au[variables].apply(pd.to_numeric)
tr_au[variables]=tr_au[variables].apply(pd.to_numeric)

In [83]:
fi_au[variables]=1-fi_au[variables]
tr_au[variables]=1-tr_au[variables]

In [84]:
tr_au[variables].T

Unnamed: 0_level_0,1970,1980,1990,2000,2010
Unnamed: 0_level_1,1981,1991,2001,2011,2021
foaf:gender,0.950632,0.965855,0.975089,0.985866,0.991611
kaunokki:kansallisuus,0.926561,0.911733,0.938855,0.963339,0.972252
kaunokki:aidinkieli,0.99592,0.998184,1.0,0.999337,0.996773
kaunokki:timeOfBirth,0.803754,0.798039,0.802976,0.81758,0.747473
kaunokki:hasLivedIn,0.065687,0.085725,0.131349,0.251104,0.261347
kaunokki:placeOfBirth,0.508772,0.500182,0.603041,0.619258,0.532373


In [85]:

corr = fi_au[variables].T
corr.style.background_gradient(cmap='coolwarm', axis=None).set_precision(2)#.style.background_gradient()

  corr.style.background_gradient(cmap='coolwarm', axis=None).set_precision(2)#.style.background_gradient()


Unnamed: 0_level_0,1970,1980,1990,2000,2010
Unnamed: 0_level_1,1981,1991,2001,2011,2021
foaf:gender,1.0,0.99,0.99,0.99,0.98
kaunokki:kansallisuus,1.0,1.0,0.99,0.99,1.0
kaunokki:aidinkieli,1.0,1.0,1.0,1.0,1.0
kaunokki:timeOfBirth,0.89,0.84,0.79,0.8,0.66
kaunokki:hasLivedIn,0.48,0.54,0.61,0.71,0.62
kaunokki:placeOfBirth,0.84,0.78,0.73,0.72,0.5


In [86]:
print(tr_df2.style.set_precision(3).to_latex())

\begin{tabular}{lrrrrr}
 & 1970 & 1980 & 1990 & 2000 & 2010 \\
 & 1981 & 1991 & 2001 & 2011 & 2021 \\
kaunokki:teema & 0.726 & 0.836 & 0.853 & 0.935 & 0.889 \\
kaunokki:genre & 0.975 & 0.972 & 0.993 & 0.997 & 0.997 \\
kaunokki:tekija & 1.000 & 0.999 & 0.999 & 0.999 & 0.998 \\
kaunokki:paikka & 0.319 & 0.300 & 0.298 & 0.442 & 0.478 \\
kaunokki:worldPlace & 0.510 & 0.424 & 0.557 & 0.408 & 0.507 \\
tr & 0.993 & 0.987 & 0.988 & 0.992 & 0.999 \\
\end{tabular}



  print(tr_df2.style.set_precision(3).to_latex())


In [87]:
#fi_df==tr_df

print(fi_df2.style.set_precision(3).to_latex())

\begin{tabular}{lrrrrr}
 & 1970 & 1980 & 1990 & 2000 & 2010 \\
 & 1981 & 1991 & 2001 & 2011 & 2021 \\
kaunokki:teema & 0.740 & 0.752 & 0.793 & 0.880 & 0.853 \\
kaunokki:genre & 0.908 & 0.936 & 0.959 & 0.962 & 0.944 \\
kaunokki:tekija & 0.998 & 0.999 & 0.997 & 0.997 & 0.998 \\
kaunokki:paikka & 0.301 & 0.250 & 0.290 & 0.381 & 0.407 \\
kaunokki:worldPlace & 0.470 & 0.215 & 0.379 & 0.253 & 0.350 \\
fi & 0.999 & 0.999 & 0.999 & 0.999 & 0.999 \\
\end{tabular}



  print(fi_df2.style.set_precision(3).to_latex())


In [46]:
# save
#statdf.to_csv("../../output/results/annotation_stats.csv", sep="\t")
import dataframe_image as dfi
#dfi.export(statdf[neworder].style.background_gradient(axis=None)  ,"../../output/figures/annotation_stats_v2.png")

### development per year for novels

In [47]:
## query group by year to get yearly development of the proportion of annotated works VS no annotation (per class)
Q = """
SELECT (COUNT(DISTINCT ?teos) AS ?total) (COUNT(DISTINCT ?teemateos) AS ?novalue) (SAMPLE(?yearLabel) AS ?vuosi) 
    WHERE

{
    ?teos a ?class .
    ?teos kaunokki:manifests_in ?pub .
    ?pub kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearLabel  .
    FILTER(xsd:integer(?yearLabel) > 1899) # from the beginning of the 20th century

   OPTIONAL { 
    
    ?teos a ?class .
     ?teos kaunokki:manifests_in ?pub .
    ?pub kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearLabel  .
    FILTER(xsd:integer(?yearLabel) > 1899)                      
    FILTER NOT EXISTS  { ?teos kaunokki:teema ?teema }
    BIND(?teos AS ?teemateos) 
    
  }
} GROUP BY ?year 

"""


variables = ["kaunokki:teema", "kaunokki:asiasana", "kaunokki:toimija" , "kaunokki:paahenkilo", "kaunokki:alkukieli", "kaunokki:genre", 
             "kaunokki:tekija", "kaunokki:paikka", "kaunokki:worldPlace", "kaunokki:manifests_in", "kaunokki:hasTimeOfStory", "skos:prefLabel", "sch:isbn", "dce:description"]

In [48]:
stats2 = {c:{v:"" for v in variables} for c in classes}


# loop again 

for c in classes:
    #print (c)
    for v in variables:
        #print(v)
        Q_ = Q.replace("?class", c)
        Q_ = Q_.replace("kaunokki:teema", v)
        print(c,v)
        sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
        sparql.setQuery(PREFIXES + Q_)
        sparql.setReturnFormat(JSON)
        sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
        results3 = sparql.query().convert()

        vars = results3['head']['vars']
        values= [list(i.values() ) for i in results3["results"]["bindings"]]
        #print(values[0][0]["value"])

        result = [[i[0]["value"], values[e][1]["value"], values[e][2]["value"]] for e,i in enumerate(values)]
        #result = [results3["results"]["bindings"][0]["total"]["value"] , results3["results"]["bindings"][0]["novalue"]["value"] , results3["results"]["bindings"][0]["vuosi"]["value"] ]
        print("Query returned {} results:".format(len(results3["results"]["bindings"])))
        stats2[c][v] = result

<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat> kaunokki:teema
Query returned 76 results:
<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat> kaunokki:asiasana
Query returned 76 results:
<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat> kaunokki:toimija
Query returned 76 results:
<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat> kaunokki:paahenkilo
Query returned 76 results:
<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat> kaunokki:alkukieli
Query returned 76 results:
<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat> kaunokki:genre
Query returned 76 results:
<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat> kaunokki:tekija
Query returned 76 results:
<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat> kaunokki:paikka
Query returned 76 results:
<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat> kaunokki:worldPlace
Query returned 76 results:
<http://seco.tkk.fi/saha3/kirjasampo/Aforismikokoelmat> kaunokki:manifests_in
Query returned 76 r

KeyboardInterrupt: 

In [None]:
stats2[c]

In [None]:
# plot one class you want

## novels as an example
c="<http://www.yso.fi/onto/kaunokki#romaani>"
#c="<http://www.yso.fi/onto/kaunokki#novellikokoelma>"
#c="<http://www.seco.tkk.fi/applications/saha#Instance_ID1238070215614>" # runokokoelmat
#plt.style.use('seaborn-husl') #'seaborn-dark-palette'

import seaborn as sns

#set_theme() 
sns.set_theme(style="white",palette="PuOr_r")
# https://medium.com/@morganjonesartist/color-guide-to-seaborn-palettes-da849406d44f

fig, ((ax1, ax2, ax3, ax4), (ax5, ax6, ax7,ax8)) = plt.subplots(2, 4, figsize=(16,6), sharex=True)
axlist=[ax1,ax2,ax3,ax4,ax5,ax6,ax7,ax8]
plotkeys=['kaunokki:teema', 'kaunokki:asiasana', 'kaunokki:toimija', 'kaunokki:paahenkilo', 'kaunokki:genre',"kaunokki:paikka", 'kaunokki:worldPlace','kaunokki:hasTimeOfStory']
novels = stats2[c]
for i,k in enumerate(plotkeys):
    tempdf = pd.DataFrame(stats2[c][k], columns=["all", "no_val","year"])
    tempdf["year"]= pd.to_numeric(tempdf["year"])
    tempdf["no_val"]= pd.to_numeric(tempdf["no_val"])
    tempdf["all"]= pd.to_numeric(tempdf["all"])
    tempdf["all"]= pd.to_numeric(tempdf["all"])
    #tempdf=tempdf[tempdf["all"] >10]
    tempdf =tempdf.sort_values(by="year")
    #x= tempdf["year"]
    #y1 = tempdf["all"]
    #y2 = tempdf["no_val"]
    #axlist[i].title.set_text(k)
    #axlist[i].plot(x,y1, label = "all books")
    #axlist[i].plot(x,y2, label="not annotated")
    
    tempdf["yes_val"] = tempdf["all"]-tempdf["no_val"]
    tempdf=tempdf[tempdf["all"] >10]
    tempdf=tempdf[tempdf["year"]<2022]
    tempdf =tempdf.sort_values(by="year")
    x= tempdf["year"]
    y1 = tempdf["yes_val"]
    y2 = tempdf["no_val"]
    axlist[i].title.set_text(k)
    #axlist[i].plot(x,y1, label = "all books")
    #axlist[i].plot(x,y2, label="not annotated")
    axlist[i].stackplot(x,y1, y2, labels=['Annotated','Not annotated'])
    axlist[i].legend(loc='upper left')
    

#ax1.plot(
    

#plt.savefig("fig/novels_annotations.png", format="png")
plt.tight_layout()
plt.savefig("../../output/figures/novels_annotations_2v2.png", format="png")
plt.show()


In [None]:
# perustiedot

plotkeys = [ "kaunokki:alkukieli", "kaunokki:manifests_in",
             "kaunokki:tekija", "skos:prefLabel", "sch:isbn", "dce:description"]

fig, ((ax1, ax2, ax3), (ax5, ax6, ax7)) = plt.subplots(2, 3, figsize=(12,6), sharex=True)
axlist=[ax1,ax2,ax3,ax5,ax6,ax7]

#plotkeys=['kaunokki:teema', 'kaunokki:asiasana', 'kaunokki:toimija', 'kaunokki:paahenkilo', 'kaunokki:genre',"kaunokki:paikka", 'kaunokki:worldPlace','kaunokki:hasTimeOfStory']
novels = stats2[c]
for i,k in enumerate(plotkeys):
    tempdf = pd.DataFrame(stats2[c][k], columns=["all", "no_val","year"])
    tempdf["year"]= pd.to_numeric(tempdf["year"])
    tempdf["no_val"]= pd.to_numeric(tempdf["no_val"])
    tempdf["all"]= pd.to_numeric(tempdf["all"])
    tempdf["all"]= pd.to_numeric(tempdf["all"])
    #tempdf=tempdf[tempdf["all"] >10]
    #tempdf =tempdf.sort_values(by="year")
    #x= tempdf["year"]
    #y1 = tempdf["all"]
    #y2 = tempdf["no_val"]
    #axlist[i].title.set_text(k)
    #axlist[i].plot(x,y1, label = "all books")
    #axlist[i].plot(x,y2, label="not annotated")
    
    
    
    tempdf["yes_val"] = tempdf["all"]-tempdf["no_val"]
    tempdf=tempdf[tempdf["all"] >10]
    tempdf=tempdf[tempdf["year"]<2022]
    tempdf =tempdf.sort_values(by="year")
    x= tempdf["year"]
    y1 = tempdf["yes_val"]
    y2 = tempdf["no_val"]
    axlist[i].title.set_text(k)
    #axlist[i].plot(x,y1, label = "all books")
    #axlist[i].plot(x,y2, label="not annotated")
    axlist[i].stackplot(x,y1, y2, labels=['Annotated','Not annotated'])
    axlist[i].legend(loc='upper left')
    


plt.tight_layout()
plt.savefig("../../output/figures/novels_annotations_1v2.png", format="png")
plt.show()

In [None]:
# kieli, publisher, kansikuva, on ensimmainenversio (approx = suomalainen kirja)
## query group by year to get yearly development of the proportion of annotated works VS no annotation (per class)
Q = """
SELECT (COUNT(DISTINCT ?teos) AS ?total) (COUNT(DISTINCT ?kieliteos) AS ?novalue) (SAMPLE(?yearLabel) AS ?vuosi) 
    WHERE

{
    ?teos a kaunokki:romaani .
    ?teos kaunokki:manifests_in ?pub .
    ?pub kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearLabel  .
    FILTER(xsd:integer(?yearLabel) > 1899) # from the beginning of the 20th century

   OPTIONAL { 
    
    ?teos a kaunokki:romaani .
     ?teos kaunokki:manifests_in ?pub .
    ?pub kaunokki:ilmestymisvuosi ?year . ?year skos:prefLabel ?yearLabel  .
    FILTER(xsd:integer(?yearLabel) > 1899)                      
    FILTER NOT EXISTS  { ?pub kaunokki:kieli ?kieli }
    BIND(?teos AS ?kieliteos) 
    
  }
} GROUP BY ?year 

"""


variables = ["kaunokki:kieli", "kaunokki:onEnsimmainenVersio","kaunokki:hasPublisher","kaunokki:kansikuva","kaunokki:sivuLkm"]

In [None]:
pubstats = {v:"" for v in variables}
for v in variables:
    #print(v)
    #Q_ = Q.replace("?class", c)
    Q_ = Q.replace("kaunokki:kieli", v)
    print(v)
    sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
    sparql.setQuery(PREFIXES + Q_)
    sparql.setReturnFormat(JSON)
    sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
    results3 = sparql.query().convert()

    vars = results3['head']['vars']
    values= [list(i.values() ) for i in results3["results"]["bindings"]]
    #print(values[0][0]["value"])

    result = [[i[0]["value"], values[e][1]["value"], values[e][2]["value"]] for e,i in enumerate(values)]
    #result = [results3["results"]["bindings"][0]["total"]["value"] , results3["results"]["bindings"][0]["novalue"]["value"] , results3["results"]["bindings"][0]["vuosi"]["value"] ]
    print("Query returned {} results:".format(len(results3["results"]["bindings"])))
    pubstats[v] = result

In [None]:
variables = ["kaunokki:kieli", "kaunokki:onEnsimmainenVersio","kaunokki:hasPublisher","kaunokki:kansikuva","kaunokki:sivuLkm"]
fig, ((ax1, ax2, ax3), (ax4,ax5, ax6)) = plt.subplots(2, 3, figsize=(12,6), sharex=True)
axlist=[ax1,ax2,ax3,ax4,ax5,ax6]
#plotkeys=['kaunokki:teema', 'kaunokki:asiasana', 'kaunokki:toimija', 'kaunokki:paahenkilo', 'kaunokki:genre',"kaunokki:paikka", 'kaunokki:worldPlace','kaunokki:hasTimeOfStory']

for i,k in enumerate(variables):
    tempdf = pd.DataFrame(pubstats[k], columns=["all", "no_val","year"])
    tempdf["year"]= pd.to_numeric(tempdf["year"])
    tempdf["no_val"]= pd.to_numeric(tempdf["no_val"])
    tempdf["all"]= pd.to_numeric(tempdf["all"])
    tempdf["all"]= pd.to_numeric(tempdf["all"])
    tempdf["yes_val"] = tempdf["all"]-tempdf["no_val"]
    tempdf=tempdf[tempdf["all"] >10]
    tempdf=tempdf[tempdf["year"]<2022]
    tempdf =tempdf.sort_values(by="year")
    x= tempdf["year"]
    y1 = tempdf["yes_val"]
    y2 = tempdf["no_val"]
    axlist[i].title.set_text(k)
    #axlist[i].plot(x,y1, label = "all books")
    #axlist[i].plot(x,y2, label="not annotated")
    axlist[i].stackplot(x,y1, y2, labels=['Annotated','Not annotated'])
    
    
    axlist[i].legend(loc='upper left')

#ax1.plot(
    

#plt.savefig("fig/novels_annotations.png", format="png")
plt.tight_layout()
plt.savefig("../../output/figures/pubs_annotations.png", format="png")
plt.show()

In [None]:
# combine all into one

#novels.update(pubstats)

variables = [ 
    "kaunokki:tekija", "kaunokki:alkukieli","kaunokki:kieli","skos:prefLabel", 
    "sch:isbn", "kaunokki:hasPublisher","kaunokki:onEnsimmainenVersio","kaunokki:sivuLkm",
    'kaunokki:teema', 'kaunokki:asiasana', 'kaunokki:toimija', 'kaunokki:paahenkilo', 
    'kaunokki:genre',"kaunokki:paikka", 'kaunokki:worldPlace','kaunokki:hasTimeOfStory']
fig, ((ax1, ax2, ax3, ax4), (ax5, ax6, ax7,ax8), (ax9,ax10,ax11,ax12),(ax13,ax14,ax15,ax16)) = plt.subplots(4, 4, figsize=(16,12), sharex=True)
axlist=[ax1,ax2,ax3,ax4,ax5,ax6,ax7,ax8,ax9,ax10,ax11,ax12,ax13,ax14,ax15,ax16]
#plotkeys=['kaunokki:teema', 'kaunokki:asiasana', 'kaunokki:toimija', 'kaunokki:paahenkilo', 'kaunokki:genre',"kaunokki:paikka", 'kaunokki:worldPlace','kaunokki:hasTimeOfStory']

for i,k in enumerate(variables):
    tempdf = pd.DataFrame(novels[k], columns=["all", "no_val","year"])
    tempdf["year"]= pd.to_numeric(tempdf["year"])
    tempdf["no_val"]= pd.to_numeric(tempdf["no_val"])
    tempdf["all"]= pd.to_numeric(tempdf["all"])
    tempdf["all"]= pd.to_numeric(tempdf["all"])
    tempdf["yes_val"] = tempdf["all"]-tempdf["no_val"]
    tempdf=tempdf[tempdf["all"] >10]
    tempdf=tempdf[tempdf["year"]<2022]
    tempdf =tempdf.sort_values(by="year")
    x= tempdf["year"]
    y1 = tempdf["yes_val"]
    y2 = tempdf["no_val"]
    axlist[i].title.set_text(k)
    #axlist[i].plot(x,y1, label = "all books")
    #axlist[i].plot(x,y2, label="not annotated")
    axlist[i].stackplot(x,y1, y2, labels=['Annotated','Not annotated'])
    
    
    axlist[i].legend(loc='upper left')

#ax1.plot(
    

#plt.savefig("fig/novels_annotations.png", format="png")
plt.tight_layout()
plt.savefig("../../output/figures/novels_annotations_combined.png", format="png")
plt.show()

## People

In [None]:
# this chunk looks at who the people in KirjaSampo are

Q =""" 

SELECT  ((?novalue/?total) AS ?result) WHERE {
SELECT (COUNT(DISTINCT ?person) AS ?total) (COUNT(DISTINCT ?personInfo) AS ?novalue) 
    WHERE

{
    
    ?person a foaf:Person . # all people
    
   OPTIONAL { 
    
    ?person a foaf:Person .
    FILTER EXISTS  { ?person kaunokki:occupation ?info }
    BIND(?person AS ?personInfo) 
    
  } 
} 
}
"""

In [None]:
personvars=  "kaunokki:occupation kaunokki:kansallisuus foaf:gender kaunokki:timeOfBirth kaunokki:timeOfDeath kaunokki:placeOfBirth kaunokki:placeOfDeath kaunokki:hasLivedIn kaunokki:aidinkieli kaunokki:hasBiographicalInformation kaunokki:ownWords kaunokki:textSample kaunokki:hasWritten" #kaunokki:sameAs
personvars= personvars.split()
# loop
personstats = {}
for p in personvars:
    Q_ = Q.replace("kaunokki:occupation", p) # set the wanted variable
    
    sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
    sparql.setQuery(PREFIXES + Q_)
    sparql.setReturnFormat(JSON)
    sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
    results3 = sparql.query().convert()

    vars = results3['head']['vars']
    result = results3["results"]["bindings"][0]["result"]["value"] 
    print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)
    personstats[p] = result
    

In [None]:
people=pd.DataFrame.from_dict(personstats,"index", columns=["hasValue"]).sort_values(by="hasValue", ascending=False).astype(float).round(4)
people

In [None]:
# do the same only looking at authors
Q =""" 

SELECT  ((?novalue/?total) AS ?result) WHERE {
SELECT (COUNT(DISTINCT ?person) AS ?total) (COUNT(DISTINCT ?personInfo) AS ?novalue) 
    WHERE

{
    
    ?person a foaf:Person . # all people
    ?person kaunokki:occupation/skos:broader* <http://www.yso.fi/onto/koko/p35732> .# kirjailijat
   OPTIONAL { 
    
    ?person a foaf:Person .
    FILTER EXISTS  { ?person variable ?info }
    BIND(?person AS ?personInfo) 
    
  } 
} 
}
"""

personvars=  "kaunokki:kansallisuus foaf:gender kaunokki:timeOfBirth kaunokki:timeOfDeath kaunokki:placeOfBirth kaunokki:placeOfDeath kaunokki:hasLivedIn kaunokki:aidinkieli kaunokki:hasBiographicalInformation kaunokki:ownWords kaunokki:textSample kaunokki:hasWritten" #kaunokki:sameAs
personvars= personvars.split()
# loop
personstats = {}
for p in personvars:
    Q_ = Q.replace("variable", p) # set the wanted variable
    
    sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
    sparql.setQuery(PREFIXES + Q_)
    sparql.setReturnFormat(JSON)
    sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
    results3 = sparql.query().convert()

    vars = results3['head']['vars']
    result = results3["results"]["bindings"][0]["result"]["value"] 
    print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)
    personstats[p] = result

In [None]:
# Only authors
Q =""" 
SELECT  ((?novalue/?total) AS ?result) ((?novaluefi/?totalfi) AS ?resultfi) WHERE {
SELECT (COUNT(DISTINCT ?person) AS ?total) (COUNT(DISTINCT ?personInfo) AS ?novalue) (COUNT(DISTINCT ?personfi) AS ?totalfi) (COUNT(DISTINCT ?personInfofi) AS ?novaluefi) 
    WHERE

{
 
    {
    ?person a foaf:Person . # all people
    ?person kaunokki:occupation/skos:broader* <http://www.yso.fi/onto/koko/p35732> . # kirjailijat
    
   OPTIONAL { 
    ?person a foaf:Person .
    ?person kaunokki:occupation/skos:broader* <http://www.yso.fi/onto/koko/p35732> . # kirjailijat
    FILTER EXISTS  { ?person variable ?info }
    BIND(?person AS ?personInfo) 
  	} 
    }
    UNION
    {
      ?personfi a foaf:Person . # all people
    ?personfi kaunokki:occupation/skos:broader* <http://www.yso.fi/onto/koko/p35732> . # kirjailijat
      ?personfi kaunokki:kansallisuus <http://www.yso.fi/onto/koko/p35259>.
    
   OPTIONAL { 
    
    ?personfi a foaf:Person .
    ?personfi kaunokki:occupation/skos:broader* <http://www.yso.fi/onto/koko/p35732> . # kirjailijat
    ?personfi kaunokki:kansallisuus <http://www.yso.fi/onto/koko/p35259>.
    FILTER EXISTS  { ?personfi variable ?infofi }
    BIND(?personfi AS ?personInfofi) 
    }
    }
  }
}
"""

personvars=  "kaunokki:kansallisuus foaf:gender kaunokki:timeOfBirth kaunokki:timeOfDeath kaunokki:placeOfBirth kaunokki:placeOfDeath kaunokki:hasLivedIn kaunokki:aidinkieli kaunokki:hasBiographicalInformation kaunokki:ownWords kaunokki:textSample kaunokki:hasWritten" #kaunokki:sameAs
personvars= personvars.split()
# loop
authors=pd.DataFrame()
for p in personvars:
    Q_ = Q.replace("variable", p) # set the wanted variable
    
    sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
    sparql.setQuery(PREFIXES + Q_)
    sparql.setReturnFormat(JSON)
    sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
    results3 = sparql.query().convert()

    vars = results3['head']['vars']
    result = results3["results"]["bindings"][0]["result"]["value"] 
    print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)
    temp=JSON2Pandas(results3)
    authors=pd.concat([authors,temp])

In [None]:
authors.index=personvars
authors=authors.sort_values(by="result", ascending=False).astype(float).round(4)
authors

In [None]:
#authors= pd.DataFrame.from_dict(personstats,"index", columns=["hasValue"]).sort_values(by="hasValue", ascending=False).astype(float).round(4)
pd.concat([authors,people], axis=1).rename(columns = {'result':'authors', 'resultfi':'authorsFin',"hasValue":"allPeople"})#, inplace = True)#, keys=["authors","all"])

In [None]:
# so, it is safe to draw conclusions about the mother tongue, gender or nationality, but other info is less documented

In [None]:
# then just count all people, all authors and Finnish authors
Q = """ 
SELECT (COUNT(DISTINCT ?anyperson) AS ?totalPeople)  (COUNT(DISTINCT ?person) AS ?authors)  (COUNT(DISTINCT ?personfi) AS ?finnishAuthors) 
    WHERE

{
  {
    ?anyperson a foaf:Person . # all people
  }
 UNION
    {
    ?person a foaf:Person . # all people
    ?person kaunokki:occupation/skos:broader* <http://www.yso.fi/onto/koko/p35732> . # kirjailijat
    }
    UNION
    {
      ?personfi a foaf:Person . # all people
    ?personfi kaunokki:occupation/skos:broader* <http://www.yso.fi/onto/koko/p35732> .# kirjailijat
      ?personfi kaunokki:kansallisuus <http://www.yso.fi/onto/koko/p35259>.
    
    }
    
  }


"""


sparql = SPARQLWrapper("http://ldf.fi/booksampo-2022/sparql")
sparql.setQuery(PREFIXES + Q)
sparql.setReturnFormat(JSON)
sparql.addCustomHttpHeader(*list(AUTHORIZATION_HEADER.items())[0])
results3 = sparql.query().convert()

vars = results3['head']['vars']
#result = results3["results"]["bindings"][0]["result"]["value"] 
print("Query returned {} result:".format(len(results3["results"]["bindings"])), result)
JSON2Pandas(results3)