# Litteraturbanken Author  <-> API
Version 0.2 of this [notebook](https://github.com/salgo60/open-data-examples/blob/master/Litteraturbanken%20API.ipynb)

**TODO**
1. Compare the Litteraturbanken author IDs with the values of Wikidata property [P5101](https://www.wikidata.org/wiki/Property:P5101) to find mismatches
2. If the metadata is licensed CC0, create Wikidata items for all books in Litteraturbanken (see test)

* this [Jupyter Notebook](https://github.com/salgo60/open-data-examples/blob/master/Litteraturbanken%20API.ipynb) 
* [SPARQL](https://w.wiki/cri) 
* [Litteraturbanken API](https://litteraturbanken.se/api/list_all/etext,faksimil,pdf?from=0&to=10000&include=lbworkid,titlepath,title,librisid,mediatype,main_author.author_id)

# Wikidata

In [1]:
from datetime import datetime

# Capture the timestamp once so the value kept in `now` is the one printed.
now = datetime.now()
print("Last run: ", now)

Last run:  2025-02-23 18:06:50.322915


In [2]:
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
import pandas as pd 

from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint of the Wikidata Query Service.
endpoint_url = "https://query.wikidata.org/sparql"

# Selects every item that has a P5101 statement, the item's label
# (label service language list "sv,en") and the P5101 value bound as
# ?authorid, ordered by label.
query  = """SELECT ?item ?itemLabel ?authorid  WHERE {
?item wdt:P5101 ?authorid 
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
ORDER BY (?itemLabel)"""

def get_sparql_dataframe(endpoint_url, query):
    """
    Run a SPARQL query and return its result bindings as a Pandas data frame.

    The frame has one column per variable listed in the response head and one
    row per binding; a variable that is unbound in a row is stored as None.
    """
    # Identify the client with a user agent that carries the Python major.minor version.
    agent = "salgo60/%s.%s" % tuple(sys.version_info[:2])

    client = SPARQLWrapper(endpoint_url, agent=agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    reply = client.query()

    # Parse the raw HTTP response body as JSON.
    payload = json.load(reply.response)
    variables = payload['head']['vars']

    records = [
        [binding.get(name, {}).get('value') for name in variables]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(records, columns=variables)

# Run the P5101 query against the Wikidata endpoint; columns are item, itemLabel, authorid.
WDLitteraturbankenAuthor = get_sparql_dataframe(endpoint_url, query )


In [3]:
# Summary of the Wikidata result: row count, non-null counts and dtypes per column.
WDLitteraturbankenAuthor.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4052 entries, 0 to 4051
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   item       4052 non-null   object
 1   itemLabel  4052 non-null   object
 2   authorid   4052 non-null   object
dtypes: object(3)
memory usage: 95.1+ KB


## Litteraturbanken 

In [4]:
import urllib3, json
import pandas as pd 
http = urllib3.PoolManager()

# Use the canonical option names. The previous "display.max.columns" only
# resolved because pandas matches option keys as regular expressions.
pd.set_option("display.max_columns", None)
# NOTE: this lifts the row limit for every later cell, so display large
# frames/series with .head() rather than in full.
pd.set_option("display.max_rows", None)

# Requested window of works. The whole main_author object is included because
# the flattening below reads authorid, full_name and name_for_index from it.
LB_FROM, LB_TO = 0, 10000
url = (
    "https://litteraturbanken.se/api/list_all/etext,faksimil,pdf"
    f"?from={LB_FROM}&to={LB_TO}"
    "&include=lbworkid,titlepath,title,librisid,mediatype,main_author"
)

r = http.request('GET', url)
if r.status != 200:
    # Fail here with a clear message instead of a confusing JSON decode error.
    raise RuntimeError(f"Litteraturbanken API returned HTTP {r.status} for {url}")
data = json.loads(r.data.decode('utf-8'))


def _flatten_work(row):
    """Flatten one API record into a flat dict suitable for a DataFrame row.

    `lbworkid` is required (KeyError if absent). The other top-level fields
    and the author fields are copied only when present, so a missing value
    ends up as NaN in the resulting frame rather than aborting the load.
    """
    new_item = {'lbworkid': row["lbworkid"]}

    for key in ('librisid', 'mediatype', 'title', 'titlepath', 'main_author'):
        if key in row:
            new_item[key] = row[key]

    # Author details live in a nested object; skip them if it is absent or
    # not a mapping (e.g. null in the JSON).
    main_author = row.get('main_author')
    if isinstance(main_author, dict):
        for source_key, column in (
            ("authorid", 'authorid'),
            ("full_name", 'author_fullname'),
            ("name_for_index", 'name_for_index'),
        ):
            if source_key in main_author:
                new_item[column] = main_author[source_key]

    return new_item


print("Length:  ")
listLitteraturbanken = [_flatten_work(row) for row in data["data"]]
print (len(listLitteraturbanken) ," antal poster")

# A full window most likely means the API holds more works than were requested.
if len(listLitteraturbanken) >= LB_TO - LB_FROM:
    print("WARNING: result fills the requested from/to window; the list may be truncated.")


Length:  
10000  antal poster


In [5]:
import pandas as pd  
# One row per flattened work record; keys missing from a record become NaN.
dfLitteraturbanken = pd.DataFrame(listLitteraturbanken)

In [6]:
# Summary of the Litteraturbanken frame: row count, non-null counts and dtypes per column.
dfLitteraturbanken.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   lbworkid         10000 non-null  object
 1   librisid         9511 non-null   object
 2   mediatype        10000 non-null  object
 3   title            10000 non-null  object
 4   titlepath        10000 non-null  object
 5   main_author      10000 non-null  object
 6   authorid         10000 non-null  object
 7   author_fullname  10000 non-null  object
 8   name_for_index   10000 non-null  object
dtypes: object(9)
memory usage: 703.3+ KB


In [7]:
# Peek at the first five works.
dfLitteraturbanken.head()

Unnamed: 0,lbworkid,librisid,mediatype,title,titlepath,main_author,authorid,author_fullname,name_for_index
0,lb1210571,1210571,etext,Kati i Amerika,KatiIAmerika,"{'authorid': 'LindgrenA', 'authorid_norm': 'Li...",LindgrenA,Astrid Lindgren,"Lindgren, Astrid"
1,lb1894436,1894436,etext,Något att leva för,NågotAttLevaFör,"{'authorid': 'VallquistG', 'authorid_norm': 'V...",VallquistG,Gunnel Vallquist,"Vallquist, Gunnel"
2,lb1221508,1221508,etext,Historier från vägarna,HistorierFrånVägarna,"{'authorid': 'HagströmE', 'authorid_norm': 'Ha...",HagströmE,Emil Hagström,"Hagström, Emil"
3,lb1469084,1469084,etext,Briggen Tre Liljor,BriggenTreLiljor,"{'authorid': 'MattsonO', 'authorid_norm': 'Mat...",MattsonO,Olle Mattson,"Mattson, Olle"
4,lb8203261,8203261,etext,Inferno [svensk text] [SV],Inferno/InfernoSvenska,"{'authorid': 'StrindbergA', 'authorid_norm': '...",StrindbergA,August Strindberg,"Strindberg, August"


In [12]:
# Literal (non-regex) substring match on the title. na=False treats a missing
# title as "no match" so the boolean mask never contains NaN.
dfLitteraturbanken[dfLitteraturbanken.title.str.contains("tal", regex=False, na=False)].head()

Unnamed: 0,lbworkid,librisid,mediatype,title,titlepath,main_author,authorid,author_fullname,name_for_index
22,lb8404157,8404157.0,etext,Samlade Verk. Nationalupplaga. 11. Tidiga 80-t...,Tidiga80talsdramer,"{'authorid': 'StrindbergA', 'authorid_norm': '...",StrindbergA,August Strindberg,"Strindberg, August"
35,lb7247748,7247748.0,etext,AB Neandertal,ABNeandertal,"{'authorid': 'HanssonGD', 'authorid_norm': 'Ha...",HanssonGD,Gunnar D Hansson,"Hansson, Gunnar D"
102,lb99904114,,etext,Inträdestal (20 december 1997),Inträdestal,"{'authorid': 'WästbergP', 'authorid_norm': 'Wa...",WästbergP,Per Wästberg,"Wästberg, Per"
190,lb99904116,,etext,Kontinentaldriften och den okända provinsen. A...,Kontinentaldriften,"{'authorid': 'WästbergP', 'authorid_norm': 'Wa...",WästbergP,Per Wästberg,"Wästberg, Per"
196,lb7281564,7281564.0,etext,Ett år på sextiotalet,EttÅrPåSextiotalet,"{'authorid': 'LagercrantzO', 'authorid_norm': ...",LagercrantzO,Olof Lagercrantz,"Lagercrantz, Olof"


In [9]:
# Ten authors (by full name) with the most works.
dfLitteraturbanken["author_fullname"].value_counts().head(10)

author_fullname
Anonym                      837
August Strindberg           372
Carl Jonas Love Almqvist    111
Jon Olof Åberg              111
Selma Lagerlöf               76
Carl Michael Bellman         64
Marie Sophie Schwartz        62
Pehr Thomasson               58
Wilhelmina Stålberg          50
Zacharias Topelius           50
Name: count, dtype: int64

In [10]:
# Works per author id. Limited to the top 30 because display.max_rows is
# unlimited in this notebook and the full series would print every author id.
dfLitteraturbanken["authorid"].value_counts().head(30)

authorid
Anonym                      837
StrindbergA                 372
AlmqvistCJL                 111
ÅbergJO                     111
LagerlöfS                    76
BellmanCM                    64
SchwartzMS                   62
ThomassonP                   58
StålbergW                    50
TopeliusZ                    50
EkelundV                     47
TegnérE                      46
FlygareCarlénE               44
ShakespeareW                 42
WideAG                       40
HedbergF                     40
GeijerEG                     39
WahlenbergA                  39
DagermanS                    39
RunebergJL                   38
BremerF                      37
AtterbomPDA                  37
BlancheA                     35
MartinsonH                   34
WetterberghCA                34
MellinGH                     31
AhlstromO                    31
HagströmE                    31
SöderbergH                   31
RydbergV                     29
BeskowE                      27