# Litteraturbanken Author  <-> API
Version 0.2 of this [notebook](https://github.com/salgo60/open-data-examples/blob/master/Litteraturbanken%20API.ipynb)

**TODO**
1. Check the Litteraturbanken authors against Wikidata property [P5101](https://www.wikidata.org/wiki/Property:P5101) to see whether there are any mismatches
2. If the metadata is licensed CC0, create Wikidata items for all books in Litteraturbanken (see test)

* this [Jupyter Notebook](https://github.com/salgo60/open-data-examples/blob/master/Litteraturbanken%20API.ipynb) 
* [SPARQL](https://w.wiki/cri) 
* [Litteraturbanken API](https://litteraturbanken.se/api/list_all/etext,faksimil,pdf?from=0&to=10000&include=lbworkid,titlepath,title,librisid,mediatype,main_author.author_id)

# Wikidata

In [1]:
from datetime import datetime
# Capture the timestamp once so that `now` and the printed value agree
# (previously `now` was unused and the clock was read a second time).
now = datetime.now()
print("Last run: ", now)

Last run:  2020-09-22 02:46:59.665786


In [2]:
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
import pandas as pd 

from SPARQLWrapper import SPARQLWrapper, JSON

# Public SPARQL endpoint of the Wikidata Query Service.
endpoint_url = "https://query.wikidata.org/sparql"

# Select every item that has a P5101 statement (the property the TODO list
# above compares against Litteraturbanken authors), together with its label
# (Swedish first, English as fallback) and the P5101 value itself.
query  = """SELECT ?item ?itemLabel ?authorid  WHERE {
?item wdt:P5101 ?authorid 
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
ORDER BY (?itemLabel)"""

def get_sparql_dataframe(endpoint_url, query):
    """
    Run a SPARQL query and return its result set as a Pandas data frame.

    Parameters
    ----------
    endpoint_url : str
        URL of the SPARQL endpoint to send the query to.
    query : str
        SPARQL query text.

    Returns
    -------
    pandas.DataFrame
        One column per variable listed in the response head and one row per
        binding. A variable that is unbound in a row yields None in that cell.
    """
    # Identify the client with a user agent that carries the Python version.
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent="salgo60/%s.%s" % (major, minor))
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # Decode the raw JSON response body ourselves.
    payload = json.load(client.query().response)
    columns = payload['head']['vars']

    # Each binding maps a variable name to {"type": ..., "value": ...};
    # keep only the value, in head-variable order.
    records = [
        [binding.get(name, {}).get('value') for name in columns]
        for binding in payload['results']['bindings']
    ]
    return pd.DataFrame(records, columns=columns)

# Run the query against the endpoint and keep the result as a data frame.
WDLitteraturbankenAuthor = get_sparql_dataframe(endpoint_url, query )


In [3]:
# Column overview (non-null counts and dtypes) of the Wikidata result.
WDLitteraturbankenAuthor.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1758 entries, 0 to 1757
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   item       1758 non-null   object
 1   itemLabel  1758 non-null   object
 2   authorid   1758 non-null   object
dtypes: object(3)
memory usage: 41.3+ KB


## Litteraturbanken 

In [4]:
import urllib3, json
import pandas as pd 
# HTTP client used for the Litteraturbanken API request.
http = urllib3.PoolManager()

# Show every column and row when a frame is displayed. The exact option names
# are used here; the former "display.max.columns" spelling only resolved
# because pandas looks option names up as regular-expression patterns.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

# Request the whole `main_author` object (rather than only
# `main_author.author_id`) so the author's id and names can be flattened below.
url = "https://litteraturbanken.se/api/list_all/etext,faksimil,pdf?from=0&to=10000&include=lbworkid,titlepath,title,librisid,mediatype,main_author"
r = http.request('GET', url)
if r.status != 200:
    # Fail loudly instead of trying to parse an error page as JSON.
    raise RuntimeError("Litteraturbanken API returned HTTP %s for %s" % (r.status, url))
data = json.loads(r.data.decode('utf-8'))

# Optional top-level fields copied verbatim when present in a work record.
WORK_FIELDS = ("librisid", "mediatype", "title", "titlepath", "main_author")
# Fields lifted out of the nested `main_author` object: source key -> column.
AUTHOR_FIELDS = (
    ("authorid", "authorid"),
    ("full_name", "author_fullname"),
    ("name_for_index", "name_for_index"),
)


def flatten_work(row):
    """
    Flatten one work record from the API into a flat dict.

    `lbworkid` is mandatory (a missing key raises KeyError). Every other
    field is copied only when present, so absent values become NaN once the
    records are loaded into a data frame.
    """
    new_item = {"lbworkid": row["lbworkid"]}
    for field in WORK_FIELDS:
        if field in row:
            new_item[field] = row[field]

    # Author details are only available when `main_author` is an object.
    main_author = row.get("main_author")
    if isinstance(main_author, dict):
        for source_key, column in AUTHOR_FIELDS:
            if source_key in main_author:
                new_item[column] = main_author[source_key]
    return new_item


print("Length:  ")
listLitteraturbanken = [flatten_work(row) for row in data["data"]]
print(len(listLitteraturbanken), " antal poster")


Length:  
5196  antal poster


In [5]:
import pandas as pd  
# One row per work; keys missing from a record become NaN in that column.
dfLitteraturbanken = pd.DataFrame(listLitteraturbanken)

In [6]:
# Column overview (non-null counts and dtypes) of the Litteraturbanken works.
dfLitteraturbanken.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5196 entries, 0 to 5195
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   lbworkid         5196 non-null   object
 1   librisid         4693 non-null   object
 2   mediatype        5196 non-null   object
 3   title            5196 non-null   object
 4   titlepath        5196 non-null   object
 5   main_author      5196 non-null   object
 6   authorid         5196 non-null   object
 7   author_fullname  5196 non-null   object
 8   name_for_index   5196 non-null   object
dtypes: object(9)
memory usage: 365.5+ KB


In [7]:
# Preview the first rows of the flattened works table.
dfLitteraturbanken.head()

Unnamed: 0,lbworkid,librisid,mediatype,title,titlepath,main_author,authorid,author_fullname,name_for_index
0,lb10274,10274,etext,En bukett av Viola,EnBukettAvViola,"{'authorid': 'ZetterströmMarianne', 'authorid_...",ZetterströmMarianne,Marianne Zetterström,"Zetterström, Marianne"
1,lb10332355,10332355,etext,Emma Gloria och de Levande Varslarna,EmmaGloria1,"{'authorid': 'AnderssonH', 'authorid_norm': 'A...",AnderssonH,Henrika Andersson,"Andersson, Henrika"
2,lb10416,10416,etext,Samlade skrifter [8]. Bebådelse. Noveller [1948],Bebådelse1948,"{'authorid': 'BoyeK', 'authorid_norm': 'BoyeK'...",BoyeK,Karin Boye,"Boye, Karin"
3,lb10420,10420,etext,Samlade skrifter [11]. Varia,Varia,"{'authorid': 'BoyeK', 'authorid_norm': 'BoyeK'...",BoyeK,Karin Boye,"Boye, Karin"
4,lb10163208,10163208,etext,Calle Wetterlind. En lustig visa om spektaklet...,CalleWetterlind,"{'authorid': 'Anonym', 'authorid_norm': 'Anony...",Anonym,Anonym,Anonym


In [8]:
# Works whose title contains "tal". This is a case-sensitive substring
# (regex) match, so it also hits titles such as "Italien" or "talangfulla".
# NOTE(review): if only the word "tal" (speech) is wanted, a word-boundary
# pattern would be needed - confirm the intent.
dfLitteraturbanken[dfLitteraturbanken.title.str.contains("tal")]

Unnamed: 0,lbworkid,librisid,mediatype,title,titlepath,main_author,authorid,author_fullname,name_for_index
26,lb12103,12103.0,etext,Höst. Berättelser och tal,Höst,"{'authorid': 'LagerlöfS', 'authorid_norm': 'La...",LagerlöfS,Selma Lagerlöf,"Lagerlöf, Selma"
117,lb1591967,1591967.0,etext,Nordisk kvinnolitteraturhistoria 2. Fadershuse...,NordiskKvinnolitteraturhistoria2,"{'authorid': 'MøllerJensenE', 'authorid_norm':...",MøllerJensenE,Elisabeth Møller Jensen,"Møller Jensen, Elisabeth"
128,lb1595136,1595136.0,etext,Bilder från Italien,BilderFrånItalien,"{'authorid': 'AgrellA', 'authorid_norm': 'Agre...",AgrellA,Alfhild Agrell,"Agrell, Alfhild"
160,lb1640370,1640370.0,etext,Den talangfulla draken. Historier,DenTalangfulla,"{'authorid': 'SöderbergH', 'authorid_norm': 'S...",SöderbergH,Hjalmar Söderberg,"Söderberg, Hjalmar"
205,lb2067559,2067559.0,etext,Minnes-tal öfver erke-biskopen m.m. herr d:r J...,MinnestalÖfverWallin,"{'authorid': 'GeijerEG', 'authorid_norm': 'Gei...",GeijerEG,Erik Gustaf Geijer,"Geijer, Erik Gustaf"
233,lb2429700,2429700.0,etext,Åminnelsetal öfver Högstsalige Hans Majestät K...,ÅminnelsetalÖfverGustafIII,"{'authorid': 'HöijerB', 'authorid_norm': 'Hoij...",HöijerB,Benjamin Höijer,"Höijer, Benjamin"
268,lb2691863,2691863.0,etext,"Hallå, hallå! Grönköping-Motala. Radioaktiva r...",HallåHallåGrönköping,"{'authorid': 'HasselskogN', 'authorid_norm': '...",HasselskogN,Nils Hasselskog,"Hasselskog, Nils"
276,lb2839329,2839329.0,etext,Wermlänningarne. Sorglustigt tal- sång och dan...,Wermlänningarne,"{'authorid': 'DahlgrenFA', 'authorid_norm': 'D...",DahlgrenFA,Fredrik August Dahlgren,"Dahlgren, Fredrik August"
320,lb99904093,,etext,Gustaf den tredje med de förste Aderton af Sve...,GustafDenTredjeMed,"{'authorid': 'FranzénFM', 'authorid_norm': 'Fr...",FranzénFM,Frans Michael Franzén,"Franzén, Frans Michael"
324,lb99904079,,etext,Inträdes-tal hållet i Svenska Akademien Den 29...,Inträdestal1840,"{'authorid': 'AtterbomPDA', 'authorid_norm': '...",AtterbomPDA,Per Daniel Amadeus Atterbom,"Atterbom, P.D.A."


In [9]:
# Number of works per author full name, most frequent first.
dfLitteraturbanken.author_fullname.value_counts()  

August Strindberg                                   492
Anonym                                              388
Carl Jonas Love Almqvist                            125
Carl Michael Bellman                                 75
Selma Lagerlöf                                       74
Anna Greta Wide                                      56
Vilhelm Ekelund                                      55
William Shakespeare                                  51
Fredrika Bremer                                      50
Harry Martinson                                      41
Victoria Benedictsson                                36
Anna Maria Roos                                      36
Per Daniel Amadeus Atterbom                          36
Hedvig Charlotta Nordenflycht                        36
Anna Wahlenberg                                      34
Esaias Tegnér                                        33
Hjalmar Söderberg                                    32
Heliga Birgitta                                 