## Svenska Akademin  - Wikidata
version 2.2

WD egenskap [Property:P5325](https://www.wikidata.org/wiki/Property:P5325) 
* this [notebook](https://github.com/salgo60/open-data-examples/blob/master/Svenska%20Akademin.ipynb)  
* Wikidata [WikiProject_Sweden/Swedish_Academy](https://www.wikidata.org/wiki/Wikidata:WikiProject_Sweden/Swedish_Academy)
  * SPARQL -> [objects maintained by the project](https://w.wiki/XTB)
----
 
1. [query Wikidata](https://w.wiki/XBp) for people im the Swedish Academy
1. using [Property:P5325](https://www.wikidata.org/wiki/Property:P5325)  to access the page and webscrape some data
1. create ds with numbering and replace/replaced by
1. use Open Refine for connect it to WIkidata
1. batch upload using [Quickstatement](https://quickstatements.toolforge.org/#/)


In [185]:
import urllib3, json
import pandas as pd   
from bs4 import BeautifulSoup
import sys
import pprint
from SPARQLWrapper import SPARQLWrapper, JSON
from tqdm.notebook import trange  
from wikidataintegrator import wdi_core, wdi_login

endpoint_url = "https://query.wikidata.org/sparql"

SparqlQuery = """SELECT ?item ?svaid WHERE {
?item wdt:P5325 ?svaid
}"""

# query 2 https://w.wiki/XBp
SparqlQuery_version2 = """SELECT ?item ?svaid ?chairLabel ?chairOrder ?start ?end ?ordinalnr ?replacedby ?replacedbyLabel ?replaces ?replacesLabel WHERE {
  ?item wdt:P5325 ?svaid.
  OPTIONAL {
    ?item p:P39 ?Pchair.
    ?Pchair ps:P39 ?chair.
    ?chair wdt:P361 wd:Q207360.
    OPTIONAL { ?chair wdt:P1545 ?chairOrder}
    OPTIONAL { ?Pchair pq:P580 ?start. }
    OPTIONAL { ?Pchair pq:P582 ?end. }
    OPTIONAL { ?Pchair pq:P1365 ?replaces. }
    OPTIONAL { ?Pchair pq:P1366 ?replacedby. }
    OPTIONAL { ?Pchair pq:P1545 ?ordinalnr. }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
} order by xsd:integer(?order) xsd:integer(?ordinalnr)"""

http = urllib3.PoolManager()

# Query https://w.wiki/Vo5
def get_results(endpoint_url, query):
    user_agent = "user  salgo60/%s.%s" % (sys.version_info[0], sys.version_info[1])
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

def getChair (sakid):
    url = "http://www.svenskaakademien.se/svenska-akademien/ledamotsregister/" + sakid
    r = http.request('GET', url) 
    soup = BeautifulSoup(r.data, "html.parser")
    try:
        stol = soup.findAll("div", {"class": "field-name-field-stol"})[0].get_text()
        stol = stol.replace(u'Stol:\xa0', u'')
    except:
        stol =""
    try: 
        intrade = soup.findAll("div", {"class": "field-name-field-tog-intrade"})[0].get_text()
        intrade = intrade.replace(u'Inträde:\xa0',u'')
    except:
        intrade =""
    try: 
        avled = soup.findAll("div", {"class": "field-name-field-avled"})[0].get_text()
        avled = avled.replace(u'Utträde:\xa0',u'')
    except:
        avled =""
    return stol,intrade,avled

SparQlResults = get_results(endpoint_url, SparqlQuery_version2)
length = len (SparQlResults["results"]["bindings"])
df = pd.DataFrame(columns=['WD', 'SvenskaAkademin', 'wdReplace', 'wdReplaceBy', 'wdStart', 'wdEnd',
                           'wdOrdinal','chairOrder','SvChair', 'SvStart', 'SvEnd'])
for r in trange(0,length):
    resultSparql = SparQlResults["results"]["bindings"][r]
    wd = resultSparql["item"]["value"].replace("http://www.wikidata.org/entity/","") 
    try:
        wdReplace = resultSparql["item"]["value"].replace("http://www.wikidata.org/entity/","") 
    except:
        wdReplace = ""
    try:
        wdReplaceBy = resultSparql["item"]["value"].replace("http://www.wikidata.org/entity/","") 
    except:
        wdReplaceBy = ""
    try: 
        wdStart = resultSparql["start"]["value"] 
    except:
        wdStart = ""    
    try:
        wdEnd = resultSparql["end"]["value"] 
    except:
        wdEnd = ""
    try:
        wdOrdinal = resultSparql["ordinalnr"]["value"] 
    except:
        wdOrdinal = ""
    try:
        chairOrder = resultSparql["chairOrder"]["value"] 
    except:
        chairOrder = ""
    try:
        sakid = resultSparql["svaid"]["value"]
    except:
        sakid = ""
    chair, start, end = getChair(sakid) 
    df = df.append({'WD': wd, 'SvChair': chair, 'SvStart': start, 'SvEnd': end, 'SvenskaAkademin': sakid, 
                   'wdReplace' : wdReplace, 'wdReplaceBy' : wdReplaceBy, 'wdStart' : wdStart, 'wdEnd' : wdEnd,
                   'wdOrdinal' : wdOrdinal, 'chairOrder': chairOrder}, ignore_index=True)
  

HBox(children=(FloatProgress(value=0.0, max=197.0), HTML(value='')))




In [217]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 197 entries, 0 to 196
Data columns (total 12 columns):
WD                 197 non-null object
SvenskaAkademin    197 non-null object
wdReplace          197 non-null object
wdReplaceBy        197 non-null object
wdStart            80 non-null object
wdEnd              72 non-null datetime64[ns, UTC]
wdOrdinal          197 non-null object
chairOrder         197 non-null object
SvChair            197 non-null int64
SvStart            196 non-null datetime64[ns]
SvEnd              182 non-null datetime64[ns]
wdEndDate          72 non-null object
dtypes: datetime64[ns, UTC](1), datetime64[ns](2), int64(1), object(8)
memory usage: 18.6+ KB


In [218]:
df.head(200)

Unnamed: 0,WD,SvenskaAkademin,wdReplace,wdReplaceBy,wdStart,wdEnd,wdOrdinal,chairOrder,SvChair,SvStart,SvEnd,wdEndDate
0,Q93137,lagerkvist-par,Q93137,Q93137,NaT,NaT,,,8,1940-12-20,1974-07-11,NaT
1,Q129173,heidenstam-verner-von,Q129173,Q129173,NaT,NaT,,,8,1912-12-20,1940-05-20,NaT
2,Q151911,berzelius-jacob,Q151911,Q151911,NaT,NaT,,,5,1837-12-20,1848-08-07,NaT
3,Q154759,hedin-sven,Q154759,Q154759,NaT,NaT,,,6,1913-12-20,1952-11-26,NaT
4,Q156749,nordenskiold-adolf,Q156749,Q156749,NaT,NaT,,,12,1893-12-20,1901-08-12,NaT
5,Q194665,svenbro-jesper,Q194665,Q194665,NaT,NaT,,,8,2006-12-20,NaT,NaT
6,Q259238,trotzig-birgitta,Q259238,Q259238,NaT,NaT,,,6,1993-12-20,2011-05-14,NaT
7,Q263017,stridsberg-sara,Q263017,Q263017,NaT,NaT,,,13,2016-12-20,2018-05-03,NaT
8,Q270468,wagner-elin,Q270468,Q270468,NaT,NaT,,,15,1944-12-20,1949-01-07,NaT
9,Q271314,lugn-kristina,Q271314,Q271314,NaT,NaT,,,14,2006-12-20,2020-05-09,NaT


In [219]:
df['wdStart'] = pd.to_datetime(df['wdStart'])
df['wdEnd'] = pd.to_datetime(df['wdEnd'])
df['wdStart'] = df['wdStart'].dt.date  
df['wdEndDate'] = df['wdEnd'].dt.date 
#df['wdStartDate'] = df['wdStart'].dt.date 
df['SvStart'] = pd.to_datetime(df['SvStart']) 
df['SvEnd'] = pd.to_datetime(df['SvEnd']) 
df['SvChair'] = df['SvChair'].astype(int)
#df['chairOrder'] = df['chairOrder'].astype(int)

In [220]:
df.head(100)


Unnamed: 0,WD,SvenskaAkademin,wdReplace,wdReplaceBy,wdStart,wdEnd,wdOrdinal,chairOrder,SvChair,SvStart,SvEnd,wdEndDate
0,Q93137,lagerkvist-par,Q93137,Q93137,NaT,NaT,,,8,1940-12-20,1974-07-11,NaT
1,Q129173,heidenstam-verner-von,Q129173,Q129173,NaT,NaT,,,8,1912-12-20,1940-05-20,NaT
2,Q151911,berzelius-jacob,Q151911,Q151911,NaT,NaT,,,5,1837-12-20,1848-08-07,NaT
3,Q154759,hedin-sven,Q154759,Q154759,NaT,NaT,,,6,1913-12-20,1952-11-26,NaT
4,Q156749,nordenskiold-adolf,Q156749,Q156749,NaT,NaT,,,12,1893-12-20,1901-08-12,NaT
5,Q194665,svenbro-jesper,Q194665,Q194665,NaT,NaT,,,8,2006-12-20,NaT,NaT
6,Q259238,trotzig-birgitta,Q259238,Q259238,NaT,NaT,,,6,1993-12-20,2011-05-14,NaT
7,Q263017,stridsberg-sara,Q263017,Q263017,NaT,NaT,,,13,2016-12-20,2018-05-03,NaT
8,Q270468,wagner-elin,Q270468,Q270468,NaT,NaT,,,15,1944-12-20,1949-01-07,NaT
9,Q271314,lugn-kristina,Q271314,Q271314,NaT,NaT,,,14,2006-12-20,2020-05-09,NaT


In [221]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 197 entries, 0 to 196
Data columns (total 12 columns):
WD                 197 non-null object
SvenskaAkademin    197 non-null object
wdReplace          197 non-null object
wdReplaceBy        197 non-null object
wdStart            80 non-null object
wdEnd              72 non-null datetime64[ns, UTC]
wdOrdinal          197 non-null object
chairOrder         197 non-null object
SvChair            197 non-null int64
SvStart            196 non-null datetime64[ns]
SvEnd              182 non-null datetime64[ns]
wdEndDate          72 non-null object
dtypes: datetime64[ns, UTC](1), datetime64[ns](2), int64(1), object(8)
memory usage: 18.6+ KB


In [191]:
pd.set_option("display.max.rows", None) 

In [222]:
akstolar = df.sort_values(["SvChair","SvStart"])
akstolar

Unnamed: 0,WD,SvenskaAkademin,wdReplace,wdReplaceBy,wdStart,wdEnd,wdOrdinal,chairOrder,SvChair,SvStart,SvEnd,wdEndDate
165,Q491083,hopken-anders-johan-von,Q491083,Q491083,1786-04-05,1789-05-09 00:00:00+00:00,1.0,1.0,1,1786-04-05,1789-05-09,1789-05-09
138,Q4358870,gyldenstolpe-nils-philip,Q4358870,Q4358870,1789-06-18,1810-02-20 00:00:00+00:00,2.0,1.0,1,1789-12-11,1810-02-20,1810-02-20
187,Q1355965,wallin-johan-olof,Q1355965,Q1355965,1811-05-29,1839-06-30 00:00:00+00:00,3.0,1.0,1,1811-05-29,1839-06-30,1839-06-30
119,Q1959091,fryxell-anders,Q1959091,Q1959091,1841-05-23,1881-03-21 00:00:00+00:00,4.0,1.0,1,1841-05-23,1881-03-21,1881-03-21
126,Q2291097,forssell-hans,Q2291097,Q2291097,1881-12-20,1901-07-31 00:00:00+00:00,5.0,1.0,1,1881-12-20,1901-07-31,1901-07-31
122,Q2070822,bildt-carl,Q2070822,Q2070822,1902-06-20,1931-01-26 00:00:00+00:00,6.0,1.0,1,1902-06-20,1931-01-26,1931-01-26
124,Q2193692,wedberg-birger,Q2193692,Q2193692,1931-12-20,1945-10-02 00:00:00+00:00,7.0,1.0,1,1931-12-20,1945-10-02,1945-10-02
125,Q2286404,ekeberg-birger,Q2286404,Q2286404,1945-12-20,1968-11-30 00:00:00+00:00,8.0,1.0,1,1945-12-20,1968-11-30,1968-11-30
175,Q736172,petren-sture,Q736172,Q736172,1969-12-20,1976-12-13 00:00:00+00:00,9.0,1.0,1,1969-12-20,1976-12-13,1976-12-13
188,Q1358507,rudholm-sten,Q1358507,Q1358507,1977-12-20,2008-11-29 00:00:00+00:00,10.0,1.0,1,1977-12-20,2008-11-29,2008-11-29


In [224]:
akstolar = akstolar.reset_index()


In [225]:
stolar = {1: "Q96599799", 2: "Q96600293",3: "Q96600298", 4: "Q96600304", 5:"Q96600314",
         6: "Q96600324", 7: "Q96600329", 8: "Q96600338", 9: "Q96600350", 10:"Q96600362",
         11: "Q96600367", 12: "Q96600377", 13: "Q96600389", 14: "Q96600400", 15: "Q96600412",
         16: "Q96600424", 17: "Q96600443", 18:"Q96600458"}

In [226]:
akstolar.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 197 entries, 0 to 196
Data columns (total 13 columns):
index              197 non-null int64
WD                 197 non-null object
SvenskaAkademin    197 non-null object
wdReplace          197 non-null object
wdReplaceBy        197 non-null object
wdStart            80 non-null object
wdEnd              72 non-null datetime64[ns, UTC]
wdOrdinal          197 non-null object
chairOrder         197 non-null object
SvChair            197 non-null int64
SvStart            196 non-null datetime64[ns]
SvEnd              182 non-null datetime64[ns]
wdEndDate          72 non-null object
dtypes: datetime64[ns, UTC](1), datetime64[ns](2), int64(2), object(8)
memory usage: 20.1+ KB


In [229]:
# add columns for WD Object Chair and replace and replaced by
orderNumber = 0
for i, row in enumerate(akstolar.itertuples()):
    beforeId = i-1 
    replaceWD = ""
    replacedByWD = ""
    if akstolar.iloc[beforeId]["SvChair"] == akstolar.iloc[i]["SvChair"]:
        #print("\t",beforeId,"\t",akstolar.iloc[beforeId]["SvenskaAkademin"], akstolar.iloc[beforeId]["SvChair"])
        replaceWD = akstolar.iloc[beforeId]["WD"]
        orderNumber = orderNumber + 1 
        akstolar.at[i,"replaceWD"] = replaceWD
    else:
        orderNumber = 1
    akstolar.at[i,"orderNumber"] = orderNumber
    afterId  = i+1 
    if afterId < len(akstolar):
        if akstolar.iloc[afterId]["SvChair"] == akstolar.iloc[i]["SvChair"]:
            #print("\t",afterId,"\t",akstolar.iloc[afterId]["SvenskaAkademin"], akstolar.iloc[afterId]["SvChair"])
            replacedByWD = akstolar.iloc[afterId]["WD"]
            akstolar.at[i,"replacedByWD"] = replacedByWD
    print(i,row.WD,row.SvChair,orderNumber,row.SvenskaAkademin,row.SvStart,row.SvEnd,replaceWD,replacedByWD)
    akstolar.at[i,"newWDChair"] = stolar[row.SvChair] 
    akstolar.at[i,"orderNumber"] = orderNumber

0 Q491083 1 1 hopken-anders-johan-von 1786-04-05 00:00:00 1789-05-09 00:00:00  Q4358870
1 Q4358870 1 2 gyldenstolpe-nils-philip 1789-12-11 00:00:00 1810-02-20 00:00:00 Q491083 Q1355965
2 Q1355965 1 3 wallin-johan-olof 1811-05-29 00:00:00 1839-06-30 00:00:00 Q4358870 Q1959091
3 Q1959091 1 4 fryxell-anders 1841-05-23 00:00:00 1881-03-21 00:00:00 Q1355965 Q2291097
4 Q2291097 1 5 forssell-hans 1881-12-20 00:00:00 1901-07-31 00:00:00 Q1959091 Q2070822
5 Q2070822 1 6 bildt-carl 1902-06-20 00:00:00 1931-01-26 00:00:00 Q2291097 Q2193692
6 Q2193692 1 7 wedberg-birger 1931-12-20 00:00:00 1945-10-02 00:00:00 Q2070822 Q2286404
7 Q2286404 1 8 ekeberg-birger 1945-12-20 00:00:00 1968-11-30 00:00:00 Q2193692 Q736172
8 Q736172 1 9 petren-sture 1969-12-20 00:00:00 1976-12-13 00:00:00 Q2286404 Q1358507
9 Q1358507 1 10 rudholm-sten 1977-12-20 00:00:00 2008-11-29 00:00:00 Q736172 Q264036
10 Q264036 1 11 lotass-lotta 2009-12-20 00:00:00 2018-05-07 00:00:00 Q1358507 Q55338841
11 Q55338841 1 12 runesson-eric-

102 Q1348997 10 5 reuterdahl-henrik 1852-12-20 00:00:00 1870-06-28 00:00:00 Q3658762 Q5755434
103 Q5755434 10 6 genberg-paul 1872-06-05 00:00:00 1875-09-29 00:00:00 Q1348997 Q1040670
104 Q1040670 10 7 snoilsky-carl 1876-12-20 00:00:00 1903-05-19 00:00:00 Q5755434 Q1959642
105 Q1959642 10 8 hjarne-harald 1903-12-20 00:00:00 1922-01-06 00:00:00 Q1040670 Q325547
106 Q325547 10 9 book-fredrik 1922-12-20 00:00:00 1961-12-02 00:00:00 Q1959642 Q3352137
107 Q3352137 10 10 lonnroth-erik 1962-12-20 00:00:00 2002-03-10 00:00:00 Q325547 Q347695
108 Q347695 10 11 englund-peter 2002-12-20 00:00:00 NaT Q3352137 
109 Q6080164 11 1 rosenstein-nils-von 1786-04-05 00:00:00 1824-08-07 00:00:00  Q5711087
110 Q5711087 11 2 enberg-lars-magnus 1825-02-21 00:00:00 1865-11-20 00:00:00 Q6080164 Q990655
111 Q990655 11 3 hildebrand-bror-emil 1866-12-20 00:00:00 1884-08-30 00:00:00 Q5711087 Q731379
112 Q731379 11 4 odhner-clas-theodor 1885-12-20 00:00:00 1904-06-11 00:00:00 Q990655 Q134641
113 Q134641 11 5 karlfeld

In [230]:
akstolar['orderNumber'] = pd.to_numeric(akstolar['orderNumber'],downcast='integer') 
akstolar.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 197 entries, 0 to 196
Data columns (total 17 columns):
index              197 non-null int64
WD                 197 non-null object
SvenskaAkademin    197 non-null object
wdReplace          197 non-null object
wdReplaceBy        197 non-null object
wdStart            80 non-null object
wdEnd              72 non-null datetime64[ns, UTC]
wdOrdinal          197 non-null object
chairOrder         197 non-null object
SvChair            197 non-null int64
SvStart            196 non-null datetime64[ns]
SvEnd              182 non-null datetime64[ns]
wdEndDate          72 non-null object
orderNumber        197 non-null int8
replacedByWD       179 non-null object
replaceWD          179 non-null object
newWDChair         197 non-null object
dtypes: datetime64[ns, UTC](1), datetime64[ns](2), int64(2), int8(1), object(11)
memory usage: 24.9+ KB


In [232]:
akstolar

Unnamed: 0,index,WD,SvenskaAkademin,wdReplace,wdReplaceBy,wdStart,wdEnd,wdOrdinal,chairOrder,SvChair,SvStart,SvEnd,wdEndDate,orderNumber,replacedByWD,replaceWD,newWDChair
0,165,Q491083,hopken-anders-johan-von,Q491083,Q491083,1786-04-05,1789-05-09 00:00:00+00:00,1.0,1.0,1,1786-04-05,1789-05-09,1789-05-09,1,Q4358870,,Q96599799
1,138,Q4358870,gyldenstolpe-nils-philip,Q4358870,Q4358870,1789-06-18,1810-02-20 00:00:00+00:00,2.0,1.0,1,1789-12-11,1810-02-20,1810-02-20,2,Q1355965,Q491083,Q96599799
2,187,Q1355965,wallin-johan-olof,Q1355965,Q1355965,1811-05-29,1839-06-30 00:00:00+00:00,3.0,1.0,1,1811-05-29,1839-06-30,1839-06-30,3,Q1959091,Q4358870,Q96599799
3,119,Q1959091,fryxell-anders,Q1959091,Q1959091,1841-05-23,1881-03-21 00:00:00+00:00,4.0,1.0,1,1841-05-23,1881-03-21,1881-03-21,4,Q2291097,Q1355965,Q96599799
4,126,Q2291097,forssell-hans,Q2291097,Q2291097,1881-12-20,1901-07-31 00:00:00+00:00,5.0,1.0,1,1881-12-20,1901-07-31,1901-07-31,5,Q2070822,Q1959091,Q96599799
5,122,Q2070822,bildt-carl,Q2070822,Q2070822,1902-06-20,1931-01-26 00:00:00+00:00,6.0,1.0,1,1902-06-20,1931-01-26,1931-01-26,6,Q2193692,Q2291097,Q96599799
6,124,Q2193692,wedberg-birger,Q2193692,Q2193692,1931-12-20,1945-10-02 00:00:00+00:00,7.0,1.0,1,1931-12-20,1945-10-02,1945-10-02,7,Q2286404,Q2070822,Q96599799
7,125,Q2286404,ekeberg-birger,Q2286404,Q2286404,1945-12-20,1968-11-30 00:00:00+00:00,8.0,1.0,1,1945-12-20,1968-11-30,1968-11-30,8,Q736172,Q2193692,Q96599799
8,175,Q736172,petren-sture,Q736172,Q736172,1969-12-20,1976-12-13 00:00:00+00:00,9.0,1.0,1,1969-12-20,1976-12-13,1976-12-13,9,Q1358507,Q2286404,Q96599799
9,188,Q1358507,rudholm-sten,Q1358507,Q1358507,1977-12-20,2008-11-29 00:00:00+00:00,10.0,1.0,1,1977-12-20,2008-11-29,2008-11-29,10,Q264036,Q736172,Q96599799


In [236]:
akstolar.loc[:,("WD","SvenskaAkademin","newWDChair","SvStart","SvEnd","orderNumber","replaceWD","replacedByWD")]

Unnamed: 0,WD,SvenskaAkademin,newWDChair,SvStart,SvEnd,orderNumber,replaceWD,replacedByWD
0,Q491083,hopken-anders-johan-von,Q96599799,1786-04-05,1789-05-09,1,,Q4358870
1,Q4358870,gyldenstolpe-nils-philip,Q96599799,1789-12-11,1810-02-20,2,Q491083,Q1355965
2,Q1355965,wallin-johan-olof,Q96599799,1811-05-29,1839-06-30,3,Q4358870,Q1959091
3,Q1959091,fryxell-anders,Q96599799,1841-05-23,1881-03-21,4,Q1355965,Q2291097
4,Q2291097,forssell-hans,Q96599799,1881-12-20,1901-07-31,5,Q1959091,Q2070822
5,Q2070822,bildt-carl,Q96599799,1902-06-20,1931-01-26,6,Q2291097,Q2193692
6,Q2193692,wedberg-birger,Q96599799,1931-12-20,1945-10-02,7,Q2070822,Q2286404
7,Q2286404,ekeberg-birger,Q96599799,1945-12-20,1968-11-30,8,Q2193692,Q736172
8,Q736172,petren-sture,Q96599799,1969-12-20,1976-12-13,9,Q2286404,Q1358507
9,Q1358507,rudholm-sten,Q96599799,1977-12-20,2008-11-29,10,Q736172,Q264036
