Based on [Ksamsok-basics](https://gist.github.com/Abbe98/882a374350d20b980190c3148f787f5a); see also [Abbe98/awesome-k-samsok](https://github.com/Abbe98/awesome-k-samsok).

* this [Notebook](https://github.com/perrohdin/Bildhistoria-Masterdata-och-Wikidata/blob/main/Notebook/Ksamsok-skola.ipynb)

In [1]:
from datetime import datetime
# Remember when this run started and show it in the cell output,
# so a reader can tell how fresh the results below are.
start_time = datetime.now()
print("Last run: ", start_time)

Last run:  2022-06-09 09:30:30.484994


In [2]:

import math
import requests

In [3]:
# K-samsök supports JSON if the request carries the following Accept header
headers = {'Accept': 'application/json'}

# Two of K-samsök's methods are used in this notebook:
# search (together with a fields list) for getting data,
# and statisticSearch for automatic statistics.
endpoint = 'http://www.kulturarvsdata.se/ksamsok/api'
# hitsPerPage=500 here is the page size the paging code relies on
endpoint_fields = endpoint + '?&x-api=test&method=search&hitsPerPage=500&recordSchema=xml'
endpoint_facet = endpoint + '?&x-api=test&method=statisticSearch&removeBelow=1'

In [4]:
# K-samsök queries are written in the query language CQL,
# which allows you to create very advanced queries:
# https://www.loc.gov/standards/sru/cql/

# K-samsök has a lot of fields that you can query:
# https://www.raa.se/hitta-information/k-samsok/att-anvanda-k-samsok/index-for-statistic-facet/
# https://www.raa.se/hitta-information/k-samsok/att-anvanda-k-samsok/ytterligare-index-for-sok/

# Ask K-samsök for photos (itemType=foto) with thumbnailExists=j
# whose motive word (itemMotiveWord) is "skola".
# The individual conditions are combined with AND.
query = ' AND '.join([
    'itemType=foto',
    'thumbnailExists=j',
    'itemMotiveWord=skola',
])
# Alternative query, filtering on create_fromTime instead of the motive word:
#query = 'itemType=foto AND thumbnailExists=j AND create_fromTime<=1890'

# Let's also specify which fields we want to receive (comma separated)
fields = ','.join([
    'itemLabel',
    'itemDescription',
    'create_fromTime',
    'thumbnail',
    'itemMotiveWord',
    'copyright',
])

# the following is a generator
# a generator is similar to a function
# but instead of returning something once
# it yields multiple things which you can loop over
# this particular generator uses K-samsök's methods search/fields to receive data
# you can reuse this generator in your own projects
def _as_list(value):
    """Return value unchanged when it is already a list, otherwise wrap it in a one-element list."""
    if isinstance(value, list):
        return value
    return [value]


def _merge_fields(raw_fields):
    """Fold name/content pairs into one dict, turning repeated names into lists of their contents."""
    merged = {}
    for field in raw_fields:
        name = field['name']
        content = field['content']
        if name not in merged:
            # first time we see this field: keep it as a regular value
            merged[name] = content
        elif isinstance(merged[name], list):
            # the field is already a list, just extend it
            merged[name].append(content)
        else:
            # second occurrence: promote the stored value to a list.
            # Testing membership (not truthiness) keeps falsy first values
            # such as 0 or '' from being silently overwritten.
            merged[name] = [merged[name], content]
    return merged


def search_field_generator(query, fields):
    """Yield one dict per record returned by K-samsök's search method.

    Relies on the module-level ``endpoint_fields`` (base URL, which asks for
    500 hits per page) and ``headers`` (Accept header requesting JSON).

    Parameters
    ----------
    query : str
        CQL query. It is interpolated into the URL as-is, so characters with a
        special meaning in URLs (for example ``&`` or ``#``) must be escaped by
        the caller.
    fields : str
        Comma separated list of field names to retrieve.

    Yields
    ------
    dict
        Maps each field name to its content. A field name that occurs more
        than once in a record maps to a list of all its contents, in order.

    Raises
    ------
    requests.HTTPError
        If K-samsök answers with an HTTP error status.
    """
    # K-samsök only returns 500 results in a single request;
    # this must match hitsPerPage in endpoint_fields
    page_size = 500
    query_url = '{}&query={}&fields={}&startRecord='.format(endpoint_fields, query, fields)

    # startRecord is 1-based in K-samsök, so pages start at 1, 501, 1001, ...
    start_record = 1
    # unknown until the first page has been fetched
    total_results = None

    while total_results is None or start_record <= total_results:
        r = requests.get(query_url + str(start_record), headers=headers)
        r.raise_for_status()
        result = r.json()['result']

        # the first page doubles as the "how many results are there" request
        if total_results is None:
            total_results = int(result['totalHits'])

        # NOTE(review): the JSON presumably mirrors the XML response, in which
        # case a lone record (or a lone field) may arrive as a dict instead of
        # a list and an empty result set as an empty value - confirm against
        # the API. Normalising here is harmless if that never happens.
        records = result.get('records') or {}
        for record in _as_list(records.get('record', [])):
            # sometimes there are empty records and those have no fields :-(
            # a usable record is expected to have exactly two entries
            if not len(record) == 2:
                continue

            # some fields can appear multiple times
            # therefore those are merged into lists
            yield _merge_fields(_as_list(record['field']))

        start_record += page_size

In [5]:
import pandas as pd

# Now we can loop over our generator and print all the results.
# Each record is also kept in a plain list so the results can be
# explored as a table afterwards.
# You can change the values of query and fields to get the data you need.
records = []
for record in search_field_generator(query, fields):
    print(record)
    records.append(record)

# Build the DataFrame once from the collected records: one row per record,
# one column per field name. Growing a frame with pd.concat inside the loop
# would copy the whole frame on every iteration.
dfTot = pd.DataFrame(records)

{'itemId': 'http://kulturarvsdata.se/HeM/fotografi/271977', 'itemLabel': 'fotografi', 'itemDescription': ['Foto', 'Fotografi av Praktiska Realskolan (Clemensskolan) med fresk av Erik Cederberg', 344, 'arkitektur', 532], 'create_fromTime': 1965, 'thumbnail': 'https://museum.helsingborg.se/web/image/tn/296379/65_346.jpg', 'itemMotiveWord': ['Praktiska realskolan', 'Fresk av Erik Cederberg', 'skola', 'interiörer', 'byggnad', 'Clemensskolan', 'smide']}
{'itemId': 'http://kulturarvsdata.se/HeM/fotografi/621473', 'itemLabel': 'fotografi', 'itemDescription': ['BI-OMRÅDET FREDRIKSDAL, DROTTNINGHÖG, DALHEM [på diaboxen]', 'Dalhem', 'Under 2013 utförde Kulturmagasinet dokumentation av bostadsområdet Drottninghög. Det gjordes intervjuer, fotodokumentation och insamling av fotografier och i viss mån, föremål.\n\nBilden lånades från Titti Möllerstedt och skannades. Bilderna kom ursprungligen från Drottninghögs fritidsgård och togs om hand av TM när fritidsgården lades ner 1995.', 'Foto', 'inlånat o

{'itemId': 'http://kulturarvsdata.se/Kulturen/fotografi/502201', 'itemLabel': 'fotografi, bok, sko, dörr, kvinna, fönster, lampa, student, lärare, skola, stol, interiör, skolbänk, tavla', 'itemDescription': 'Lund. K.v S.t Måns 21 Parkskolan.', 'thumbnail': 'https://carl.kulturen.com/web/image/tn/502205/A1595.jpg', 'itemMotiveWord': ['lärare', 'lampa', 'fönster', 'kvinna', 'student', 'dörr', 'bok', 'tavla', 'stol', 'skolbänk', 'sko', 'interiör', 'skola']}
{'itemId': 'http://kulturarvsdata.se/Kulturen/fotografi/1568228', 'itemLabel': 'fotografi, hus, träd, gata, stadsbebyggelse, gatsten, bilparkering, trottoar, skola, bil', 'itemDescription': 'Lund. [text saknas i liggare]', 'thumbnail': 'https://carl.kulturen.com/web/image/tn/1568232/LB2395.jpg', 'itemMotiveWord': ['gatsten', 'bil', 'skola', 'trottoar', 'träd', 'stadsbebyggelse', 'bilparkering', 'gata', 'hus']}
{'itemId': 'http://kulturarvsdata.se/Kulturen/fotografi/1534374', 'itemLabel': 'fotografi, stadsbebyggelse, skolgård, hus, balk

In [6]:
#dfTot