# Xsearch - Libris/SwePub

In [1]:
import requests
import pickle
from lib.Publications import Publication

# Query string

In [2]:
# Subject term that is searched for.
query = 'eDNA'
# Database to search: Libris or SwePub. The value is sent as the "database"
# URL parameter (presumably "libris" selects Libris -- confirm in the Xsearch docs).
db = 'swepub'

# Search Libris

In [3]:
def search_db(query, start=1):
    """Send one Xsearch request and return the HTTP response.

    The database searched is taken from the module-level variable ``db``.
    Records are requested in alphabetical order, as JSON, with full format
    level and at most 200 records per call.

    Args:
        query: Subject term to search for. It is percent-encoded before it is
            placed in the URL, so reserved characters such as ``&`` or ``#``
            cannot break the query string.
        start: Position of the first record to return (this notebook starts
            counting at 1).

    Returns:
        The ``requests.Response`` of the GET request.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the server does not answer within the timeout.
    """
    from urllib.parse import quote

    # API reference: http://librishelp.libris.kb.se/help/xsearch_swe.jsp?open=tech
    # "%C3%A4mne" is the percent-encoded field name "ämne" (Swedish for subject).
    encoded_query = quote(str(query), safe="")
    url = (
        "https://libris.kb.se/xsearch"
        f"?query=%C3%A4mne:{encoded_query}"
        "&order=alphabetical&format=json"
        f"&start={start}&n=200&format_level=full&database={db}"
    )
    # Without a timeout a stalled connection would block the notebook forever.
    return requests.get(url, timeout=60)


In [4]:
# Xsearch returns at most 200 records per request, so a larger result set
# has to be fetched page by page until the last record has been received.

# First page
start = 1
results = []
r = search_db(query)
results.append(r)
# The JSON reply wraps everything in one top-level entry; its value holds
# the paging information ('from', 'to', 'records') and the record list.
page_info = list(r.json().values())[0]

# Total number of matching records
print(f"Records: {page_info['records']}")

# Request the next 200-record page for as long as the last record received
# is still below the total number of records.
while int(page_info['to']) < int(page_info['records']):
    start += 200
    r = search_db(query, start)
    results.append(r)
    page_info = list(r.json().values())[0]
    print(f"Range: {page_info['from']} - {page_info['to']}")

Records: 12


# Explore the result

In [5]:
# Take the first record of the first result page as a sample and print the
# fields of interest. Headings below that have no code are fields that are
# not printed in this cell.
for x in list(results[0].json().items())[0][1]['list']:
    print(x)
    break


# Title
print(x['title'])

# Type of media
print(x['type'])

# Keywords

# Abstract
# Not every record carries a description; only a missing key is tolerated
# here, so any other error still surfaces.
try:
    print(x['description'])
except KeyError:
    pass

# DOI

# Identifier
print(x['identifier'])

# eid

# pii

# Journal

# Date


{'identifier': 'http://swepub.kb.se/bib/swepub:oai:gup.ub.gu.se/300954', 'title': 'Invasion and distribution of the redclaw crayfish, Cherax quadricarinatus, in Martinique', 'creator': ['Baudry, T.', 'Becking, T.', 'Gout, J. P.', 'Arque, A.', 'Gan, H. M.', 'Austin, C. M.', 'Delaunay, C.', 'Smith-Ravin, J.', 'Roques, Jonathan, 1985', 'Grandjean, F.', 'Göteborgs universitet Institutionen för biologi och miljövetenskap', 'Göteborgs universitet', 'Gothenburg University'], 'type': 'article', 'publisher': '', 'date': '2020', 'language': 'eng', 'description': 'The redclaw crayfish, Cherax quadricarinatus, was introduced to Martinique Island for aquaculture purposes in 2004, in an attempt to revitalize the freshwater crustacean aquaculture sector. In 2015, three wild populations were discovered during an electrofishing survey on fish diversity. In 2018, a specific crayfish survey was performed at night using spotlighting and baited traps at 34 sites throughout the island. The species was mostl

# Explore abstracts in html format

In [6]:
# Write title, link, media type and abstract of every book/article to an
# HTML file, and collect the same records as Publication objects.
publications = []

# Only books and articles are exported; every other media type is skipped.
media_list = ['book', 'article', 'E-book', 'E-article']

with open("original_abstracts_Libris.html", "w") as file:
    for result in results:
        for media in list(result.json().items())[0][1]['list']:
            if media['type'] not in media_list:
                continue

            title = "<h1>" + media["title"] + "</h1>" + "\n"

            identity = (
                "<p><a href=" + media['identifier'] + ">"
                + media['identifier'] + "</a></p>" + "\n"
            )
            media_type = media["type"]

            # A record may have no description, a single string, or a list
            # of strings. List entries are concatenated, each one followed by
            # a paragraph break.
            raw_description = media.get('description')
            if isinstance(raw_description, list):
                description = "".join(
                    f"{part}</p><p>" for part in raw_description
                )
            elif isinstance(raw_description, str):
                description = raw_description
            else:
                description = None

            if description is None:
                abstract = "<p>No Abstract</p>"
                # Reset explicitly so the Publication below never inherits the
                # abstract of the previously processed record.
                # NOTE(review): an empty string is assumed to be an acceptable
                # "no abstract" value for Publication -- confirm against
                # lib.Publications.
                description = ""
            else:
                abstract = "<p>" + description + "</p>" + "\n"

            string = title + identity + media_type + abstract

            publications.append(
                Publication(
                    title=media["title"],
                    identifyer=media['identifier'],
                    abstract=description,
                )
            )

            file.write(string)

In [7]:
# Alternative way of producing the HTML output: render each stored
# Publication object with its own to_html() method.
with open("test.html", "w") as file:
    file.writelines(publication.to_html() for publication in publications)

# Save result to binary file

In [8]:
# Save the result to a binary file, so it can be analysed together with data
# from other searches. The with-block closes the file handle once the dump
# has been written, so the data is flushed to disk.
with open("swepub_eDNA.p", "wb") as pickle_file:
    pickle.dump(publications, pickle_file)