# Xsearch - Libris/SwePub

In [8]:
import requests
import pickle
from lib.Publications import Publication

# Query string

In [16]:
query = '("eDNA" OR "environmental DNA" OR "metabarcoding" OR "eRNA" OR "environmental RNA") AND ("biodiversity" OR "species richness" OR "monitoring" OR "biomonitoring") AND ("high throughput sequencing" OR "HTS" OR "throughput")'
# Query Libris or SwePub
db = "libris" 

# Search Libris

In [17]:
def search_db(query, start=1):
    # http://librishelp.libris.kb.se/help/xsearch_swe.jsp?open=tech
    return requests.get(f'https://libris.kb.se/xsearch?query={query}&order=alphabetical&format=json&start={start}&n=200&format_level=full&database={db}')


In [18]:
# The maximum number of records to retreive when using Xsearch is 200.
# This loop will download all records if this limit is exceded.

# Make the first database search
start = 1
results = []
r = search_db(query)
results.append(r)

print(results)
print(f'https://libris.kb.se/xsearch?query=%C3%A4mne:{query}&order=alphabetical&format=json&start={start}&n=200&format_level=full&database={db}')
# Number of records
print(f"Records: {list(r.json().items())[0][1]['records']}")

# Make subsequent searches if the query returned more then 200 records
while int(list(r.json().items())[0][1]['to']) < int(list(r.json().items())[0][1]['records']):
    start += 200
    r = search_db(query, start)
    results.append(r)
    print(f"Range: {str(list(r.json().items())[0][1]['from'])} - {str(list(r.json().items())[0][1]['to'])}")

[<Response [200]>]
https://libris.kb.se/xsearch?query=%C3%A4mne:("eDNA" OR "environmental DNA" OR "metabarcoding" OR "eRNA" OR "environmental RNA") AND ("biodiversity" OR "species richness" OR "monitoring" OR "biomonitoring") AND ("high throughput sequencing" OR "HTS" OR "throughput")&order=alphabetical&format=json&start=1&n=200&format_level=full&database=libris
Records: 11


# Explore the result

In [19]:
for x in list(results[0].json().items())[0][1]['list']:
    print(x)
    break

    
# Title
print(x['title'])

# Type of media
print(x['type'])

# Keywords

# Abstract
try:
    print(x['description'])
except:
    pass

# DOI

# Identifier
print(x['identifier'])

# eid

# pii

# Journal

# Date


{'identifier': 'http://libris.kb.se/bib/v4716jt6s49cgn5c', 'title': 'Great differences in performance and outcome of high-throughput sequencing data analysis platforms for fungal metabarcoding [Elektronisk resurs]', 'creator': ['Anslan, Sten', 'Nilsson, R. Henrik', 'Wurzbacher, Christian', 'Baldrian, Petr', 'Tedersoo, Leho', 'Bahram, Mohammad', 'Uppsala universitet Teknisk-naturvetenskapliga vetenskapsområdet'], 'type': 'E-article', 'publisher': 'PENSOFT PUBL', 'date': '2018', 'language': 'eng', 'description': 'Along with recent developments in high-throughput sequencing (HTS) technologies and thus fast accumulation of HTS data, there has been a growing need and interest for developing tools for HTS data processing and communication. In particular, a number of bioinformatics tools have been designed for analysing metabarcoding data, each with specific features, assumptions and outputs. To evaluate the potential effect of the application of different bioinformatics workflow on the resul

# Explore abstracts in html format

In [20]:
publications = []

with open("original_abstracts_SwePub.html", "w") as file:
    for result in results:
        for media in list(result.json().items())[0][1]['list']:
        
            media_list = ['book', 'article', 'E-book', 'E-article']
            # Only look at books and articles
            if media['type'] in media_list:        
        
                title = "<h1>" + media["title"] + "</h1>" + "\n"
        
                identity = "<p><a href=" + \
                            media['identifier'] + \
                            ">" + \
                            media['identifier'] + \
                            "</a></p>" + \
                            "\n"
                media_type = media["type"]
            
                # First make sure there is a description for this item,
                # then concatenate the list of descriptions if needed.
                
                try:
                    media['description'] == True
                    if type(media['description']) == list:
                        description = ""
                        for i in range(len(media['description'])):
                            description += str(media['description'][i] + "</p><p>")
                    elif type(media['description']) == str:
                        description = media['description']
            
                    abstract = "<p>" + \
                                description + \
                                "</p>" + \
                                "\n"
                except:    
                    abstract = "<p>No Abstract</p>"
        
                string = title + identity + media_type + abstract
            
                publications.append(Publication(title = media["title"], identifyer = media['identifier'], abstract = description))
        
                file.write(string)

In [22]:
# Alternative method of writing the result in html format
# after data has been stores as Publication objects.
with open("test.html", "w") as file:
    for i in publications:
        file.write(i.to_html())

# Save result to binary file

In [23]:
# Save the result to a binary file, and analyse it together with data from other searches.
pickle.dump(publications, open("libris.p" ,"wb"))