In [7]:
import os
import datetime
from typing import Iterable, Callable

import requests
import pybliometrics
from pybliometrics.scopus import config, AbstractRetrieval, AuthorRetrieval, ScopusSearch

In [8]:
PUBTRACK_TOKEN = '7313f9788f8d1543a78bdaacc7da8f610db0eb1c'
PUBTRACK_URL = 'http://0.0.0.0:8000/api/v1'

SCOPUS_KEY = '013ff70c81049af047c0648e87278a9a'
# pybliometrics.scopus.utils.create_config()
config['Authentication']['APIKey'] = SCOPUS_KEY

print(SCOPUS_KEY)

013ff70c81049af047c0648e87278a9a


In [9]:
# kang_info_dict = scopus.retrieve_author('36635367700')
# kang_info_dict.keys()

In [10]:
def exclude_keys(d: dict, keys: Iterable):
    copy = d.copy()
    for key in keys:
        del copy[key]
        
    return copy


def mpk(*pks, glue='/'):
    return glue.join(pks)


class ScopusPublicationAdapter:
    
    def __init__(self, abstract_retrieval: AbstractRetrieval):
        self.abstract_retrieval = abstract_retrieval
    
    def get_publication(self):
        return {
            'title':        self.abstract_retrieval.title,
            'published':    self._convert_date(self.abstract_retrieval.coverDate),
            'doi':          self.abstract_retrieval.doi,
            'scopus_id':    self.abstract_retrieval.identifier,
            'authors':      self.get_authors()
        }
    
    def get_authors(self):
        results = []
        for author in self.abstract_retrieval.authors:
            results.append({
                'first_name':       author.given_name,
                'last_name':        author.surname,
                'scopus_id':        author.auid
            })
        return results
    
    def _convert_date(self, date: str):
        return "2020-06-14T14:07:38+0000"


class Endpoint:
    
    def __init__(self, url:str, token: str):
        self.url = url
        self.token = token
    
    # COMPOSITE API OPERATIONS
    # ------------------------
    
    def post_or_get(self, data: dict, **get):
        try:
            return self.post(data)
        except ConnectionError:
            return self.get_by(**get)
        
    def get_by(self, **params):
        results = self.get('', params)['results']
        if len(results) == 0:
            raise FileNotFoundError('{} not found {}'.format(str(self), str(params)))
        return results[0]
        
    # BASIC API OPERATIONS
    # --------------------
        
    def delete(self, pk: str):
        return self._request('delete', {
            'url':          self._get_url(pk)
        })
    
    
    def put(self, pk:str, data: dict = {}):
        return self._request('put', {
            'url':          self._get_url(pk),
            'json':         data
        })
    
    def patch(self, pk:str, patch: dict = {}):
        return self._request('patch', {
            'url':          self._get_url(pk),
            'json':         patch
        })
    
    def post(self, data: dict = {}):
        return self._request('post', {
            'url':          self._get_url(),
            'json':         data    
        })
    
    def get(self, pk: str, params: dict = {}):
        return self._request('get', {
            'url':          self._get_url(pk),
            'params':       params
        })
    
    # PRIVATE METHODS
    # ---------------
        
    def _request(self, type: str, kwargs: dict):
        kwargs.update({
            'headers':      self._get_headers()
        })
        func = getattr(requests, type)
        response = func(**kwargs)
        if response.status_code in [400]:
            print(response.json())
            raise ConnectionError('Request "{}" with kwargs: {} failed'.format(type, str(kwargs)))
        else:
            return response.json()
        
    def _get_url(self, *args):
        base_url = os.path.join(self.url, self.get_endpoint())
        url = os.path.join(base_url, *args)
        if url[-1] != '/':
            url += '/'
        return url
        
    def _get_headers(self):
        return {
            'Authorization':    'TOKEN {}'.format(self.token)
        }
    
    # MAGIC METHODS
    # -------------
    
    def __call__(self, pk=''):
        self.get(pk)
        
    def __str__(self):
        return "ENDPOINT {}".format(self._get_url())
        
    # ABSTRACT METHODS
    # ----------------
        
    def get_endpoint(self):
        raise NotImplementedError()


class AuthorEndpoint(Endpoint):
    
    ENDPOINT = 'authors'
    
    def __init__(self, url, token):
        super(AuthorEndpoint, self).__init__(url, token)

    def get_endpoint(self):
        return self.ENDPOINT
    
    
class PublicationEndpoint(Endpoint):
    
    ENDPOINT = 'publications'
    
    def __init__(self, url: str, token: str):
        super(PublicationEndpoint, self).__init__(url, token)
        
    def get_endpoint(self):
        return self.ENDPOINT


class AuthoringEndpoint(Endpoint):
    
    ENDPOINT = 'authorings'
        
    def __init__(self, url: str, token: str):
        super(AuthoringEndpoint, self).__init__(url, token)

    def get_endpoint(self):
        return self.ENDPOINT   
        

class PubTrack:
    
    
    def __init__(self, url="", token=""):
        self.token = token
        self.url = url

    # RAW ENDPOINTS
    # #############
       
    # Make this cached properties 
    @property
    def author(self):
        return AuthorEndpoint(self.url, self.token)
    
    @property
    def publication(self):
        return PublicationEndpoint(self.url, self.token)
    
    @property
    def authoring(self):
        return AuthoringEndpoint(self.url, self.token)

    # ADDITIONAL FUNCTIONALITY
    # ########################
    
    def import_publication(self, publication: dict, update: bool = False):
        # post just the publication
        publication_base = exclude_keys(publication, ['authors'])
        try:
            response = self.publication.post(publication_base)
            publication['uuid'] = response['uuid']
        except ConnectionError as e:
            raise e
        
        self._import_publication_authors(publication)

    # PRIVATE METHODS
    # ############### 
    
    def _import_publication_authors(self, publication: dict):
        authors = publication['authors']
        authors = self._import_authors(authors)
        for author in authors:
            # Attempt to post the authorings
            self.authoring.post({
                'author':           author['slug'],
                'publication':      publication['uuid']
            })
        
    def _import_authors(self, authors: Iterable[dict]):
        result = []
        for author in authors:
            # The actual author
            response = self.author.post_or_get(author, scopus_id=author['scopus_id'])
            result.append(response)
        return result    
        

### How is import supposed to work?

What happens if the publication already exists?
<br>
I think in this case nothing should happen, or rather an exception be called. Because the verb "import" 
is only meaningful if the item does not really exist. In such a case I would define a different method "update"
<br><br>
How should authors be handled?
<br>
Of course if an author does not exist it is posted. Well the only thing, which can really happen here is that the 
POST is rejected because a unique constraint was violated. So I do have to create a method which will attempt to get 
the thingy by a some secondary values. For the author this is really straight forward actually. Since the only unique 
secondary attribute to check is the scopus id...

In [11]:
pubtrack = PubTrack(PUBTRACK_URL, PUBTRACK_TOKEN)
# pubtrack.author.get('jonas-teufel-d4gq')

publication = {
    'title':                'Automation is cool!',
    'published':            '2020-05-25T12:33:46+0000',
    'scopus_id':            '20',
    'kitopen_id':           '36',
    'doi':                  '28628-kslsi/oa28wk',
    'on_kitopen':           False,
    'authors':              [
        {
            'first_name':       'Karl',
            'last_name':        'Johann',
            'scopus_id':        '266'
        },
        {
            'scopus_id':        '2'
        }
    ]
}
# pubtrack.import_publication(publication)


In [12]:
ADDITIONAL_AUTHORS = 5
AUTHORS = ['35313939900']

MAX = 20
count = 0
for author_id in AUTHORS:
    search = ScopusSearch(f'AU-ID ( {author_id} )')
    
    for result in search.results:
        if count > MAX: break
        if result.doi is None: continue
        
        doi = result.doi
        try:
            abstract_retrieval = AbstractRetrieval(doi)
        except:
            continue
        
        print(doi)
        adapter = ScopusPublicationAdapter(abstract_retrieval)
        adapter = ScopusPublicationAdapter(abstract_retrieval)
        publication = adapter.get_publication()
        
        # Filter the authors
        authors = []
        author_count = 0
        for author in publication['authors']:
            if author['scopus_id'] in AUTHORS:
                authors.append(author)
            elif author_count < ADDITIONAL_AUTHORS:
                authors.append(author)
                author_count += 1
        publication['authors'] = authors
        # print(publication)
        try:
            pubtrack.import_publication(publication)
            print('POSTED')
            count += 1
        except:
            continue

10.1088/1361-6471/ab8480
POSTED
10.1140/epjc/s10052-020-7718-z
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exists.']}
POSTED
10.1103/PhysRevLett.123.221802
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exists.']}
POSTED
10.1140/epjc/s10052-019-7320-4
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exists.']}
{'scopus_id': ['author with this scopus id already exis