In [None]:
%load_ext autoreload

In [None]:
%autoreload 2
import copy
import json
import os
import re
import sys
from sqlite3 import connect

from pyzotero.zotero import Zotero

# Make the parent directory importable so the local `src` package resolves.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from src.dispatcher import Dispatcher
from src.db import ZoteroDatabase
from src.scihub import SciHub
import src.utils as utils

In [None]:
# pyzotero client; user id and API key are read from the environment
# (raises KeyError if either variable is unset).
zot = Zotero(os.environ['ZOTERO_USER_ID'], 'user', os.environ['ZOTERO_API_KEY'], preserve_json_order=True)

# Project-local helpers used by the cells below.
z = ZoteroDatabase(local=True)
d = Dispatcher(session=None)

# Called without keys; presumably returns every item in the library -- confirm in src.db.
items = z.get_items()

In [None]:
def create_abbr(name):
    """
    Build dotted initials for a name.

    Arguments:
        name: string of whitespace-separated names

    Returns:
        tuple with the first character of every part followed by a dot,
        e.g. 'Robert U' -> ('R.', 'U.').
        None if the name contains no more alphabetic characters than parts
        (e.g. 'R. B.' or the empty string).
    """
    parts = name.split()
    letter_count = len([ch for ch in name if ch.isalpha()])
    if letter_count <= len(parts):
        return None
    return tuple(part[0] + '.' for part in parts)

    
def fix_name(name_dict, names, safe_mode=True, blacklist_path=None):
    """
    Normalise one creator name against the names already known.

    Does the following things in this order:
    Skips the creator if it is listed in the blacklist.
    Splits a full name stored in lastName into first names and last name
    (skipped in safe mode).
    Adds dots to single-character first names.
    Tries to find additional first names in `names`.
    Tries to complete abbreviated first names from `names`; if both an
    extended and a completed name are found, the completed one is used.

    Arguments:
        name_dict: dict of one creator; creators without both 'firstName'
                   and 'lastName' keys are left untouched
        names: set of tuples of all names
               {(firstName, lastName, (first, names, abbreviated) or None)}
        safe_mode: bool, skip possibly ambivalent operations
        blacklist_path: file path of blacklist json (a list of creator dicts);
                        a missing or unparsable file counts as an empty blacklist

    Returns:
        status: -1: no changes
                0: changed
                >0: skipped, bitmask of
                    1: creator is blacklisted
                    2: full name not split (safe mode)
                    4: only abbreviated additional first names found (safe mode)
                    8: several candidate first names found
        dict of changed name, otherwise None
    """
    # Each skip reason is a distinct bit; they are OR-ed so that hitting the
    # same reason several times cannot add up to a different reason.
    BLACKLISTED, UNSPLIT_FULL_NAME, ABBREVIATED_ONLY, AMBIGUOUS = 1, 2, 4, 8
    status = 0

    if blacklist_path is not None:
        try:
            with open(blacklist_path, 'r') as f:
                blacklist = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            # No usable blacklist yet (e.g. first run): nothing is blacklisted.
            blacklist = []

        if name_dict in blacklist:
            print("Skipped: {} blacklisted.".format(name_dict))
            return BLACKLISTED, None

    if 'firstName' not in name_dict or 'lastName' not in name_dict:
        # Nothing to normalise; adding the keys would alter the record's shape.
        return -1, None

    fnames = name_dict['firstName']
    fnames_lst = fnames.split()
    lname = name_dict['lastName']
    lname_lst = lname.split()

    # convert full names to first name, last name
    if len(lname_lst) > 1 and fnames == '':
        if safe_mode:
            print('Safe Mode: Skipped splitting full name {}'.format(lname))
            status |= UNSPLIT_FULL_NAME
        else:
            fnames_lst = lname_lst[:-1]
            lname = lname_lst[-1]
            print('Splitted full name into {} {}'.format(fnames_lst, lname))

    # add dots
    names_abbr = tuple(n + '.' if len(n) == 1 else n for n in fnames_lst)
    fnames_mod = ' '.join(names_abbr)

    # TODO: find full & additional names simultaneously
    full_fnames = list()
    add_fnames = list()
    num_names = len(fnames_lst)
    for n in names:
        if lname != n[1]:
            continue

        # check for full first names
        if names_abbr == n[2]:
            full_fnames.append(n[0])

        # check for additional first names: the known name must start with
        # exactly the current first names and be longer
        other_fnames = n[0].split()
        if fnames_lst == other_fnames[:num_names] and num_names < len(other_fnames):
            if not safe_mode:
                add_fnames.append(n[0])
            # safe mode: only change if first(!) first name not abbreviated
            elif len(other_fnames[0].replace('.', '')) > 1:
                add_fnames.append(n[0])
            else:
                print('Safe Mode: Skipped only abbreviated first name {} {}'.format(n[0], n[1]))
                status |= ABBREVIATED_ONLY

    # full_fnames is handled last so a completed name wins over an extended one
    for lst in [add_fnames, full_fnames]:
        if len(lst) == 1:
            fnames_mod = lst[0]
            print('Changed {} into {} {}'.format(fnames, fnames_mod, lname))
        elif len(lst) > 1:
            print('Skipped: Multiple first names found for {} {}'.format(lst, lname))
            status |= AMBIGUOUS

    if status > 0:
        return status, None

    name_dict_mod = copy.deepcopy(name_dict)
    name_dict_mod['firstName'] = fnames_mod
    name_dict_mod['lastName'] = lname

    if name_dict_mod != name_dict:
        return status, name_dict_mod
    return -1, None
    
    


# Pool of known names: one (firstName, lastName, abbreviation) tuple per creator
# across all `items`; missing name fields default to ''.
names = { ( c.get('firstName', ''), c.get('lastName', ''),
            create_abbr(c.get('firstName', '')) ) for i in items for c in i.get('creators', []) }



names
#fix_name({'firstName': 'Jonathan F', 'lastName': 'Donges'}, names)
# Probe with an extra, unrelated key ('bla') in the creator dict; only the
# result of this last expression is displayed by the cell.
fix_name({'bla': 1, 'firstName': 'Robert U', 'lastName': 'Ayres'}, names)

In [None]:
{'bla': 1, 'firstName': 'Robert U', 'lastName': 'Ayres'} in [{'bla': 1, 'firstName': 'Robert U', 'lastName': 'Ayres'}]

In [None]:
fix_name({'firstName': 'R.', 'lastName': 'Ayres'}, {('R. B.', 'Ayres', ('R.', 'B.'))}, safe_mode=True)


In [None]:
import copy

def fix_names(self, keys=[], safe_mode=True, blacklist_path=None):
    """
    Run fix_name over every creator of the selected items.

    The pool of known names is collected from all items returned by
    self.db.get_items(), not only from the selection.

    Arguments
        keys: list of keys, passed on to self.db.get_items
        safe_mode: passed on to fix_name; if True, ambivalent renamings are skipped
        blacklist_path: path to blacklist json. If not None, it is passed on to
                        fix_name and the file is rewritten afterwards with the
                        de-duplicated creators that were skipped in this run

    Returns
        list of {'key', 'version', 'creators'} dicts, one per item with at
        least one changed creator. Nothing is written back to the database.
    """
    # Known names: (firstName, lastName, abbreviation) for the whole library.
    names = set()
    for entry in self.db.get_items():
        for creator in entry.get('creators', []):
            first = creator.get('firstName', '')
            last = creator.get('lastName', '')
            names.add((first, last, create_abbr(first)))

    items_mod = []
    skipped = []
    for entry in self.db.get_items(keys):
        originals = entry.get('creators', [])
        creators_mod = copy.deepcopy(originals)
        changed = False
        for pos, creator in enumerate(originals):
            status, name_mod = fix_name(creator, names, safe_mode=safe_mode, blacklist_path=blacklist_path)
            if status > 0:
                skipped.append(creator)
            elif status == 0:
                creators_mod[pos] = name_mod
                changed = True
        if changed:
            items_mod.append({'key': entry['key'], 'version': entry['version'], 'creators': creators_mod})

    if blacklist_path is not None:
        # Dicts are not hashable: de-duplicate via sorted item tuples.
        unique = {tuple(sorted(c.items())) for c in skipped}
        skipped = [dict(pairs) for pairs in unique]
        with open(blacklist_path, 'w') as f:
            json.dump(skipped, f, indent=4)

    # Writing the changes back is disabled for now:
    #self.db.batch_update_items(items_mod)

    return items_mod

fix_names(d, blacklist_path='blacklist.json')

In [None]:
z.get_items(list({'2UYDUXQM': 1}.keys()))

In [None]:
import json
from json import JSONDecodeError

# Probe for the blacklist loader: an empty document is not valid JSON, so
# json.loads raises JSONDecodeError and the fallback value is used.
try:
    blacklist = json.loads('')
except (FileNotFoundError, JSONDecodeError):
    blacklist = None

In [None]:
# Overwrite the creators of one item with fixed values and send the result
# through z.update_items -- presumably to reset a test record; confirm.
item = z.get_items('B9XCY7CH')[0]
# Keep only the fields used for the update.
item = {k: item[k] for k in ['key', 'version', 'creators']}
item['creators'] = [{'creatorType': 'author',
   'firstName': 'Andreas',
   'lastName': 'Chatzidakis'},
  {'creatorType': 'author', 'firstName': 'Gretchen', 'lastName': 'Larsen'},
  {'creatorType': 'author', 'firstName': 'Simon', 'lastName': 'Bishop'}]

z.update_items([item])

In [None]:
# z.get_items()
z.get_items_creators_local()

In [None]:
# Hand-picked item keys for trying out the different getters.
keys = ['XXLF2GYS', 'G3GP7TW8','UYJJPTXG','4SDR5E5R','JTWSQKIS','AG9I49JZ', '7ARSI4KK']
# NOTE(review): item_type is assigned but not used anywhere in this cell.
item_type = 'note || attachment'

# Alternative getters tried earlier:
# items = z.get_items(keys=keys, use_cache=False)
# items = z.get_notes(keys=keys)
# NOTE: rebinds the notebook-global `items`.
items = z.get_attachments(keys=keys)
# Display the first result; raises IndexError if nothing was returned.
items[0]

In [None]:
# NOTE(review): file_types is not used by check_filenames below.
file_types = ['pdf', 'epub', 'djvu', 'mobi', 'okular'] # also PDF!
# present_file_type = {a.get('path','').split('.')[-1] for a in attachments}

# Rebinds the notebook-globals `d` and `items`, which check_filenames reads
# (`d`) or is called with (`items`).
d = Dispatcher(session=None)
# Empty key list; presumably selects every item -- confirm in src.db.
items = z.get_items(keys=[])


def check_filenames(items):
    """
    Find attachments whose stored path differs from the expected file name.

    Reads the notebook globals `z` (attachment lookup) and `d` (file name
    builder).

    Arguments:
        items: list of item dicts, each with a 'key'

    Returns:
        dict mapping every item key to a list of (current path, expected path)
        tuples, one per child attachment that has a 'path' different from
        'attachments:' + d.build_file_name(item). Items without such
        attachments map to an empty list.
    """
    keys = [i['key'] for i in items]
    attachments = z.get_attachments(parent_keys=keys)

    # Group the attachments that have a path by parent in a single pass,
    # instead of rescanning the whole list for every item.
    by_parent = {}
    for a in attachments:
        if 'path' in a:
            by_parent.setdefault(a.get('parentItem'), []).append(a)

    attachments_dict = {}
    for i in items:
        children = by_parent.get(i['key'], [])
        mismatches = []
        if children:
            # Built once per item, and only for items that have something to
            # compare against.
            expected = 'attachments:' + d.build_file_name(i)
            mismatches = [(a['path'], expected) for a in children if a['path'] != expected]
        attachments_dict[i['key']] = mismatches
    return attachments_dict


check_filenames(items)

In [None]:
# Extensions that count as a document attachment; the comparison further down
# in this cell lower-cases the extension first.
file_types = ['pdf', 'epub', 'djvu', 'mobi', 'okular'] # also PDF!

# Hand-picked item keys to check for missing attachments.
selected_keys = ['XXLF2GYS', 'G3GP7TW8','UYJJPTXG','4SDR5E5R','JTWSQKIS','AG9I49JZ', '7ARSI4KK']


import re

def get_doi(item):
    """
    Return the DOI of an item.

    The 'DOI' field is used if it is non-empty; otherwise the first DOI-like
    string found in 'url' and then in 'extra' is returned.

    Arguments:
        item: dict of item fields; 'DOI', 'url' and 'extra' may be missing,
              empty or None

    Returns:
        DOI string, or None if none was found
    """
    # The dot after the '10' prefix is escaped so it only matches a literal
    # '.', not an arbitrary character.
    doi_regex = re.compile(r'10\.\d{4,9}\/[-._;()/:A-Z0-9]+', re.IGNORECASE)

    def get_match(s):
        # Missing/None fields are searched as an empty string.
        match = doi_regex.search(s or '')
        return match.group() if match else None

    return item.get('DOI') or get_match(item.get('url')) or get_match(item.get('extra'))
    


# NOTE: rebinds the notebook-global `items`.
items = z.get_items(keys=selected_keys)
attachments = z.get_attachments(parent_keys=selected_keys)

# Earlier variant that listed the attachment paths per item:
# attachments_dict = {i['key']: [a['path'] for a in attachments
#                                if 'path' in a and a.get('parentItem') == i['key']
#                                and (a.get('path', '').split('.')[-1]).lower() in file_types] for i in items}
# attachments_dict

# Parent keys of all attachments whose file extension (text after the last
# '.', lower-cased) is one of file_types.
attachment_keys = [a['parentItem'] for a in attachments
                   if 'parentItem' in a and (a.get('path', '').split('.')[-1]).lower() in file_types]

# For every item without such an attachment, collect the identifiers that
# could be used to look the document up.
no_attachments_dict = {i['key']: {'url': i.get('url'), 'DOI': get_doi(i), 'ISBN': i.get('ISBN')} for i in items if i['key'] not in attachment_keys}

no_attachments_dict

# misc

In [None]:
z.item_types()
z.item_type_fields('note')
z.item_attachment_link_modes()
# z.item_template('journalArticle')
z.item_template('attachment', 'linked_file')

# Zotero

In [None]:
zot.

# scihub

In [None]:
import unicodedata

In [None]:
text = 'dalsdk-.;asjdn'
# NFD decomposition followed by an ASCII encode with errors='ignore' drops
# every non-ASCII code point; the sample above is pure ASCII already, so it
# comes back unchanged.
text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('utf-8')
text

In [None]:
# Identifier passed to the downloader below.
identifier = '10.1016/j.ssmph.2019.10038'

sh = SciHub(use_fallback=False)
# sh.available_base_url_list
# result = sh.fetch('http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=1648853')
# NOTE(review): the target is an absolute, user-specific path; the cell will
# presumably fail on other machines -- consider a configurable directory.
result = sh.download(identifier, path='/home/boris/Downloads/paper.pdf')
# sh._get_available_scihub_urls()
result

# lisc - retrieve DOI relations

In [None]:
# NOTE(review): this rebinds the notebook-global `z` from the ZoteroDatabase
# wrapper to a plain pyzotero client; cells above that call ZoteroDatabase
# methods on `z` will not work if re-run after this cell.
z = Zotero(os.environ['ZOTERO_USER_ID'], 'user', os.environ['ZOTERO_API_KEY'], preserve_json_order=True)
item = z.item('2BCDWE4F')['data']
# item2 = z.item('7ARSI4KK')
# Replace the item's relations with four links to other items of the same user library.
item['relations'] = {'dc:relation': ['http://zotero.org/users/5832834/items/TZNEQBL3',
                                     'http://zotero.org/users/5832834/items/KQNSY94T',
                                     'http://zotero.org/users/5832834/items/PIM5RGMN',
                                     'http://zotero.org/users/5832834/items/WDYQMILT']
}

# Earlier hand-built payload:
# items = [{'key': 'AG9I49JZ',
#           'note': None,
#           'path': None,
#           'itemType': 'book',
#         'relations': {'dc:relation': ['http://zotero.org/users/5832834/items/7ARSI4KK']}}]
items = [item]

# Only the check is run here; the actual update is left commented out.
z.check_items(items)
# z.update_items(items)

In [None]:
from lisc.requester import Requester
from lisc.urls.open_citations import OpenCitations

# `util` and `dois` are each assigned twice; only the second value of each is
# used. The first assignments are kept as quick-switch alternatives.
util = 'references' # citations, references
util = 'citations'
settings = {'format': 'json'}
dois = ['10.1016/S0305-750X(01)00109-7']
dois = ['10.1093/oxrep/grx056']

urls = OpenCitations()
urls.build_url(util=util)

url = urls.get_url(util=util, segments=dois, settings=settings)
print(url)
# urls.authenticate(url)
# urls.fill_settings(format='json')
# urls.check_url(util)

req = Requester(wait_time=0.1, logging=None)
# req.check()
r = req.request_url(url)
# NOTE(review): close() is skipped if request_url raises -- consider try/finally.
req.close()
r.json()

# Prompt Toolkit

In [None]:
from prompt_toolkit.completion import NestedCompleter, WordCompleter

In [None]:
# Experiment with a nested completion tree.
# NOTE(review): the second inner key is a WordCompleter instance, not a
# string; NestedCompleter presumably matches keys against typed words, so this
# entry may never be reached -- confirm against the prompt_toolkit docs.
comp = {'do': {'this': None,
               WordCompleter(['that']): WordCompleter(['thing'])
              } 
       }
NestedCompleter.from_nested_dict(comp)