# CORD-19-collect-scopus-data

This Jupyter notebook collects additional paper metadata from Scopus in order to extend the CORD-19 dataset:
https://datadryad.org/stash/dataset/doi:10.5061/dryad.vmcvdncs0

First, import the packages the notebook relies on.

In [1]:
import numpy as np
import pandas as pd
import csv
import ast
import collections
import matplotlib.pyplot as plt
import Levenshtein as lev
from fuzzywuzzy import fuzz 
import datetime
import matplotlib.pyplot as plt
import re
from urllib.parse import urlparse
from collections import Counter

from elsapy.elsclient import ElsClient
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch

import time # for sleep
from pybtex.database import parse_file, BibliographyData, Entry
import json
from elsapy.elsclient import ElsClient
from elsapy.elsdoc import AbsDoc
from elsapy.elssearch import ElsSearch

In [2]:
# Load the CORD-19 software-mentions table (comma-separated) into a DataFrame.
CORD19_CSV = pd.read_csv(
    filepath_or_buffer='../data/cord-19/CORD19_software_mentions.csv',
    sep=',',
)

In [3]:
# Number of rows in the DOI column (missing DOIs are counted as well).
CORD19_CSV['doi'].size

77448

In [4]:
# Pull out the DOI column on its own for the steps that follow.
doi = CORD19_CSV.loc[:, 'doi']
doi

0                                 NaN
1          10.1016/j.regg.2021.01.002
2           10.1016/j.rec.2020.08.002
3        10.1016/j.vetmic.2006.11.026
4                   10.3390/v12080849
                     ...             
77443      10.1007/s11229-020-02869-9
77444                             NaN
77445     10.1101/2020.05.13.20100206
77446      10.1007/s42991-020-00052-8
77447     10.1101/2020.09.14.20194670
Name: doi, Length: 77448, dtype: object

In [5]:
# Count the occurrences of every DOI. NaN entries are excluded (dropna=True) and
# the result is ordered by descending count; the download loop addresses DOIs by
# their position in doi_counted.index.
doi_counted = doi.value_counts(dropna=True, sort=True, ascending=False)
doi_counted

10.31729/jnma.5498              2
10.1016/j.dsx.2020.04.012       2
10.1002/jmv.25751               1
10.1093/nar/gkr1064             1
10.1016/j.ijcha.2020.100675     1
                               ..
10.1016/j.mtcomm.2020.101734    1
10.1007/978-3-030-50423-6_37    1
10.1101/2020.11.25.20234195     1
10.1111/jth.15044               1
10.1177/0956797620939054        1
Name: doi, Length: 74302, dtype: int64

In [6]:
def AffiliationsFromScopusByDOI(client, doi):
    """Obtain additional paper information from Scopus by DOI.

    Runs a Scopus search for ``DOI(<doi>)``, extracts the Scopus ID from the
    ``dc:identifier`` field of the first hit and reads the matching abstract
    document.  When the read succeeds the document is additionally written
    out through ``AbsDoc.write()``.

    Parameters
    ----------
    client : ElsClient
        elsapy client used for both the search and the document read.
    doi : str
        DOI of the paper to look up.

    Returns
    -------
    object or None
        ``scp_doc.data`` of the abstract document.  ``None`` is returned when
        ``doi`` is not a non-empty string, or when the lookup fails after the
        search (no usable first hit, or an error while reading/writing the
        document).

    Notes
    -----
    Errors raised by the search request itself are not caught, exactly as
    before.  ``KeyboardInterrupt`` and ``SystemExit`` are deliberately *not*
    swallowed, so a long-running download loop can be stopped by the user
    instead of silently recording ``None`` for the DOI being processed.
    """
    # Missing DOIs arrive as NaN (a float); there is nothing to query for them.
    if not isinstance(doi, str) or not doi.strip():
        return None

    doc_srch = ElsSearch("DOI("+doi+")",'scopus')
    doc_srch.execute(client, get_all = True)
    try:
        # "dc:identifier" is split on ":" and the part after the prefix is the ID.
        scopus_id = doc_srch.results[0]["dc:identifier"].split(":")[1]
        scp_doc = AbsDoc(scp_id = scopus_id)
        if scp_doc.read(client):
            scp_doc.write()
        else:
            print ("Read document failed.")
        return scp_doc.data
    except Exception:
        # Best effort: any ordinary failure for a single DOI yields None so the
        # caller can carry on with the next one.
        return None

In [7]:
## Load configuration
# config.json must contain an "apikey" entry. Keep that file out of version
# control; the context manager closes the handle even if parsing fails.
with open("config.json") as con_file:
    config = json.load(con_file)
## Initialize client
client = ElsClient(config['apikey'])
# Example request (never paste a real API key into the notebook):
# https://api.elsevier.com/content/search/scopus?query=DOI(10.1109/MCOM.2016.7509373)&apiKey=<YOUR_API_KEY>

In [8]:
# Restore the Scopus records collected so far from the local pickle file.
df_current_extra_info = pd.read_pickle(
    'extra_info_CS5099.pkl',
    compression='infer',
)
df_current_extra_info

Unnamed: 0,affiliation,coredata
0,"[{'affiliation-city': None, 'affilname': 'Pata...","{'srctype': 'j', 'prism:issueIdentifier': '230..."
1,"[{'affiliation-city': 'New Delhi', 'affilname'...","{'srctype': 'j', 'eid': '2-s2.0-85083171050', ..."
2,,
3,"[{'affiliation-city': 'London', 'affilname': '...","{'srctype': 'j', 'eid': '2-s2.0-79953057246', ..."
4,"[{'affiliation-city': 'Baoding', 'affilname': ...","{'srctype': 'j', 'prism:issueIdentifier': '7',..."
...,...,...
50082,"[{'affiliation-city': 'Evanston', 'affilname':...","{'srctype': 'j', 'eid': '2-s2.0-84924617839', ..."
50083,"[{'affiliation-city': 'Jaipur', 'affilname': '...","{'srctype': 'j', 'prism:issueIdentifier': '3',..."
50084,"[{'affiliation-city': 'Charleston', 'affilname...","{'srctype': 'j', 'prism:issueIdentifier': '6',..."
50085,"[{'affiliation-city': 'Iksan', 'affilname': 'N...","{'srctype': 'j', 'eid': '2-s2.0-85096923466', ..."


In [9]:
# Row count of the stored records; the download loop starts at this position.
len_df_current_extra_info = df_current_extra_info.shape[0]
len_df_current_extra_info

50087

In [None]:
%%time
i = len_df_current_extra_info
dict_new_extra_info = dict()
len_dois = len(doi_counted)
while i < len_dois:
    print("Position: " + str(i) + " -> " +  doi_counted.index[i])
    dict_new_extra_info[i] = AffiliationsFromScopusByDOI(client, doi_counted.index[i])
    i = i + 1 

Position: 50087 -> 10.1016/j.ahj.2020.10.069
Position: 50088 -> 10.1093/infdis/jit468
Position: 50089 -> 10.1101/2020.04.27.20081349
Position: 50090 -> 10.1111/febs.13553
Position: 50091 -> 10.1007/s10389-020-01295-y
Position: 50092 -> 10.1007/978-3-030-51935-3_34
Position: 50093 -> 10.1016/j.eswa.2009.06.032
Position: 50094 -> 10.1186/1742-4682-11-22
Position: 50095 -> 10.1186/1743-422x-4-20
Position: 50096 -> 10.1038/s41380-021-01019-y
Position: 50097 -> 10.1101/2020.12.12.422477
Position: 50098 -> 10.1245/s10434-020-09026-z
Position: 50099 -> 10.1007/s11936-020-00869-z
Position: 50100 -> 10.3389/fpsyg.2020.548506
Position: 50101 -> 10.1155/2020/6675206
Position: 50102 -> 10.1016/j.tvjl.2017.06.009
Position: 50103 -> 10.1186/s12931-020-01548-0
Position: 50104 -> 10.1007/s10900-020-00952-3
Position: 50105 -> 10.1186/s13063-020-04932-9
Position: 50106 -> 10.1007/s12262-020-02592-2
Position: 50107 -> 10.1164/rccm.202007-2670le
Position: 50108 -> 10.1016/j.jviromet.2020.114013
Position: 

Position: 50267 -> 10.1016/j.childyouth.2020.105783
Position: 50268 -> 10.1002/fsn3.1622
Position: 50269 -> 10.1007/s00198-020-05483-0
Position: 50270 -> 10.12688/hrbopenres.13053.2
Position: 50271 -> 10.1016/j.physa.2016.04.009
Position: 50272 -> 10.1007/s10453-014-9342-6
Position: 50273 -> 10.1101/2020.09.25.20201582
Position: 50274 -> 10.1016/j.ajic.2006.03.014
Position: 50275 -> 10.1007/978-3-030-47426-3_20
Position: 50276 -> 10.1007/s00580-012-1405-x
Position: 50277 -> 10.3758/s13428-011-0175-8
Position: 50278 -> 10.1007/s00464-020-08006-4
Position: 50279 -> 10.1101/2020.10.16.20214049
Position: 50280 -> 10.1007/s00115-020-00973-2
Position: 50281 -> 10.1186/s40249-020-00770-8
Position: 50282 -> 10.1016/j.psychres.2020.113394
Position: 50283 -> 10.1007/s11469-020-00424-8
Position: 50284 -> 10.4081/ejtm.2019.9032
Position: 50285 -> 10.1007/s11948-019-00099-y
Position: 50286 -> 10.1007/s10707-006-0015-7
Position: 50287 -> 10.1186/s13040-021-00233-2
Position: 50288 -> 10.1183/13993003

Position: 50448 -> 10.1016/j.jpubeco.2020.104238
Position: 50449 -> 10.1038/s41392-020-00376-4
Position: 50450 -> 10.18632/aging.103716
Position: 50451 -> 10.1155/2017/2978718
Position: 50452 -> 10.1101/2020.06.05.135996
Position: 50453 -> 10.1145/3411764.3445596
Position: 50454 -> 10.1016/j.jaci.2016.12.963
Position: 50455 -> 10.1155/2019/8530273
Position: 50456 -> 10.1111/gean.12241
Position: 50457 -> 10.1016/j.bja.2020.12.002
Position: 50458 -> 10.1016/j.vaccine.2013.03.055
Position: 50459 -> 10.1038/s41380-020-00936-8
Position: 50460 -> 10.1186/s12889-020-09867-z
Position: 50461 -> 10.1016/j.jep.2016.12.042
Position: 50462 -> 10.1002/hep.31472
Position: 50463 -> 10.3390/ijerph17249418
Position: 50464 -> 10.1101/2020.07.17.20156430
Position: 50465 -> 10.1101/759902
Position: 50466 -> 10.1007/s11846-020-00423-y
Position: 50467 -> 10.1007/s11192-020-03564-9
Position: 50468 -> 10.1111/sltb.12655
Position: 50469 -> 10.3201/eid2608.201525
Position: 50470 -> 10.3390/ijerph17238827
Positio

Position: 50632 -> 10.1021/acs.jcim.0c00319
Position: 50633 -> 10.1007/s00134-020-06061-y
Position: 50634 -> 10.1016/0022-1759(92)90136-h
Position: 50635 -> 10.3390/ijerph17145059
Position: 50636 -> 10.1016/j.jns.2020.117081
Position: 50637 -> 10.1016/j.chaos.2020.110295
Position: 50638 -> 10.1038/s41423-019-0270-9
Position: 50639 -> 10.1186/s12985-016-0551-1
Position: 50640 -> 10.1007/s00466-020-01881-7
Position: 50641 -> 10.3389/fpsyt.2020.559266
Position: 50642 -> 10.3390/v12101163
Position: 50643 -> 10.1002/pro.2823
Position: 50644 -> 10.18632/oncotarget.27672
Position: 50645 -> 10.1101/2020.04.21.20073262
Position: 50646 -> 10.3389/fphar.2020.588132
Position: 50647 -> 10.1093/ajcp/aqaa187
Position: 50648 -> 10.1155/2020/9254503
Position: 50649 -> 10.1016/j.vaccine.2020.12.083
Position: 50650 -> 10.1038/gt.2009.92
Position: 50651 -> 10.1101/2020.04.06.20055848
Position: 50652 -> 10.1016/j.jinf.2020.11.028
Position: 50653 -> 10.1186/s12879-017-2754-5
Position: 50654 -> 10.1016/j.vir

Position: 50814 -> 10.1101/2020.08.14.20175299
Position: 50815 -> 10.1080/07391102.2020.1837679
Position: 50816 -> 10.1186/s13756-020-00803-9
Position: 50817 -> 10.3390/genes9110560
Position: 50818 -> 10.1371/journal.pntd.0008806
Position: 50819 -> 10.1101/2020.04.17.047324
Position: 50820 -> 10.1099/vir.0.000071
Position: 50821 -> 10.1016/j.jmii.2020.04.013
Position: 50822 -> 10.1007/s40955-020-00166-z
Position: 50823 -> 10.7717/peerj.9399
Position: 50824 -> 10.1007/s10643-020-01136-3
Position: 50825 -> 10.1016/j.forpol.2020.102253
Position: 50826 -> 10.1016/j.ajem.2020.10.068
Position: 50827 -> 10.21203/rs.3.rs-73657/v1
Position: 50828 -> 10.1038/s41467-020-19248-0
Position: 50829 -> 10.3904/kjim.2019.092
Position: 50830 -> 10.1101/753806
Position: 50831 -> 10.1186/s40249-020-00737-9
Position: 50832 -> 10.1002/jdd.12499
Position: 50833 -> 10.1007/s00262-018-2201-5
Position: 50834 -> 10.1007/s10802-019-00577-8
Position: 50835 -> 10.7326/m20-1515
Position: 50836 -> 10.1016/s0022-2836(0

Position: 50996 -> 10.1186/s40249-020-00689-0
Position: 50997 -> 10.1016/j.virusres.2012.10.001
Position: 50998 -> 10.1111/1469-0691.12242
Position: 50999 -> 10.1016/j.virol.2010.04.019
Position: 51000 -> 10.1128/mbio.00826-17
Position: 51001 -> 10.1371/journal.pone.0235307
Position: 51002 -> 10.1093/jac/dkv085
Position: 51003 -> 10.1016/j.clinmicnews.2017.07.001
Position: 51004 -> 10.1016/j.jfma.2014.08.004
Position: 51005 -> 10.5195/ijt.2020.6328
Position: 51006 -> 10.1016/j.cell.2020.07.024
Position: 51007 -> 10.1186/s41512-020-00077-0
Position: 51008 -> 10.1086/528801
Position: 51009 -> 10.1186/s10194-020-01188-1
Position: 51010 -> 10.1186/1471-2199-13-22
Position: 51011 -> 10.1007/s10775-020-09443-2
Position: 51012 -> 10.1186/s12889-020-09410-0
Position: 51013 -> 10.1002/jmv.25686
Position: 51014 -> 10.1016/j.virol.2020.08.008
Position: 51015 -> 10.3402/iee.v3i0.21386
Position: 51016 -> 10.1093/bfgp/elr027
Position: 51017 -> 10.1021/acs.jmedchem.7b01732
Position: 51018 -> 10.1101/

Position: 51177 -> 10.1007/978-3-030-58796-3_57
Position: 51178 -> 10.1016/j.jfms.2007.10.010
Position: 51179 -> 10.1007/s00705-014-2150-z
Position: 51180 -> 10.1111/irv.12440
Position: 51181 -> 10.1101/2020.11.19.20234245
Position: 51182 -> 10.1016/j.jclinane.2005.12.004
Position: 51183 -> 10.1016/j.biopha.2020.110668
Position: 51184 -> 10.1016/j.diabres.2020.108440
Position: 51185 -> 10.1016/j.enfcli.2020.05.018
Position: 51186 -> 10.1186/s12917-017-1239-5
Position: 51187 -> 10.1016/s1936-878x(20)30777-4
Position: 51188 -> 10.1016/j.jbc.2021.100346
Position: 51189 -> 10.1186/s12879-020-4781-x
Position: 51190 -> 10.1186/s12934-020-01393-2
Position: 51191 -> 10.1101/2020.05.11.077651
Position: 51192 -> 10.1371/journal.pone.0193317
Position: 51193 -> 10.3390/pathogens7040097
Position: 51194 -> 10.1016/j.virusres.2010.01.015
Position: 51195 -> 10.1093/infdis/jiv140
Position: 51196 -> 10.3390/vaccines4040039
Position: 51197 -> 10.1007/978-3-642-17966-2_6
Position: 51198 -> 10.1101/2020.05

Position: 51359 -> 10.1016/s1473-3099(04)01173-9
Position: 51360 -> 10.1177/2381468320963070
Position: 51361 -> 10.1101/2020.09.24.311704
Position: 51362 -> 10.1111/pai.12330
Position: 51363 -> 10.14744/semb.2020.65902
Position: 51364 -> 10.1016/j.scitotenv.2020.143870
Position: 51365 -> 10.1186/s13244-020-00910-6
Position: 51366 -> 10.1016/j.imr.2020.100484
Position: 51367 -> 10.1371/journal.pone.0236041
Position: 51368 -> 10.1038/s41390-020-01114-w
Position: 51369 -> 10.1177/2050313x15596484
Position: 51370 -> 10.1093/nar/gkz340
Position: 51371 -> 10.1016/j.jsps.2020.08.003
Position: 51372 -> 10.1038/s41598-020-78054-2
Position: 51373 -> 10.1101/2020.12.22.423893
Position: 51374 -> 10.1101/2020.05.01.073171
Position: 51375 -> 10.1371/journal.pmed.0030525
Position: 51376 -> 10.1074/jbc.m110.120014
Position: 51377 -> 10.1101/2020.10.26.20219709
Position: 51378 -> 10.1371/journal.pone.0235654
Position: 51379 -> 10.1101/614958
Position: 51380 -> 10.1111/jgh.15292
Position: 51381 -> 10.10

Position: 51542 -> 10.1080/22221751.2019.1701953
Position: 51543 -> 10.3390/jcm9082524
Position: 51544 -> 10.1007/978-90-368-0562-9_8
Position: 51545 -> 10.1136/bmjopen-2020-040517
Position: 51546 -> 10.1007/s11125-020-09526-6
Position: 51547 -> 10.1101/2020.04.22.055897
Position: 51548 -> 10.1101/2020.05.04.20072447
Position: 51549 -> 10.1016/j.bbadis.2020.165995
Position: 51550 -> 10.1007/s00238-020-01703-2
Position: 51551 -> 10.1101/2020.11.12.380931
Position: 51552 -> 10.7326/m20-4207
Position: 51553 -> 10.1097/cce.0000000000000300
Position: 51554 -> 10.1002/mgg3.79
Position: 51555 -> 10.1093/infdis/jiw190
Position: 51556 -> 10.3390/ph14010056
Position: 51557 -> 10.1126/sciadv.abd6322
Position: 51558 -> 10.1128/jvi.00711-19
Position: 51559 -> 10.1016/j.placenta.2021.01.012
Position: 51560 -> 10.1007/s10489-020-01919-6
Position: 51561 -> 10.1016/j.diagmicrobio.2020.115161
Position: 51562 -> 10.1371/journal.ppat.1008079
Position: 51563 -> 10.1016/j.molimm.2007.02.029
Position: 51564 

Position: 51724 -> 10.1101/2020.04.19.20071373
Position: 51725 -> 10.1016/s0140-6736(20)32593-9
Position: 51726 -> 10.1186/1475-2859-10-102
Position: 51727 -> 10.1371/journal.pone.0240962
Position: 51728 -> 10.1128/mbio.00909-18
Position: 51729 -> 10.1155/2014/430650
Position: 51730 -> 10.1007/s11739-020-02315-1
Position: 51731 -> 10.1002/acr.24347
Position: 51732 -> 10.1007/978-3-030-50423-6_14
Position: 51733 -> 10.1016/j.mehy.2020.109875
Position: 51734 -> 10.1101/2020.05.07.20094987
Position: 51735 -> 10.1053/jfms.2001.0126
Position: 51736 -> 10.1093/cid/ciw284
Position: 51737 -> 10.1371/journal.pone.0134823
Position: 51738 -> 10.1371/journal.pone.0023016
Position: 51739 -> 10.1007/s15010-020-01541-1
Position: 51740 -> 10.1101/2020.05.03.20089417
Position: 51741 -> 10.1371/journal.pone.0016796
Position: 51742 -> 10.1007/s00104-012-2325-7
Position: 51743 -> 10.1093/jpids/pit061
Position: 51744 -> 10.1155/2020/9214159
Position: 51745 -> 10.1038/celldisc.2017.21
Position: 51746 -> 10.

Position: 51908 -> 10.1371/journal.pone.0242045
Position: 51909 -> 10.1016/j.vaccine.2020.06.066
Position: 51910 -> 10.7554/elife.58828
Position: 51911 -> 10.1016/b978-0-12-803678-5.00303-9
Position: 51912 -> 10.1101/2020.09.04.20188771
Position: 51913 -> 10.1186/s12929-020-00681-8
Position: 51914 -> 10.1016/s1470-2045(20)30310-7
Position: 51915 -> 10.1016/j.clim.2020.108611
Position: 51916 -> 10.1007/s10557-020-07133-3
Position: 51917 -> 10.1093/infdis/jiu396
Position: 51918 -> 10.4081/jphr.2020.1933
Position: 51919 -> 10.1016/j.jped.2014.01.006
Position: 51920 -> 10.1007/s11136-020-02724-x
Position: 51921 -> 10.1007/s10964-020-01374-z
Position: 51922 -> 10.3389/fgene.2020.558557
Position: 51923 -> 10.1186/s12891-020-03925-8
Position: 51924 -> 10.1111/j.1439-0442.2007.00902.x
Position: 51925 -> 10.3390/molecules25184321
Position: 51926 -> 10.1002/jmv.25493
Position: 51927 -> 10.1017/s095026881300304x
Position: 51928 -> 10.1016/j.ijid.2020.07.052
Position: 51929 -> 10.1007/s10916-020-0

Position: 52092 -> 10.1186/s13063-020-4136-3
Position: 52093 -> 10.3390/ijerph17228479
Position: 52094 -> 10.1016/j.gaceta.2020.11.001
Position: 52095 -> 10.3390/ijerph17114076
Position: 52096 -> 10.1101/2020.05.12.20099135
Position: 52097 -> 10.1007/s11299-020-00254-0
Position: 52098 -> 10.4103/ijoem.ijoem_49_20
Position: 52099 -> 10.1007/978-981-13-8518-6_21
Position: 52100 -> 10.1128/mra.01555-18
Position: 52101 -> 10.1097/01.npr.0000722356.37937.9a
Position: 52102 -> 10.1101/2020.05.16.20104430
Position: 52103 -> 10.3389/fpsyg.2020.600739
Position: 52104 -> 10.1016/j.medj.2020.12.013
Position: 52105 -> 10.1016/j.bios.2011.11.037
Position: 52106 -> 10.1007/978-981-10-6620-7_35
Position: 52107 -> 10.1101/2020.07.24.20161752
Position: 52108 -> 10.1038/s41598-020-80895-w
Position: 52109 -> 10.1371/journal.pntd.0005394
Position: 52110 -> 10.1093/tbm/ibaa110
Position: 52111 -> 10.1101/2020.11.11.20230045
Position: 52112 -> 10.1007/978-1-84996-133-2_7
Position: 52113 -> 10.1038/s41598-020

Position: 52270 -> 10.1245/s10434-020-08578-4
Position: 52271 -> 10.1007/11557067_31
Position: 52272 -> 10.3390/s21020423
Position: 52273 -> 10.1016/j.ijpe.2020.107762
Position: 52274 -> 10.1016/j.sjbs.2017.06.001
Position: 52275 -> 10.3382/ps/pez011
Position: 52276 -> 10.1007/s11420-020-09800-5
Position: 52277 -> 10.2217/fmb-2020-0063
Position: 52278 -> 10.1101/2020.06.10.144782
Position: 52279 -> 10.1101/2020.05.29.124107
Position: 52280 -> 10.1093/nar/gkt1228
Position: 52281 -> 10.1016/j.cie.2020.106610
Position: 52282 -> 10.1016/j.jcv.2012.01.008
Position: 52283 -> 10.1007/978-3-030-65785-7_41
Position: 52284 -> 10.1186/s12906-018-2150-8
Position: 52285 -> 10.1002/ajh.21720
Position: 52286 -> 10.1007/s12072-010-9169-3
Position: 52287 -> 10.1016/b978-0-12-374920-8.00120-x
Position: 52288 -> 10.1016/j.jbusres.2010.12.003
Position: 52289 -> 10.1016/j.jcjo.2020.12.009
Position: 52290 -> 10.1016/j.rce.2020.04.001
Position: 52291 -> 10.1007/s41649-020-00150-2
Position: 52292 -> 10.1007/9

Position: 52453 -> 10.1186/1746-6148-7-73
Position: 52454 -> 10.1186/cc9324
Position: 52455 -> 10.1016/j.nmni.2020.100756
Position: 52456 -> 10.1177/1751143720937957
Position: 52457 -> 10.1016/j.bmcl.2017.05.022
Position: 52458 -> 10.1016/j.jamcollsurg.2020.09.028
Position: 52459 -> 10.3892/etm.2020.8751
Position: 52460 -> 10.1038/s41401-019-0305-x
Position: 52461 -> 10.1002/dmrr.3388
Position: 52462 -> 10.7554/elife.22069
Position: 52463 -> 10.1101/2020.11.17.20232264
Position: 52464 -> 10.1111/eci.13351
Position: 52465 -> 10.1007/978-3-030-49161-1_34
Position: 52466 -> 10.1186/1751-0473-3-3
Position: 52467 -> 10.1101/2020.09.13.20193706
Position: 52468 -> 10.1016/j.virusres.2020.197961
Position: 52469 -> 10.1101/2020.10.28.359356
Position: 52470 -> 10.1007/s42843-021-00030-9
Position: 52471 -> 10.1101/2020.09.22.307637
Position: 52472 -> 10.1111/jonm.13121
Position: 52473 -> 10.1186/s12864-018-5128-5
Position: 52474 -> 10.1097/md.0000000000021906
Position: 52475 -> 10.1371/journal.po

Position: 52635 -> 10.1016/j.janxdis.2020.102315
Position: 52636 -> 10.1097/md.0000000000022179
Position: 52637 -> 10.1007/s40808-020-00838-2
Position: 52638 -> 10.7759/cureus.10400
Position: 52639 -> 10.1016/j.sleh.2020.07.007
Position: 52640 -> 10.3389/fphar.2020.576994
Position: 52641 -> 10.1016/j.jaip.2020.10.013
Position: 52642 -> 10.1186/s13059-020-02077-1
Position: 52643 -> 10.1093/cid/ciu225
Position: 52644 -> 10.1007/978-3-642-35879-1_62
Position: 52645 -> 10.1101/2020.07.31.20165647
Position: 52646 -> 10.1016/j.jpainsymman.2020.07.016
Position: 52647 -> 10.1093/braincomms/fcaa205
Position: 52648 -> 10.3201/eid1002.030793
Position: 52649 -> 10.1101/2020.10.30.20223461
Position: 52650 -> 10.1016/j.ygeno.2020.10.009
Position: 52651 -> 10.12688/hrbopenres.13066.1
Position: 52652 -> 10.1038/cdd.2016.3
Position: 52653 -> 10.1186/s13223-020-00457-7
Position: 52654 -> 10.1016/j.simpat.2020.102196
Position: 52655 -> 10.1002/bjs.11747
Position: 52656 -> 10.1016/j.technovation.2020.1021

Position: 52816 -> 10.1016/j.buildenv.2008.06.013
Position: 52817 -> 10.1186/1743-422x-6-33
Position: 52818 -> 10.1101/2020.08.02.20129767
Position: 52819 -> 10.1002/jmv.23265
Position: 52820 -> 10.1097/pec.0000000000002334
Position: 52821 -> 10.1057/s41311-020-00278-w
Position: 52822 -> 10.1007/978-90-481-2344-5_14
Position: 52823 -> 10.1371/journal.pone.0237630
Position: 52824 -> 10.1007/s43683-020-00011-2
Position: 52825 -> 10.1007/978-3-030-65785-7_21
Position: 52826 -> 10.1042/bsr20201256
Position: 52827 -> 10.1007/s00335-019-09797-1
Position: 52828 -> 10.1186/s40635-020-00308-0
Position: 52829 -> 10.1002/jmv.25934
Position: 52830 -> 10.1016/j.orthtr.2020.07.001
Position: 52831 -> 10.2147/ijn.s133430
Position: 52832 -> 10.1101/2020.06.18.159434
Position: 52833 -> 10.2147/cia.s283015
Position: 52834 -> 10.1007/s12088-020-00908-0
Position: 52835 -> 10.1016/j.scitotenv.2021.145158
Position: 52836 -> 10.1186/s12905-021-01177-9
Position: 52837 -> 10.1186/1743-422x-8-263
Position: 52838

Position: 52999 -> 10.1016/j.compbiomed.2020.104102
Position: 53000 -> 10.1101/2020.12.24.424245
Position: 53001 -> 10.1007/s00520-020-05924-w
Position: 53002 -> 10.1111/j.1442-8903.2009.00436.x
Position: 53003 -> 10.1016/j.tet.2020.131373
Position: 53004 -> 10.3389/fpsyg.2020.01283
Position: 53005 -> 10.1371/journal.pbio.3000698
Position: 53006 -> 10.1038/s41423-020-00557-9
Position: 53007 -> 10.1007/s12273-020-0703-5
Position: 53008 -> 10.1007/s00134-020-06312-y
Position: 53009 -> 10.1016/s0065-3527(06)68003-2
Position: 53010 -> 10.1007/978-3-030-47426-3_27
Position: 53011 -> 10.1007/s00705-019-04433-4
Position: 53012 -> 10.1007/s11427-007-0080-7
Position: 53013 -> 10.1016/j.eprac.2021.01.011
Position: 53014 -> 10.1007/s13244-011-0077-4
Position: 53015 -> 10.1186/s12888-020-02786-8
Position: 53016 -> 10.1016/j.medcli.2020.12.017
Position: 53017 -> 10.1186/1743-422x-8-146
Position: 53018 -> 10.1007/978-3-030-45234-6_28
Position: 53019 -> 10.1038/nmeth.2918
Position: 53020 -> 10.1016/b

Position: 53182 -> 10.1038/s41401-019-0315-8
Position: 53183 -> 10.1007/s00253-008-1730-9
Position: 53184 -> 10.3390/ijerph18010138
Position: 53185 -> 10.1186/s12917-019-1911-z
Position: 53186 -> 10.1101/2019.12.11.872861
Position: 53187 -> 10.1016/s0065-2318(10)63006-5
Position: 53188 -> 10.1007/978-3-030-52240-7_43
Position: 53189 -> 10.1007/s007050050541
Position: 53190 -> 10.1101/2020.03.22.20040782
Position: 53191 -> 10.1038/s41598-020-74776-5
Position: 53192 -> 10.1136/bmjpo-2020-000831
Position: 53193 -> 10.1093/cid/cit379
Position: 53194 -> 10.1186/s12864-020-06997-x
Position: 53195 -> 10.1016/j.jweia.2016.10.010
Position: 53196 -> 10.1016/j.dld.2020.05.002
Position: 53197 -> 10.1007/s10096-020-03989-3
Position: 53198 -> 10.1016/j.jtcvs.2008.08.044
Position: 53199 -> 10.1093/nar/gkm918
Position: 53200 -> 10.1016/b978-0-323-40181-4.00021-9
Position: 53201 -> 10.1177/2382120520951819
Position: 53202 -> 10.1038/s41401-019-0340-7
Position: 53203 -> 10.1136/bmj.m3320
Position: 53204

Position: 53365 -> 10.1101/2020.08.28.20184077
Position: 53366 -> 10.1136/rmdopen-2020-001332
Position: 53367 -> 10.1007/s10708-020-10303-3
Position: 53368 -> 10.7150/thno.49199
Position: 53369 -> 10.1093/ajcp/aqaa140
Position: 53370 -> 10.1016/j.ijbiomac.2020.08.193
Position: 53371 -> 10.1007/978-3-030-52237-7_45
Position: 53372 -> 10.1371/journal.pone.0146054
Position: 53373 -> 10.1016/j.janxdis.2020.102341
Position: 53374 -> 10.1101/2020.04.07.20056937
Position: 53375 -> 10.3390/genes11121478
Position: 53376 -> 10.1101/2020.06.27.174896
Position: 53377 -> 10.1016/s0929-6646(09)60017-6
Position: 53378 -> 10.1016/j.molstruc.2020.128741
Position: 53379 -> 10.1101/2020.05.16.20104182
Position: 53380 -> 10.1128/msphere.00966-20
Position: 53381 -> 10.1101/2020.11.13.20230060
Position: 53382 -> 10.1007/978-3-319-95240-6_1
Position: 53383 -> 10.1101/2020.09.01.20135194
Position: 53384 -> 10.1038/s41372-020-00863-0
Position: 53385 -> 10.1007/s00125-020-05292-4
Position: 53386 -> 10.1101/2021

Position: 53547 -> 10.1111/acem.14003
Position: 53548 -> 10.1161/strokeaha.120.031975
Position: 53549 -> 10.1093/clinchem/hvaa262
Position: 53550 -> 10.1101/2020.05.08.20095935
Position: 53551 -> 10.1016/j.jss.2020.09.029
Position: 53552 -> 10.3390/cells9091963
Position: 53553 -> 10.1002/lary.28770
Position: 53554 -> 10.1186/s12977-014-0127-3
Position: 53555 -> 10.1016/j.jiph.2020.07.011
Position: 53556 -> 10.3390/v13010117
Position: 53557 -> 10.1016/j.ijmmb.2020.11.002
Position: 53558 -> 10.1101/2020.09.23.309948
Position: 53559 -> 10.1101/2020.07.28.20163394
Position: 53560 -> 10.1016/j.bios.2020.112726
Position: 53561 -> 10.1007/s42438-020-00170-z
Position: 53562 -> 10.1186/s12917-020-02468-3
Position: 53563 -> 10.1016/j.jim.2020.112808
Position: 53564 -> 10.1016/j.mcp.2018.08.004
Position: 53565 -> 10.1186/s12879-020-05682-4
Position: 53566 -> 10.1101/2020.05.17.20104695
Position: 53567 -> 10.2147/idr.s277620
Position: 53568 -> 10.1186/1471-2334-13-273
Position: 53569 -> 10.1007/97

Position: 53731 -> 10.1101/2020.12.02.20242354
Position: 53732 -> 10.1016/j.legalmed.2020.101811
Position: 53733 -> 10.1093/intimm/dxg119
Position: 53734 -> 10.1101/2020.11.18.20232892
Position: 53735 -> 10.1007/s00431-020-03907-x
Position: 53736 -> 10.1093/ndt/gfq782
Position: 53737 -> 10.1007/978-3-319-74506-0_11
Position: 53738 -> 10.1002/jmv.25499
Position: 53739 -> 10.1186/s13059-020-02228-4
Position: 53740 -> 10.1186/s13244-020-00851-0
Position: 53741 -> 10.2147/prbm.s280825
Position: 53742 -> 10.3390/ijerph17238835
Position: 53743 -> 10.1101/2020.12.15.20248279
Position: 53744 -> 10.1038/s41598-018-19370-6
Position: 53745 -> 10.1016/j.ijid.2020.03.076
Position: 53746 -> 10.23750/abm.v91i2.9596
Position: 53747 -> 10.1101/2020.11.05.20226662
Position: 53748 -> 10.2217/fvl-2020-0368
Position: 53749 -> 10.1186/1743-422x-10-132
Position: 53750 -> 10.1016/j.virol.2008.01.018
Position: 53751 -> 10.1016/j.chaos.2020.110173
Position: 53752 -> 10.1101/2020.06.11.146241
Position: 53753 -> 

Position: 53914 -> 10.1016/j.jpeds.2019.04.058
Position: 53915 -> 10.1016/j.jmb.2020.166748
Position: 53916 -> 10.1101/2020.06.21.20136598
Position: 53917 -> 10.1016/j.dib.2020.106687
Position: 53918 -> 10.2147/idr.s282792
Position: 53919 -> 10.1016/j.japh.2021.01.023
Position: 53920 -> 10.1371/journal.pone.0244476
Position: 53921 -> 10.1371/journal.ppat.1009220
Position: 53922 -> 10.1101/246991
Position: 53923 -> 10.1155/2018/1538127
Position: 53924 -> 10.1016/j.virusres.2014.01.025
Position: 53925 -> 10.1186/s40101-020-00234-w
Position: 53926 -> 10.1007/s10096-017-3144-z
Position: 53927 -> 10.1101/2020.08.08.20170746
Position: 53928 -> 10.1136/bmjstel-2020-000626
Position: 53929 -> 10.1101/2020.08.26.269043
Position: 53930 -> 10.1057/978-1-137-53675-4_4
Position: 53931 -> 10.1007/978-1-62703-601-6_1
Position: 53932 -> 10.3389/fphar.2019.00959
Position: 53933 -> 10.1007/s11259-017-9701-1
Position: 53934 -> 10.1017/dmp.2020.361
Position: 53935 -> 10.1038/srep35873
Position: 53936 -> 10

Position: 54097 -> 10.1007/s13209-020-00226-0
Position: 54098 -> 10.1007/s12564-020-09658-6
Position: 54099 -> 10.1007/bf01322667
Position: 54100 -> 10.1016/j.dsp.2020.102830
Position: 54101 -> 10.3390/ijerph17134866
Position: 54102 -> 10.1007/s00261-020-02865-0
Position: 54103 -> 10.1111/j.1939-1676.2004.tb02565.x
Position: 54104 -> 10.1016/j.jpsychores.2020.110253
Position: 54105 -> 10.1371/journal.pone.0056278
Position: 54106 -> 10.1093/cid/ciaa1592
Position: 54107 -> 10.1038/s41398-020-00913-3
Position: 54108 -> 10.1101/2020.06.24.20139634
Position: 54109 -> 10.1016/j.clcc.2020.07.006
Position: 54110 -> 10.3390/ijerph17228696
Position: 54111 -> 10.1101/2020.09.17.20196444
Position: 54112 -> 10.1016/j.jhin.2018.05.021
Position: 54113 -> 10.12688/f1000research.26253.1
Position: 54114 -> 10.3390/healthcare9010050
Position: 54115 -> 10.1016/j.bbi.2020.05.038
Position: 54116 -> 10.1016/j.tru.2021.100037
Position: 54117 -> 10.1371/journal.pone.0241266
Position: 54118 -> 10.1186/s12951-02

Position: 54278 -> 10.3233/jad-200547
Position: 54279 -> 10.3201/eid2301.161081
Position: 54280 -> 10.3390/ijerph17165843
Position: 54281 -> 10.1016/j.antiviral.2017.08.007
Position: 54282 -> 10.1002/rth2.12391
Position: 54283 -> 10.1101/2020.02.21.960492
Position: 54284 -> 10.1038/srep34108
Position: 54285 -> 10.1007/978-3-662-62195-0_7
Position: 54286 -> 10.1016/j.semarthrit.2020.09.008
Position: 54287 -> 10.1101/2020.11.17.20233213
Position: 54288 -> 10.1007/s10290-020-00402-1
Position: 54289 -> 10.1101/2020.12.09.417121
Position: 54290 -> 10.3389/fmed.2020.00518
Position: 54291 -> 10.3390/ijerph17218224
Position: 54292 -> 10.1371/journal.pone.0243692
Position: 54293 -> 10.1016/j.virol.2020.09.008
Position: 54294 -> 10.1186/s12889-020-09232-0
Position: 54295 -> 10.1007/s00705-005-0598-6
Position: 54296 -> 10.1016/j.cppeds.2020.100872
Position: 54297 -> 10.1128/msphere.00323-17
Position: 54298 -> 10.1111/pbi.13468
Position: 54299 -> 10.3390/diagnostics10080593
Position: 54300 -> 10.1

Position: 54462 -> 10.1007/s00429-020-02154-y
Position: 54463 -> 10.1101/2020.09.16.20195917
Position: 54464 -> 10.1186/s12865-014-0041-4
Position: 54465 -> 10.1186/s12985-016-0488-4
Position: 54466 -> 10.1371/journal.ppat.1002155
Position: 54467 -> 10.1007/s12630-020-01686-5
Position: 54468 -> 10.1007/978-3-319-95240-6_8
Position: 54469 -> 10.1038/s41598-020-77929-8
Position: 54470 -> 10.1007/s11940-020-0618-6
Position: 54471 -> 10.1371/journal.pone.0170774
Position: 54472 -> 10.1109/iciafs.2012.6419885
Position: 54473 -> 10.4049/immunohorizons.2000051
Position: 54474 -> 10.1101/2020.05.25.20111757
Position: 54475 -> 10.1016/j.wneu.2020.08.200
Position: 54476 -> 10.1186/1479-5876-5-25
Position: 54477 -> 10.1038/srep28866
Position: 54478 -> 10.1186/s13662-020-03141-7
Position: 54479 -> 10.1101/2020.05.08.083816
Position: 54480 -> 10.1007/s10654-020-00680-x
Position: 54481 -> 10.1016/j.ijsu.2020.08.037
Position: 54482 -> 10.1016/j.imr.2020.100490
Position: 54483 -> 10.1101/072652
Positi

Position: 54643 -> 10.1038/3887
Position: 54644 -> 10.1016/b978-0-08-097037-0.00053-1
Position: 54645 -> 10.1016/j.arbres.2020.05.020
Position: 54646 -> 10.1371/journal.pone.0235773
Position: 54647 -> 10.1016/j.annals.2020.103041
Position: 54648 -> 10.1038/s41598-020-77172-1
Position: 54649 -> 10.1590/2175-8239-jbn-2020-s107
Position: 54650 -> 10.1007/978-1-4614-8196-6_32
Position: 54651 -> 10.1136/bmjopen-2020-039489
Position: 54652 -> 10.1007/s11262-011-0695-4
Position: 54653 -> 10.1038/s41598-018-21058-w
Position: 54654 -> 10.1002/aepp.13128
Position: 54655 -> 10.1016/j.chiabu.2020.104739
Position: 54656 -> 10.1101/2020.05.08.20095448
Position: 54657 -> 10.1186/s12959-020-00252-9
Position: 54658 -> 10.1016/j.anpedi.2020.07.019
Position: 54659 -> 10.1038/s41467-020-20235-8
Position: 54660 -> 10.1016/j.jhin.2010.09.037
Position: 54661 -> 10.1371/journal.pbio.3000869
Position: 54662 -> 10.1093/jac/dkm157
Position: 54663 -> 10.18632/oncotarget.8097
Position: 54664 -> 10.1007/s10649-020-

Position: 54824 -> 10.1016/j.jcv.2007.07.001
Position: 54825 -> 10.1007/s12035-020-02213-9
Position: 54826 -> 10.1007/s00520-020-05740-2
Position: 54827 -> 10.1038/s41407-020-0404-z
Position: 54828 -> 10.1016/j.jcv.2003.09.011
Position: 54829 -> 10.1007/s11701-020-01100-8
Position: 54830 -> 10.3389/fpubh.2020.00252
Position: 54831 -> 10.1016/j.sleep.2020.12.003
Position: 54832 -> 10.1016/j.vaccine.2019.03.046
Position: 54833 -> 10.1111/j.1365-2885.2007.00842.x
Position: 54834 -> 10.3389/fimmu.2019.00727
Position: 54835 -> 10.3390/md17060359
Position: 54836 -> 10.1016/b978-0-12-817133-2.00018-5
Position: 54837 -> 10.1101/2020.03.22.20040071
Position: 54838 -> 10.1016/j.csbj.2020.03.025
Position: 54839 -> 10.1016/j.jhin.2018.03.031
Position: 54840 -> 10.1101/2020.07.28.20164012
Position: 54841 -> 10.1007/s10111-019-00587-y
Position: 54842 -> 10.1186/s41182-020-00217-8
Position: 54843 -> 10.3390/ijerph17218017
Position: 54844 -> 10.1371/journal.pone.0095911
Position: 54845 -> 10.1016/j.je

Position: 55006 -> 10.1111/ced.14277
Position: 55007 -> 10.1098/rsif.2017.0838
Position: 55008 -> 10.5223/pghn.2021.24.1.65
Position: 55009 -> 10.1016/j.jqsrt.2020.107496
Position: 55010 -> 10.1016/j.jfms.2011.05.016
Position: 55011 -> 10.1007/s00249-009-0478-1
Position: 55012 -> 10.1038/s41598-020-62084-x
Position: 55013 -> 10.5365/wpsar.2016.7.3.009
Position: 55014 -> 10.1002/geo2.66
Position: 55015 -> 10.1007/s00705-019-04167-3
Position: 55016 -> 10.3390/s20164553
Position: 55017 -> 10.1016/j.virusres.2014.12.030
Position: 55018 -> 10.1101/2020.04.19.20071704
Position: 55019 -> 10.1007/978-3-030-50423-6_9
Position: 55020 -> 10.1007/7355_2018_58
Position: 55021 -> 10.1016/j.socscimed.2005.07.028
Position: 55022 -> 10.1186/s13052-020-00917-1
Position: 55023 -> 10.3389/fpsyt.2020.584462
Position: 55024 -> 10.1007/s10551-020-04703-1
Position: 55025 -> 10.1007/978-1-4614-3970-7_38
Position: 55026 -> 10.1016/j.orgdyn.2009.07.005
Position: 55027 -> 10.1016/j.lisr.2011.11.002
Position: 5502

Position: 55189 -> 10.1007/bf02905277
Position: 55190 -> 10.1093/jac/dkn303
Position: 55191 -> 10.3389/fendo.2020.596898
Position: 55192 -> 10.1016/j.diabres.2020.108467
Position: 55193 -> 10.1007/s40263-020-00742-4
Position: 55194 -> 10.1101/2020.03.16.20034934
Position: 55195 -> 10.1101/2020.07.20.20157743
Position: 55196 -> 10.1007/978-1-4419-1327-2_1
Position: 55197 -> 10.1016/j.ajem.2020.05.073
Position: 55198 -> 10.1007/978-3-319-95111-9_22
Position: 55199 -> 10.1186/1471-2105-4-43
Position: 55200 -> 10.1186/s13063-020-04981-0
Position: 55201 -> 10.1101/2020.05.26.20113373
Position: 55202 -> 10.1111/j.1742-6723.2004.00606.x
Position: 55203 -> 10.3892/mmr.2020.11400
Position: 55204 -> 10.1111/jors.12510
Position: 55205 -> 10.1097/wno.0000000000001038
Position: 55206 -> 10.1186/s12985-021-01492-5
Position: 55207 -> 10.1101/2020.11.05.20223289
Position: 55208 -> 10.1007/s10140-020-01808-y
Position: 55209 -> 10.1101/2020.07.03.186825
Position: 55210 -> 10.1186/1471-2334-8-117
Positio

Position: 55370 -> 10.1016/j.jdent.2020.103480
Position: 55371 -> 10.1016/j.patrec.2020.09.010
Position: 55372 -> 10.5765/jkacap.200046
Position: 55373 -> 10.1093/infdis/jix209
Position: 55374 -> 10.3390/biom10070998
Position: 55375 -> 10.1016/j.dsx.2020.12.040
Position: 55376 -> 10.1101/2021.01.20.21249279
Position: 55377 -> 10.1038/s41386-020-0750-8
Position: 55378 -> 10.1007/s00417-020-04694-9
Position: 55379 -> 10.1101/2020.12.24.424332
Position: 55380 -> 10.1186/s13756-016-0152-1
Position: 55381 -> 10.1038/s41598-021-82212-5
Position: 55382 -> 10.1007/s40123-020-00327-w
Position: 55383 -> 10.3390/ijms21218001
Position: 55384 -> 10.1007/s43683-020-00003-2
Position: 55385 -> 10.1101/2020.07.23.20160887
Position: 55386 -> 10.14202/vetworld.2018.977-985
Position: 55387 -> 10.1101/2020.08.13.20174078
Position: 55388 -> 10.1007/978-0-387-34163-7_17
Position: 55389 -> 10.1007/978-3-030-44914-8_18
Position: 55390 -> 10.1186/1471-2229-11-178
Position: 55391 -> 10.1093/ve/veaa053
Position: 

In [None]:
# Disabled snippet, kept for reference. When enabled it copies the entries of
# dict_new_extra_info whose keys run from len_df_current_extra_info up to (but
# not including) 47683 into a fresh dict and then uses that dict as
# dict_new_extra_info, i.e. it discards every entry from position 47683 onwards.
# NOTE(review): 47683 is a hard-coded cut-off, presumably from an earlier
# run - confirm before re-enabling.
# dict_new_extra_info_saver = dict()
# i = len_df_current_extra_info
# while i < 47683:
#     #print("Position: " + str(i) + " -> " +  doi_counted.index[i])
#     dict_new_extra_info_saver[i] = dict_new_extra_info[i]
#     i = i + 1 

# dict_new_extra_info = dict_new_extra_info_saver

In [None]:
# Display the newly collected records, keyed by their position in doi_counted.index.
# NOTE(review): this renders the whole dict; consider showing len(dict_new_extra_info)
# or a small sample instead to keep the notebook output manageable.
dict_new_extra_info

In [None]:
# Turn the collected records into a DataFrame: each dict key (DOI position)
# becomes one column.
df_new_extra_info = pd.DataFrame.from_dict(dict_new_extra_info, orient='columns')

In [None]:
# Inspect the frame built from dict_new_extra_info (one column per dict key).
df_new_extra_info

In [None]:
# Swap rows and columns so that every fetched DOI position becomes one row.
df_new_extra_transposed = df_new_extra_info.transpose()
df_new_extra_transposed

In [None]:
# Stack the newly fetched rows underneath the stored ones and renumber the
# combined index from 0.
df_combined_extra_info = pd.concat(
    objs=[df_current_extra_info, df_new_extra_transposed],
    axis=0,
    ignore_index=True,
)

In [None]:
# Persist the combined records. This overwrites the same pickle file that the
# stored records were loaded from.
df_combined_extra_info.to_pickle(path='extra_info_CS5099.pkl')
df_combined_extra_info

In [None]:
# Single-DOI example: fetch one Scopus record to look at its structure.
entry = AffiliationsFromScopusByDOI(client=client, doi='10.1086/605034')
entry

In [None]:
# Pretty-print the example record as JSON, keeping non-ASCII characters as they are.
print(json.dumps(entry, indent=4, ensure_ascii=False))

In [None]:
# Tabulate the 'affiliation' part of the example record.
df_test = pd.DataFrame(entry['affiliation'])
df_test

In [None]:
# Show the 'coredata' part of the example record fetched for the single DOI.
entry['coredata']

In [None]:
## ScienceDirect (full-text) document example using DOI
# Read the full-text document; on success print its title and write it out,
# otherwise report the failure. The object is displayed either way.
doi_doc = FullDoc(doi='10.1016/S1525-1578(10)60571-5')
if not doi_doc.read(client):
    print ("Read document failed.")
else:
    print ("doi_doc.title: ", doi_doc.title)
    doi_doc.write()
doi_doc