# CORD-19-collect-scopus-data

This Jupyter notebook collects additional data from Scopus to extend the CORD-19 dataset: 
https://datadryad.org/stash/dataset/doi:10.5061/dryad.vmcvdncs0

First, relevant packages must be imported to the Notebook.

In [1]:
import numpy as np
import pandas as pd
import csv
import ast
import collections
import matplotlib.pyplot as plt
import Levenshtein as lev
from fuzzywuzzy import fuzz 
import datetime
import matplotlib.pyplot as plt
import re
from urllib.parse import urlparse
from collections import Counter

from elsapy.elsclient import ElsClient
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch

import time # for sleep
from pybtex.database import parse_file, BibliographyData, Entry
import json
from elsapy.elsclient import ElsClient
from elsapy.elsdoc import AbsDoc
from elsapy.elssearch import ElsSearch

In [2]:
# Load the CORD-19 software-mentions table; its 'doi' column is what the
# Scopus lookups further down are based on.
CORD19_CSV = pd.read_csv('../data/cord-19/CORD19_software_mentions.csv')

In [3]:
# Total number of rows in the 'doi' column (missing values are counted too).
len(CORD19_CSV['doi'])

77448

In [4]:
# Keep the DOI column as its own Series for the steps below.
doi = CORD19_CSV['doi']
doi

0                                 NaN
1          10.1016/j.regg.2021.01.002
2           10.1016/j.rec.2020.08.002
3        10.1016/j.vetmic.2006.11.026
4                   10.3390/v12080849
                     ...             
77443      10.1007/s11229-020-02869-9
77444                             NaN
77445     10.1101/2020.05.13.20100206
77446      10.1007/s42991-020-00052-8
77447     10.1101/2020.09.14.20194670
Name: doi, Length: 77448, dtype: object

In [5]:
# Count occurrences per distinct DOI. value_counts() leaves NaN out by default,
# so missing DOIs do not appear in the result; the index of doi_counted is the
# list of distinct DOIs, ordered by how often each one occurs.
doi_counted = doi.value_counts()
doi_counted

10.1016/j.dsx.2020.04.012           2
10.31729/jnma.5498                  2
10.1016/j.virol.2005.12.006         1
10.1128/mra.00383-20                1
10.1016/j.annonc.2020.08.1812       1
                                   ..
10.1016/j.earlhumdev.2020.105258    1
10.1111/irv.12433                   1
10.1101/2020.10.23.20218214         1
10.1007/s00134-020-06226-9          1
10.1038/s41583-021-00428-w          1
Name: doi, Length: 74302, dtype: int64

In [6]:
def AffiliationsFromScopusByDOI(client, doi):
    """Obtain additional paper information from Scopus for a single DOI.

    Runs a Scopus search for ``DOI(<doi>)``, takes the first hit, extracts the
    Scopus id from its ``dc:identifier`` field and reads the matching abstract
    document.

    Parameters
    ----------
    client : ElsClient
        Client used both for the search and for reading the abstract document.
    doi : str
        DOI to look up. It is inserted verbatim into the search query.

    Returns
    -------
    dict or None
        ``scp_doc.data`` of the abstract document. ``None`` is returned when
        the search has no usable first hit or when reading/writing the
        document raises an error.

    Notes
    -----
    After a successful read ``scp_doc.write()`` is called as well; presumably
    this stores the document locally (see the elsapy documentation).
    Errors raised by the search request itself are not caught here.
    """
    doc_srch = ElsSearch("DOI("+doi+")",'scopus')
    doc_srch.execute(client, get_all = True)
    try:
        # The identifier is split on ":" and the second piece is used as the id.
        scopus_id = doc_srch.results[0]["dc:identifier"].split(":")[1]
    except (IndexError, KeyError, TypeError, AttributeError):
        # No hit at all, or the first hit carries no usable identifier.
        return None
    try:
        scp_doc = AbsDoc(scp_id = scopus_id)
        if scp_doc.read(client):
            scp_doc.write()
        else:
            print ("Read document failed.")
        return scp_doc.data
    except Exception as err:
        # Best effort: a single failing document must not abort a long batch.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit through, so a running batch can still be stopped.
        print ("Scopus lookup failed for " + str(doi) + ": " + repr(err))
        return None

In [7]:
## Load configuration
# The context manager closes the file even if json.load() raises.
with open("config.json") as con_file:
    config = json.load(con_file)
## Initialize client
# The API key is read from config.json; never paste a real key into the notebook.
client = ElsClient(config['apikey'])
# Example request:
# https://api.elsevier.com/content/search/scopus?query=DOI(10.1109/MCOM.2016.7509373)&apiKey=<YOUR_API_KEY>

In [8]:
# Load the Scopus results collected so far; the length of this frame is used
# below as the position at which the lookups resume.
# NOTE(review): read_pickle can execute arbitrary code contained in the file,
# so only load pickle files that were produced by this notebook.
df_current_extra_info = pd.read_pickle('extra_info_CS5099.pkl')
df_current_extra_info

Unnamed: 0,affiliation,coredata
0,"[{'affiliation-city': None, 'affilname': 'Pata...","{'srctype': 'j', 'prism:issueIdentifier': '230..."
1,"[{'affiliation-city': 'New Delhi', 'affilname'...","{'srctype': 'j', 'eid': '2-s2.0-85083171050', ..."
2,,
3,"[{'affiliation-city': 'London', 'affilname': '...","{'srctype': 'j', 'eid': '2-s2.0-79953057246', ..."
4,"[{'affiliation-city': 'Baoding', 'affilname': ...","{'srctype': 'j', 'prism:issueIdentifier': '7',..."
...,...,...
18471,"[{'affiliation-city': 'Rehovot', 'affilname': ...","{'srctype': 'j', 'eid': '2-s2.0-85100044416', ..."
18472,,"{'srctype': 'j', 'eid': '2-s2.0-33645723465', ..."
18473,"[{'affiliation-city': 'Rockville', 'affilname'...","{'srctype': 'j', 'eid': '2-s2.0-85096082670', ..."
18474,,


In [9]:
# Number of rows already collected; the lookup loop starts at this position
# in doi_counted.
len_df_current_extra_info = len(df_current_extra_info)
len_df_current_extra_info

18476

In [None]:
%%time
i = len_df_current_extra_info
dict_new_extra_info = dict()
len_dois = len(doi_counted)
while i < len_dois:
    print("Position: " + str(i) + " -> " +  doi_counted.index[i])
    dict_new_extra_info[i] = AffiliationsFromScopusByDOI(client, doi_counted.index[i])
    i = i + 1 

Position: 18476 -> 10.1101/2020.03.30.015164
Position: 18477 -> 10.1155/2020/2396569
Position: 18478 -> 10.1177/0333102420965139
Position: 18479 -> 10.3389/fneur.2020.00806
Position: 18480 -> 10.1007/978-1-59745-181-9_20
Position: 18481 -> 10.1007/s11356-020-10932-8
Position: 18482 -> 10.1371/journal.pntd.0001359
Position: 18483 -> 10.1038/s41467-020-20608-z
Position: 18484 -> 10.1007/s40692-020-00181-6
Position: 18485 -> 10.1111/apt.15951
Position: 18486 -> 10.1111/jan.14718
Position: 18487 -> 10.1007/s42452-020-3173-6
Position: 18488 -> 10.1186/s12917-020-02730-8
Position: 18489 -> 10.1021/acs.jpcb.0c05621
Position: 18490 -> 10.3390/ijerph17176317
Position: 18491 -> 10.1101/2020.05.01.20087023
Position: 18492 -> 10.1101/2021.01.08.423993
Position: 18493 -> 10.1007/978-3-030-53956-6_31
Position: 18494 -> 10.2144/btn-2020-0096
Position: 18495 -> 10.1056/nejmc2008646
Position: 18496 -> 10.1101/2021.01.24.21250418
Position: 18497 -> 10.1016/j.jinf.2020.09.010
Position: 18498 -> 10.7326/m

Position: 18658 -> 10.1017/dmp.2020.313
Position: 18659 -> 10.1101/2020.05.08.20095968
Position: 18660 -> 10.1093/infdis/jiy018
Position: 18661 -> 10.1016/j.jtos.2020.08.003
Position: 18662 -> 10.1002/jmv.26682
Position: 18663 -> 10.3390/v11010058
Position: 18664 -> 10.1016/j.socscimed.2020.113441
Position: 18665 -> 10.1016/j.rmed.2020.106120
Position: 18666 -> 10.3390/jcm9113521
Position: 18667 -> 10.1016/j.jen.2020.06.009
Position: 18668 -> 10.1023/a:1016378226861
Position: 18669 -> 10.1101/2020.07.04.20145870
Position: 18670 -> 10.3390/ijms21103643
Position: 18671 -> 10.1080/07391102.2020.1850360
Position: 18672 -> 10.1016/j.virusres.2017.08.001
Position: 18673 -> 10.1128/jvi.01734-19
Position: 18674 -> 10.1016/j.diagmicrobio.2020.115181
Position: 18675 -> 10.1101/2020.04.11.037473
Position: 18676 -> 10.1101/2020.09.24.20200766
Position: 18677 -> 10.1159/000509065
Position: 18678 -> 10.3390/ijerph18020580
Position: 18679 -> 10.1007/978-3-030-52200-1_1
Position: 18680 -> 10.3390/ijer

Position: 18841 -> 10.1101/2020.05.10.20097295
Position: 18842 -> 10.1016/j.bios.2013.07.037
Position: 18843 -> 10.1007/s00436-020-06786-1
Position: 18844 -> 10.3390/biology9090296
Position: 18845 -> 10.1111/sifp.12138
Position: 18846 -> 10.1093/jpids/piaa162
Position: 18847 -> 10.1136/bmjopen-2020-039088
Position: 18848 -> 10.1016/j.dcn.2020.100899
Position: 18849 -> 10.1684/ecn.2020.0448
Position: 18850 -> 10.1101/2020.05.28.122663
Position: 18851 -> 10.1002/adma.202005637
Position: 18852 -> 10.3399/bjgpopen20x101069
Position: 18853 -> 10.1016/j.meegid.2020.104556
Position: 18854 -> 10.3390/cancers13020266
Position: 18855 -> 10.1093/ibd/izaa303
Position: 18856 -> 10.1111/ajt.16346
Position: 18857 -> 10.3390/jof2020016
Position: 18858 -> 10.1101/2020.10.10.20210427
Position: 18859 -> 10.1007/s42438-020-00128-1
Position: 18860 -> 10.1007/978-3-030-50433-5_33
Position: 18861 -> 10.2147/jpr.s285835
Position: 18862 -> 10.1016/j.xcrm.2020.100059
Position: 18863 -> 10.1038/s41598-018-20305-

Position: 19024 -> 10.1016/j.ijheh.2020.113671
Position: 19025 -> 10.3389/fimmu.2019.01821
Position: 19026 -> 10.1007/s10393-016-1183-z
Position: 19027 -> 10.1186/s12889-020-09079-5
Position: 19028 -> 10.1038/s41598-020-65405-2
Position: 19029 -> 10.1101/2020.03.24.20042408
Position: 19030 -> 10.1101/2020.03.23.20041608
Position: 19031 -> 10.1016/j.heliyon.2020.e04929
Position: 19032 -> 10.1101/2020.11.17.20233452
Position: 19033 -> 10.1016/j.jmb.2017.04.011
Position: 19034 -> 10.1016/j.jclinepi.2020.12.002
Position: 19035 -> 10.1016/j.ijid.2020.05.071
Position: 19036 -> 10.1007/s10753-014-0048-2
Position: 19037 -> 10.1016/j.cell.2019.04.035
Position: 19038 -> 10.1007/978-3-030-49165-9_7
Position: 19039 -> 10.1016/b978-0-12-801477-6.00010-1
Position: 19040 -> 10.1111/j.1750-2659.2009.00095.x
Position: 19041 -> 10.1371/journal.pbio.0030324
Position: 19042 -> 10.1186/s12917-018-1756-x
Position: 19043 -> 10.1365/s40702-020-00684-x
Position: 19044 -> 10.1177/0300060520977593
Position: 1904

Position: 19203 -> 10.1007/s12098-020-03311-1
Position: 19204 -> 10.1007/s10096-020-04137-7
Position: 19205 -> 10.1007/s10479-020-03695-5
Position: 19206 -> 10.1007/s10072-020-04693-y
Position: 19207 -> 10.1007/s00701-020-04544-x
Position: 19208 -> 10.1016/j.tet.2020.131373
Position: 19209 -> 10.1093/ofid/ofab001
Position: 19210 -> 10.1111/j.1600-0625.2006.00527.x
Position: 19211 -> 10.1038/srep24257
Position: 19212 -> 10.1101/2020.05.13.092759
Position: 19213 -> 10.1002/iroh.200510822
Position: 19214 -> 10.1016/j.endien.2020.09.004
Position: 19215 -> 10.1007/s38311-020-0612-3
Position: 19216 -> 10.1007/s00580-010-1084-4
Position: 19217 -> 10.1007/s10875-020-00764-z
Position: 19218 -> 10.1016/j.vaccine.2008.01.006
Position: 19219 -> 10.1101/2020.05.27.117127
Position: 19220 -> 10.1016/j.jep.2020.113319
Position: 19221 -> 10.1016/j.jth.2020.100942
Position: 19222 -> 10.1111/tbed.13419
Position: 19223 -> 10.1016/j.trip.2020.100222
Position: 19224 -> 10.3389/fnhum.2020.598435
Position: 19

Position: 19384 -> 10.1186/s12879-016-1410-9
Position: 19385 -> 10.3390/nu12092899
Position: 19386 -> 10.1093/aje/kwt133
Position: 19387 -> 10.1371/journal.ppat.1003565
Position: 19388 -> 10.1016/s1936-878x(20)30777-4
Position: 19389 -> 10.1101/2020.05.12.20099267
Position: 19390 -> 10.1101/2020.07.08.192666
Position: 19391 -> 10.1093/nar/gkaa1005
Position: 19392 -> 10.1007/s11896-021-09431-4
Position: 19393 -> 10.1007/978-3-030-53956-6_17
Position: 19394 -> 10.1007/s11192-020-03706-z
Position: 19395 -> 10.1007/s00280-020-04172-3
Position: 19396 -> 10.1007/978-3-030-48648-8_12
Position: 19397 -> 10.1186/s13071-015-0707-7
Position: 19398 -> 10.1038/s41540-019-0113-4
Position: 19399 -> 10.3390/ijerph17238780
Position: 19400 -> 10.2147/idr.s213296
Position: 19401 -> 10.1038/s41598-018-26765-y
Position: 19402 -> 10.1002/jmv.21708
Position: 19403 -> 10.1016/j.tibs.2015.03.006
Position: 19404 -> 10.1016/j.ijid.2021.01.065
Position: 19405 -> 10.1111/cea.12482
Position: 19406 -> 10.23750/abm.v

Position: 19569 -> 10.1038/s41598-018-31607-y
Position: 19570 -> 10.1590/0037-8682-0167-2020
Position: 19571 -> 10.2147/dddt.s133127
Position: 19572 -> 10.1101/2020.09.26.20202457
Position: 19573 -> 10.1016/j.niox.2020.07.003
Position: 19574 -> 10.1177/1178632920938674
Position: 19575 -> 10.1101/2020.05.07.20093864
Position: 19576 -> 10.1186/s12875-020-01359-8
Position: 19577 -> 10.15585/mmwr.mm6938e1
Position: 19578 -> 10.1101/2020.11.25.20238766
Position: 19579 -> 10.3390/v9050121
Position: 19580 -> 10.1095/biolreprod.105.044776
Position: 19581 -> 10.1038/s41598-020-78407-x
Position: 19582 -> 10.1056/nejmoa2030340
Position: 19583 -> 10.1186/1756-0500-7-504
Position: 19584 -> 10.1101/2020.04.25.20079624
Position: 19585 -> 10.3201/eid2007.140294
Position: 19586 -> 10.3390/ijerph17228659
Position: 19587 -> 10.3390/brainsci10070453
Position: 19588 -> 10.1016/j.scitotenv.2020.139495
Position: 19589 -> 10.1007/s11109-020-09675-6
Position: 19590 -> 10.1016/j.jstrokecerebrovasdis.2020.105175

Position: 19749 -> 10.1007/978-3-642-38951-1_9
Position: 19750 -> 10.3389/fpsyt.2020.588137
Position: 19751 -> 10.1016/j.ajic.2020.04.002
Position: 19752 -> 10.1111/all.14665
Position: 19753 -> 10.1016/j.tmaid.2020.101636
Position: 19754 -> 10.1016/j.ijsu.2020.05.022
Position: 19755 -> 10.1007/s40292-020-00409-7
Position: 19756 -> 10.1101/2020.09.13.295493
Position: 19757 -> 10.1101/2021.01.11.426253
Position: 19758 -> 10.1016/j.arbres.2020.05.032
Position: 19759 -> 10.1101/2020.12.10.20247130
Position: 19760 -> 10.1371/journal.pntd.0004456
Position: 19761 -> 10.1016/j.meegid.2021.104734
Position: 19762 -> 10.1371/journal.pone.0062833
Position: 19763 -> 10.5195/jmla.2020.1039
Position: 19764 -> 10.1007/s13218-020-00693-4
Position: 19765 -> 10.1007/s11620-020-00546-0
Position: 19766 -> 10.1007/978-3-319-22813-6_11
Position: 19767 -> 10.1016/j.cgh.2020.10.010
Position: 19768 -> 10.1038/s41467-020-14363-4
Position: 19769 -> 10.1016/b978-044453010-3.50014-1
Position: 19770 -> 10.1017/cha.2

Position: 19929 -> 10.1007/s00120-020-01241-6
Position: 19930 -> 10.1186/s13287-020-02035-5
Position: 19931 -> 10.1101/2020.05.07.20093849
Position: 19932 -> 10.1097/mat.0000000000001191
Position: 19933 -> 10.1016/j.jep.2021.113871
Position: 19934 -> 10.1016/j.jviromet.2020.114032
Position: 19935 -> 10.3758/s13414-020-02187-9
Position: 19936 -> 10.1016/j.jviromet.2013.08.002
Position: 19937 -> 10.1016/j.chaos.2020.110189
Position: 19938 -> 10.1186/1475-2875-12-392
Position: 19939 -> 10.1371/journal.pone.0239797
Position: 19940 -> 10.1016/s2665-9913(20)30287-3
Position: 19941 -> 10.1038/s41598-019-55254-z
Position: 19942 -> 10.1101/2020.08.13.249086
Position: 19943 -> 10.1016/j.virol.2014.06.040
Position: 19944 -> 10.1016/j.tim.2014.02.011
Position: 19945 -> 10.3389/fpubh.2020.562885
Position: 19946 -> 10.1186/s12967-020-02465-y
Position: 19947 -> 10.1007/s00405-020-06582-8
Position: 19948 -> 10.1016/j.vetimm.2009.10.026
Position: 19949 -> 10.1016/j.virusres.2011.02.015
Position: 19950 

Position: 20109 -> 10.1093/jtm/taaa177
Position: 20110 -> 10.1002/psc.617
Position: 20111 -> 10.7717/peerj.798
Position: 20112 -> 10.12688/f1000research.6085.2
Position: 20113 -> 10.4048/jbc.2020.23.e56
Position: 20114 -> 10.1101/2020.05.26.20113381
Position: 20115 -> 10.1007/978-3-030-58805-2_7
Position: 20116 -> 10.1007/978-3-030-47436-2_2
Position: 20117 -> 10.1371/journal.ppat.1008762
Position: 20118 -> 10.1023/a:1018844608861
Position: 20119 -> 10.1038/s41598-020-74776-5
Position: 20120 -> 10.2478/s11658-008-0024-7
Position: 20121 -> 10.1016/b978-2-294-70452-9.50013-5
Position: 20122 -> 10.1101/2021.01.08.21249439
Position: 20123 -> 10.3390/vetsci7040187
Position: 20124 -> 10.3390/jfmk5030046
Position: 20125 -> 10.1016/j.buildenv.2020.107313
Position: 20126 -> 10.1101/2020.10.17.344069
Position: 20127 -> 10.1016/j.landurbplan.2011.02.018
Position: 20128 -> 10.1101/2020.05.27.20114744
Position: 20129 -> 10.1016/j.healthplace.2020.102406
Position: 20130 -> 10.1101/2021.01.26.2125031

Position: 20290 -> 10.1007/978-3-319-99375-1_66
Position: 20291 -> 10.1101/2020.11.17.20220681
Position: 20292 -> 10.1016/j.virol.2017.06.019
Position: 20293 -> 10.1016/j.puhe.2018.07.002
Position: 20294 -> 10.1016/s1473-3099(13)70690-x
Position: 20295 -> 10.1186/s10020-020-00221-y
Position: 20296 -> 10.1101/2020.09.11.20192963
Position: 20297 -> 10.1038/srep13872
Position: 20298 -> 10.1007/s40847-020-00113-1
Position: 20299 -> 10.1016/j.medcli.2020.06.006
Position: 20300 -> 10.1002/pro.3150
Position: 20301 -> 10.1101/2020.09.09.20191122
Position: 20302 -> 10.1186/s12917-014-0176-9
Position: 20303 -> 10.21203/rs.3.rs-72427/v1
Position: 20304 -> 10.1186/1471-2334-13-257
Position: 20305 -> 10.1016/j.pharmthera.2020.107697
Position: 20306 -> 10.1007/s41745-020-00200-6
Position: 20307 -> 10.1101/2020.12.09.417519
Position: 20308 -> 10.1016/j.nbt.2020.09.004
Position: 20309 -> 10.1007/978-3-8274-2734-2_2
Position: 20310 -> 10.1016/j.tourman.2020.104233
Position: 20311 -> 10.1016/j.resuscita

Position: 20471 -> 10.1007/s13238-020-00708-8
Position: 20472 -> 10.1101/2020.06.23.20138289
Position: 20473 -> 10.3389/fmed.2020.00250
Position: 20474 -> 10.1371/journal.ppat.1008111
Position: 20475 -> 10.1371/journal.pone.0244968
Position: 20476 -> 10.3389/fmolb.2020.582499
Position: 20477 -> 10.1021/acsptsci.0c00093
Position: 20478 -> 10.1007/s00405-020-06496-5
Position: 20479 -> 10.1101/2020.04.10.036335
Position: 20480 -> 10.1007/s00431-020-03747-9
Position: 20481 -> 10.1093/milmed/usaa531
Position: 20482 -> 10.1136/bmjopen-2019-035632
Position: 20483 -> 10.1101/2020.12.18.20245068
Position: 20484 -> 10.1007/s42770-020-00355-5
Position: 20485 -> 10.1055/s-0040-1715279
Position: 20486 -> 10.1007/s00705-020-04669-5
Position: 20487 -> 10.1007/s10612-020-09538-w
Position: 20488 -> 10.3389/fneur.2020.00509
Position: 20489 -> 10.1017/s0950268820002782
Position: 20490 -> 10.1016/j.jcf.2020.08.013
Position: 20491 -> 10.1007/978-3-030-30281-8_17
Position: 20492 -> 10.1101/2020.11.20.202359

Position: 20652 -> 10.1101/2020.05.26.117069
Position: 20653 -> 10.1002/elps.201900167
Position: 20654 -> 10.1186/s12954-020-00451-7
Position: 20655 -> 10.1007/s11469-020-00414-w
Position: 20656 -> 10.1101/2020.10.08.20208751
Position: 20657 -> 10.1007/s10936-020-09752-1
Position: 20658 -> 10.1016/j.outlook.2020.12.017
Position: 20659 -> 10.1007/s11063-021-10425-w
Position: 20660 -> 10.1136/jech.2010.119875
Position: 20661 -> 10.3343/alm.2012.32.1.44
Position: 20662 -> 10.1007/s11262-019-01701-y
Position: 20663 -> 10.1017/dmp.2020.68
Position: 20664 -> 10.1007/s10597-020-00724-2
Position: 20665 -> 10.1016/j.physleta.2015.09.017
Position: 20666 -> 10.15537/smj.2015.1.11318
Position: 20667 -> 10.1038/s41423-020-00571-x
Position: 20668 -> 10.1007/s00438-020-01684-8
Position: 20669 -> 10.3390/children7110207
Position: 20670 -> 10.1101/2020.11.09.20228791
Position: 20671 -> 10.1016/j.wasman.2008.08.023
Position: 20672 -> 10.1016/s2665-9913(20)30390-8
Position: 20673 -> 10.1101/2020.06.20.16

Position: 20834 -> 10.1002/alr.22602
Position: 20835 -> 10.1093/nar/gkq554
Position: 20836 -> 10.1093/ofid/ofaa401
Position: 20837 -> 10.3390/molecules25245787
Position: 20838 -> 10.1038/s41398-020-00913-3
Position: 20839 -> 10.2147/ndt.s277224
Position: 20840 -> 10.1016/j.rinp.2020.103560
Position: 20841 -> 10.1016/j.ajic.2017.07.019
Position: 20842 -> 10.1007/s12115-020-00510-6
Position: 20843 -> 10.1007/s00251-020-01179-1
Position: 20844 -> 10.1177/2515690x20960659
Position: 20845 -> 10.3389/fcell.2019.00200
Position: 20846 -> 10.1016/j.vetmic.2016.11.014
Position: 20847 -> 10.1177/1948550620936439
Position: 20848 -> 10.1101/2020.12.04.406421
Position: 20849 -> 10.1016/j.dld.2020.11.029
Position: 20850 -> 10.1016/j.amsu.2020.09.044
Position: 20851 -> 10.1016/j.elerap.2020.101004
Position: 20852 -> 10.1016/s1473-3099(14)70827-8
Position: 20853 -> 10.1016/b978-008044992-0.00501-0
Position: 20854 -> 10.1016/j.ajem.2020.10.009
Position: 20855 -> 10.3389/fphar.2020.01066
Position: 20856 

Position: 21018 -> 10.1002/lrh2.10251
Position: 21019 -> 10.4142/jvs.2019.20.e1
Position: 21020 -> 10.3390/ijerph17186731
Position: 21021 -> 10.1101/2020.08.06.20148288
Position: 21022 -> 10.1007/978-3-030-47426-3_67
Position: 21023 -> 10.1007/978-3-319-33900-9_13
Position: 21024 -> 10.1016/j.buildenv.2007.01.035
Position: 21025 -> 10.1097/md.0000000000022260
Position: 21026 -> 10.1016/j.resuscitation.2020.12.004
Position: 21027 -> 10.1016/j.scitotenv.2020.144832
Position: 21028 -> 10.3390/ijms17030297
Position: 21029 -> 10.1007/s11060-020-03521-9
Position: 21030 -> 10.1016/s1773-035x(20)30313-0
Position: 21031 -> 10.1371/journal.pntd.0000685
Position: 21032 -> 10.1016/j.vaccine.2007.05.025
Position: 21033 -> 10.1007/s00430-019-00652-3
Position: 21034 -> 10.1245/s10434-020-09379-5
Position: 21035 -> 10.1007/s11423-020-09887-x
Position: 21036 -> 10.1007/978-3-030-49461-2_25
Position: 21037 -> 10.1101/2021.01.14.21249620
Position: 21038 -> 10.1186/s13584-020-00422-0
Position: 21039 -> 10

Position: 21200 -> 10.1101/2020.11.10.20228809
Position: 21201 -> 10.1111/eci.13412
Position: 21202 -> 10.1093/molbev/msaa163
Position: 21203 -> 10.1007/s40259-020-00421-2
Position: 21204 -> 10.1007/s40615-020-00872-x
Position: 21205 -> 10.1007/s40268-020-00312-5
Position: 21206 -> 10.1109/mcise.2004.1267606
Position: 21207 -> 10.1038/s41746-020-0279-6
Position: 21208 -> 10.1007/s00134-021-06346-w
Position: 21209 -> 10.2147/clep.s271763
Position: 21210 -> 10.1016/s1762-827x(20)44252-x
Position: 21211 -> 10.3389/fpsyg.2020.616169
Position: 21212 -> 10.1016/j.cca.2020.10.035
Position: 21213 -> 10.1016/j.cell.2020.11.016
Position: 21214 -> 10.1007/s13365-019-00800-8
Position: 21215 -> 10.1093/annweh/wxaa057
Position: 21216 -> 10.1101/2020.03.19.20039180
Position: 21217 -> 10.1007/s00432-020-03228-4
Position: 21218 -> 10.1007/s11845-020-02407-z
Position: 21219 -> 10.1155/2014/971345
Position: 21220 -> 10.1007/978-1-4939-3414-0_14
Position: 21221 -> 10.11604/pamj.supp.2020.37.21.25767
Posit

Position: 21382 -> 10.1016/j.psychres.2020.113347
Position: 21383 -> 10.1016/j.ekir.2020.04.024
Position: 21384 -> 10.5041/rmmj.10417
Position: 21385 -> 10.1016/j.puhe.2020.11.015
Position: 21386 -> 10.1016/j.ijinfomgt.2020.102182
Position: 21387 -> 10.1016/j.psj.2020.04.023
Position: 21388 -> 10.1086/422040
Position: 21389 -> 10.1007/s00705-016-3088-0
Position: 21390 -> 10.1371/journal.pone.0091103
Position: 21391 -> 10.1101/2020.10.20.20215616
Position: 21392 -> 10.1038/s41391-020-0258-7
Position: 21393 -> 10.3390/pathogens8040259
Position: 21394 -> 10.1007/s00436-019-06366-y
Position: 21395 -> 10.1245/s10434-020-09026-z
Position: 21396 -> 10.1038/s41598-018-24729-w
Position: 21397 -> 10.1101/2020.08.31.20185314
Position: 21398 -> 10.1603/ec12095
Position: 21399 -> 10.1016/j.ijhm.2020.102667
Position: 21400 -> 10.1007/s15010-020-01446-z
Position: 21401 -> 10.1016/j.clae.2020.08.003
Position: 21402 -> 10.1016/j.medj.2020.05.002
Position: 21403 -> 10.1371/journal.ppat.1005311
Position:

Position: 21562 -> 10.1007/s10640-020-00476-3
Position: 21563 -> 10.3390/v5102375
Position: 21564 -> 10.3390/pharmacy8040184
Position: 21565 -> 10.1109/tmi.2016.2528162
Position: 21566 -> 10.1101/2020.12.14.20248154
Position: 21567 -> 10.1101/2020.04.02.20048793
Position: 21568 -> 10.1002/rth2.12432
Position: 21569 -> 10.1016/j.infpip.2020.100076
Position: 21570 -> 10.1007/s15010-020-01530-4
Position: 21571 -> 10.1007/s10554-020-01980-9
Position: 21572 -> 10.1016/j.diabres.2020.108338
Position: 21573 -> 10.1177/1329878x20956409
Position: 21574 -> 10.1016/j.phymed.2020.153208
Position: 21575 -> 10.1101/2020.09.11.20191692
Position: 21576 -> 10.1007/s00249-005-0504-x
Position: 21577 -> 10.1007/s40121-020-00360-z
Position: 21578 -> 10.1016/j.ajp.2020.102196
Position: 21579 -> 10.3389/fphar.2020.571156
Position: 21580 -> 10.1007/s40617-020-00449-4
Position: 21581 -> 10.1016/j.jsat.2020.108210
Position: 21582 -> 10.3390/v10110596
Position: 21583 -> 10.1093/nar/gkt916
Position: 21584 -> 10.1

Position: 21743 -> 10.1093/eurpub/ckq054
Position: 21744 -> 10.6026/97320630016532
Position: 21745 -> 10.1093/geronb/gbaa148
Position: 21746 -> 10.1002/pro.3942
Position: 21747 -> 10.1177/2333721420979840
Position: 21748 -> 10.4269/ajtmh.12-0600
Position: 21749 -> 10.3201/eid2507.180874
Position: 21750 -> 10.1016/j.xcrm.2020.100052
Position: 21751 -> 10.1016/bs.adcom.2020.08.013
Position: 21752 -> 10.1371/journal.pone.0206001
Position: 21753 -> 10.1016/0022-2836(90)90192-o
Position: 21754 -> 10.1017/dmp.2020.217
Position: 21755 -> 10.1016/j.ijid.2020.04.065
Position: 21756 -> 10.1016/j.jviscsurg.2020.06.014
Position: 21757 -> 10.1107/s090744490602991x
Position: 21758 -> 10.13140/rg.2.2.18592.20487
Position: 21759 -> 10.1016/j.techfore.2020.120248
Position: 21760 -> 10.1016/j.csbj.2021.01.039
Position: 21761 -> 10.1093/cid/ciaa641
Position: 21762 -> 10.1038/s41598-020-76141-y
Position: 21763 -> 10.1182/blood.2020007214
Position: 21764 -> 10.1002/jmv.25414
Position: 21765 -> 10.1038/s414

Position: 21926 -> 10.1080/20008686.2020.1781330
Position: 21927 -> 10.1016/j.physd.2020.132701
Position: 21928 -> 10.1016/j.chaos.2020.110148
Position: 21929 -> 10.1186/s13567-020-00871-0
Position: 21930 -> 10.1111/j.1423-0410.2005.00653.x
Position: 21931 -> 10.1302/2633-1462.110.bjo-2020-0149.r1
Position: 21932 -> 10.3389/fimmu.2020.570018
Position: 21933 -> 10.1016/j.japh.2020.07.018
Position: 21934 -> 10.1093/annweh/wxaa116
Position: 21935 -> 10.1007/s42438-020-00203-7
Position: 21936 -> 10.1016/j.paid.2021.110706
Position: 21937 -> 10.1016/j.spasta.2020.100480
Position: 21938 -> 10.3390/s18010125
Position: 21939 -> 10.1016/j.cardfail.2020.05.019
Position: 21940 -> 10.1097/md.0000000000020370
Position: 21941 -> 10.1038/s41598-017-02325-8
Position: 21942 -> 10.1177/0020764020935488
Position: 21943 -> 10.3390/ijerph18010207
Position: 21944 -> 10.1186/s12985-016-0529-z
Position: 21945 -> 10.1016/j.peptides.2011.05.015
Position: 21946 -> 10.1016/j.resuscitation.2020.09.012
Position: 21

Position: 22106 -> 10.1177/1367877920912257
Position: 22107 -> 10.1155/2020/8198963
Position: 22108 -> 10.1186/1471-2164-10-64
Position: 22109 -> 10.1016/j.pid.2015.11.008
Position: 22110 -> 10.1186/s13223-020-00467-5
Position: 22111 -> 10.1016/j.fct.2020.111694
Position: 22112 -> 10.1016/j.diabres.2020.108214
Position: 22113 -> 10.1057/978-1-349-95940-2_1
Position: 22114 -> 10.1007/s42438-020-00126-3
Position: 22115 -> 10.1101/2020.05.19.097410
Position: 22116 -> 10.1016/s1359-6446(05)03481-1
Position: 22117 -> 10.1002/ped4.12207
Position: 22118 -> 10.1371/journal.pone.0099495
Position: 22119 -> 10.1101/2020.05.12.20099242
Position: 22120 -> 10.1002/jmv.24577
Position: 22121 -> 10.1093/cid/cix741
Position: 22122 -> 10.1371/journal.ppat.1008812
Position: 22123 -> 10.1111/cmi.12446
Position: 22124 -> 10.1016/j.jceh.2020.06.009
Position: 22125 -> 10.1158/1055-9965.epi-20-0606
Position: 22126 -> 10.1101/2020.08.12.247338
Position: 22127 -> 10.1111/cars.12288
Position: 22128 -> 10.1007/s11

Position: 22288 -> 10.3390/s19183960
Position: 22289 -> 10.1007/978-3-030-44999-5_46
Position: 22290 -> 10.1007/978-3-030-61160-6_9
Position: 22291 -> 10.2147/opth.s285966
Position: 22292 -> 10.1101/2020.07.01.20143925
Position: 22293 -> 10.1111/j.1365-2656.2011.01873.x
Position: 22294 -> 10.3390/v13010100
Position: 22295 -> 10.3389/fmicb.2020.00812
Position: 22296 -> 10.1038/nmicrobiol.2016.58
Position: 22297 -> 10.1101/2021.01.15.21249896
Position: 22298 -> 10.11604/pamj.2020.37.199.23733
Position: 22299 -> 10.1016/j.sjbs.2020.12.009
Position: 22300 -> 10.1016/j.puhe.2020.04.012
Position: 22301 -> 10.1021/acs.jmedchem.0c01063
Position: 22302 -> 10.1007/978-3-319-91184-7_15
Position: 22303 -> 10.1186/s12913-020-05446-5
Position: 22304 -> 10.2807/1560-7917.es.2020.25.18.2000543
Position: 22305 -> 10.1007/s40618-020-01323-4
Position: 22306 -> 10.1007/978-1-59745-405-6_21
Position: 22307 -> 10.1093/molehr/gau057
Position: 22308 -> 10.1007/s10479-020-03912-1
Position: 22309 -> 10.1007/s10

Position: 22471 -> 10.1371/journal.pone.0243704
Position: 22472 -> 10.1101/2021.01.07.425307
Position: 22473 -> 10.1016/b978-0-12-801238-3.95728-3
Position: 22474 -> 10.1016/j.jbi.2020.103500
Position: 22475 -> 10.1371/journal.pone.0090003
Position: 22476 -> 10.1016/j.jpeds.2005.04.066
Position: 22477 -> 10.3389/fbioe.2020.623705
Position: 22478 -> 10.1016/j.jhin.2020.08.027
Position: 22479 -> 10.14639/0392-100x-n0832
Position: 22480 -> 10.4178/epih.e2020011
Position: 22481 -> 10.1016/j.virol.2010.06.043
Position: 22482 -> 10.1057/s41254-020-00196-x
Position: 22483 -> 10.1007/s00120-020-01431-2
Position: 22484 -> 10.1016/j.febslet.2004.09.026
Position: 22485 -> 10.1007/978-3-030-55878-9_11
Position: 22486 -> 10.1007/s10900-020-00863-3
Position: 22487 -> 10.1007/978-3-030-50143-3_7
Position: 22488 -> 10.1101/2020.12.22.423985
Position: 22489 -> 10.1016/j.virusres.2020.198058
Position: 22490 -> 10.1292/jvms.16-0342
Position: 22491 -> 10.1101/2020.06.19.160747
Position: 22492 -> 10.1007/s

Position: 22651 -> 10.3390/cancers13010091
Position: 22652 -> 10.1101/2020.06.08.20125872
Position: 22653 -> 10.1016/j.fitote.2014.09.018
Position: 22654 -> 10.1371/journal.pone.0243885
Position: 22655 -> 10.1101/2020.05.19.20107144
Position: 22656 -> 10.1016/j.yamp.2020.07.002
Position: 22657 -> 10.3390/children7090125
Position: 22658 -> 10.1101/2020.12.01.20241695
Position: 22659 -> 10.1186/s13054-020-03241-6
Position: 22660 -> 10.1007/bf03184258
Position: 22661 -> 10.1186/s12955-020-01352-w
Position: 22662 -> 10.1007/s10822-020-00327-9
Position: 22663 -> 10.1016/j.jviromet.2021.114070
Position: 22664 -> 10.1002/jmrs.406
Position: 22665 -> 10.1371/journal.pntd.0008972
Position: 22666 -> 10.3390/jcm9113624
Position: 22667 -> 10.1017/s0950268820001430
Position: 22668 -> 10.1101/2020.11.17.387902
Position: 22669 -> 10.1016/j.psychres.2020.113252
Position: 22670 -> 10.1111/trf.15869
Position: 22671 -> 10.3390/ijerph18020607
Position: 22672 -> 10.1093/sleep/zsaa258
Position: 22673 -> 10.3

Position: 22833 -> 10.1016/j.jcv.2014.03.021
Position: 22834 -> 10.3389/fpsyg.2020.608677
Position: 22835 -> 10.3390/ma13173744
Position: 22836 -> 10.1016/j.celrep.2018.11.082
Position: 22837 -> 10.1016/j.buildenv.2015.02.041
Position: 22838 -> 10.1038/s41415-020-1543-5
Position: 22839 -> 10.3390/ijerph17228701
Position: 22840 -> 10.1016/j.canep.2020.101805
Position: 22841 -> 10.1007/s11423-020-09913-y
Position: 22842 -> 10.1016/j.autrev.2020.102729
Position: 22843 -> 10.3390/s20195603
Position: 22844 -> 10.1016/j.chom.2020.08.004
Position: 22845 -> 10.1101/2020.04.28.20083329
Position: 22846 -> 10.1371/journal.pone.0240150
Position: 22847 -> 10.1101/2020.11.11.20229765
Position: 22848 -> 10.1101/2020.10.30.20222786
Position: 22849 -> 10.1007/978-3-030-47436-2_10
Position: 22850 -> 10.1038/nprot.2006.452
Position: 22851 -> 10.1101/2020.04.06.028647
Position: 22852 -> 10.1016/j.virol.2014.09.020
Position: 22853 -> 10.12688/wellcomeopenres.16225.2
Position: 22854 -> 10.1021/acs.jpclett.0

Position: 23014 -> 10.1101/2020.11.08.373621
Position: 23015 -> 10.3389/fimmu.2020.02037
Position: 23016 -> 10.1007/s10140-020-01865-3
Position: 23017 -> 10.1128/jcm.00875-20
Position: 23018 -> 10.1007/978-3-540-72665-4_8
Position: 23019 -> 10.1101/2020.06.17.20134254
Position: 23020 -> 10.1016/j.worlddev.2018.08.013
Position: 23021 -> 10.3389/fmicb.2019.00419
Position: 23022 -> 10.1016/j.psychres.2020.113525
Position: 23023 -> 10.26434/chemrxiv.12275705
Position: 23024 -> 10.1038/s41467-020-19891-7
Position: 23025 -> 10.1016/j.idh.2020.09.005
Position: 23026 -> 10.1016/j.dci.2011.11.001
Position: 23027 -> 10.1016/j.dsx.2020.05.023
Position: 23028 -> 10.1016/j.dib.2020.106446
Position: 23029 -> 10.1016/j.admp.2008.03.025
Position: 23030 -> 10.1101/2020.06.24.20139006
Position: 23031 -> 10.1007/bf01379081
Position: 23032 -> 10.15585/mmwr.mm6948a3
Position: 23033 -> 10.1371/journal.pgen.1004403
Position: 23034 -> 10.1101/2020.04.25.20079111
Position: 23035 -> 10.3390/bs10120183
Position:

Position: 23196 -> 10.1016/j.virol.2009.06.006
Position: 23197 -> 10.1136/archdischild-2020-320030
Position: 23198 -> 10.1016/j.vetpar.2019.04.008
Position: 23199 -> 10.1186/s41256-020-00164-1
Position: 23200 -> 10.3390/ijerph17249326
Position: 23201 -> 10.1177/0194599820934380
Position: 23202 -> 10.1007/s10459-020-10010-1
Position: 23203 -> 10.1007/978-3-030-45439-5_10
Position: 23204 -> 10.12688/hrbopenres.13031.1
Position: 23205 -> 10.1007/978-3-030-50420-5_36
Position: 23206 -> 10.3389/fpsyg.2020.02011
Position: 23207 -> 10.1016/j.paid.2020.110398
Position: 23208 -> 10.1186/s13071-020-04274-0
Position: 23209 -> 10.1016/j.gendis.2021.01.008
Position: 23210 -> 10.1016/j.ajic.2020.06.190
Position: 23211 -> 10.1128/jvi.00902-20
Position: 23212 -> 10.1101/2020.06.02.20120295
Position: 23213 -> 10.1186/s12967-020-02405-w
Position: 23214 -> 10.1016/j.gene.2008.01.004
Position: 23215 -> 10.1101/2020.12.01.407411
Position: 23216 -> 10.1016/j.jocn.2020.05.017
Position: 23217 -> 10.4258/hir.2

Position: 23377 -> 10.15585/mmwr.mm6946a2
Position: 23378 -> 10.1016/j.cbi.2020.109348
Position: 23379 -> 10.1016/j.xphs.2020.11.023
Position: 23380 -> 10.1101/2020.04.29.20085472
Position: 23381 -> 10.1007/978-3-8274-2156-2_4
Position: 23382 -> 10.1101/2020.10.13.20211821
Position: 23383 -> 10.1101/2020.02.16.951723
Position: 23384 -> 10.1101/2020.11.24.20235887
Position: 23385 -> 10.1023/a:1008037916674
Position: 23386 -> 10.1007/s00430-019-00610-z
Position: 23387 -> 10.1101/2020.04.19.20070870
Position: 23388 -> 10.1002/ddr.10378
Position: 23389 -> 10.1101/2020.08.19.20178137
Position: 23390 -> 10.1007/s00264-009-0829-7
Position: 23391 -> 10.1136/bmjopen-2020-040649
Position: 23392 -> 10.3390/s21020402
Position: 23393 -> 10.2196/10905
Position: 23394 -> 10.1016/j.ausmj.2020.07.001
Position: 23395 -> 10.3390/ijms21051676
Position: 23396 -> 10.1016/j.bmc.2019.115273
Position: 23397 -> 10.1016/j.intimp.2020.106980
Position: 23398 -> 10.1007/s10755-020-09525-7
Position: 23399 -> 10.1016

In [None]:
# Inspect the raw lookup results, keyed by position in doi_counted.
dict_new_extra_info

In [None]:
# Build a frame from the position-keyed dict; each position becomes a column.
df_new_extra_info = pd.DataFrame(dict_new_extra_info)

In [None]:
# Display the new results (positions are still columns at this stage).
df_new_extra_info

In [None]:
# Transpose so that each looked-up position becomes a row, ready to be
# appended to df_current_extra_info.
df_new_extra_transposed = df_new_extra_info.T
df_new_extra_transposed

In [None]:
# Append the new rows to the previously collected ones; ignore_index=True
# renumbers the combined rows 0..n-1.
df_combined_extra_info = pd.concat([df_current_extra_info, df_new_extra_transposed],ignore_index=True)

In [None]:
# Persist the combined results to the same file that was loaded as
# df_current_extra_info, overwriting its previous content.
df_combined_extra_info.to_pickle('extra_info_CS5099.pkl')

In [None]:
# Spot-check the lookup with a single, hard-coded DOI.
entry = AffiliationsFromScopusByDOI(client, '10.1086/605034')
entry

In [None]:
# Pretty-print the entry as indented JSON; ensure_ascii=False keeps non-ASCII
# characters as they are instead of escaping them.
print (json.dumps(entry, ensure_ascii=False, indent=4))

In [None]:
# Tabulate the entry's 'affiliation' data.
# NOTE(review): this raises TypeError when the lookup returned None.
df_test = pd.DataFrame.from_dict(entry['affiliation'], orient='columns')
df_test

In [None]:
# Show the entry's 'coredata' section.
entry['coredata']

In [None]:
## ScienceDirect (full-text) document example using DOI
# Fetch one full-text document by DOI; handle the failure case first.
doi_doc = FullDoc(doi = '10.1016/S1525-1578(10)60571-5')
if not doi_doc.read(client):
    print ("Read document failed.")
else:
    # Read succeeded: show the title, then call write() on the document.
    print ("doi_doc.title: ", doi_doc.title)
    doi_doc.write()
# Leave the document object as the cell's displayed value.
doi_doc