# CORD-19-collect-scopus-data

This Jupyter notebook collects additional metadata from Scopus to enrich the CORD-19 dataset:
https://datadryad.org/stash/dataset/doi:10.5061/dryad.vmcvdncs0

First, import the packages the notebook depends on.

In [1]:
import numpy as np
import pandas as pd
import csv
import ast
import collections
import matplotlib.pyplot as plt
import Levenshtein as lev
from fuzzywuzzy import fuzz 
import datetime
import matplotlib.pyplot as plt
import re
from urllib.parse import urlparse
from collections import Counter

from elsapy.elsclient import ElsClient
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch

import time # for sleep
from pybtex.database import parse_file, BibliographyData, Entry
import json
from elsapy.elsclient import ElsClient
from elsapy.elsdoc import AbsDoc
from elsapy.elssearch import ElsSearch

In [2]:
# Load the CORD-19 software-mentions table; its 'doi' column supplies the DOIs looked up below.
CORD19_CSV = pd.read_csv('../data/cord-19/CORD19_software_mentions.csv')

In [3]:
# Row count of the 'doi' column; len() counts missing (NaN) entries as well.
len(CORD19_CSV['doi'])

77448

In [4]:
# Keep the DOI column as its own Series; records without a DOI show up as NaN.
doi = CORD19_CSV['doi']
doi

0                                 NaN
1          10.1016/j.regg.2021.01.002
2           10.1016/j.rec.2020.08.002
3        10.1016/j.vetmic.2006.11.026
4                   10.3390/v12080849
                     ...             
77443      10.1007/s11229-020-02869-9
77444                             NaN
77445     10.1101/2020.05.13.20100206
77446      10.1007/s42991-020-00052-8
77447     10.1101/2020.09.14.20194670
Name: doi, Length: 77448, dtype: object

In [5]:
# Count occurrences of each DOI. value_counts() excludes NaN by default, so the
# index of the result holds every distinct, non-missing DOI exactly once.
# NOTE(review): the collection loop further down addresses DOIs by their position
# in this index, so resuming a run relies on the ordering being identical between
# sessions - confirm this holds for the pandas version in use.
doi_counted = doi.value_counts()
doi_counted

10.1016/j.dsx.2020.04.012       2
10.31729/jnma.5498              2
10.1007/s41109-020-00317-8      1
10.1371/journal.pone.0240400    1
10.1155/2014/768515             1
                               ..
10.1007/s00117-020-00747-6      1
10.1016/j.xcrm.2020.100145      1
10.1016/j.amjmed.2020.04.003    1
10.1016/j.hrtlng.2020.08.021    1
10.1101/2020.06.10.145292       1
Name: doi, Length: 74302, dtype: int64

In [6]:
def AffiliationsFromScopusByDOI(client, doi):
    """Obtain additional paper information from Scopus by DOI.

    Runs a Scopus search for ``DOI(<doi>)``, takes the Scopus id of the first
    hit and reads the corresponding abstract document.

    Parameters
    ----------
    client : ElsClient
        Initialised elsapy client used for the search and the document read.
    doi : str
        DOI to look up.

    Returns
    -------
    dict or None
        ``scp_doc.data`` of the abstract document (in the runs shown in this
        notebook: a dict with the keys 'affiliation' and 'coredata'), or None
        when the search has no usable hit or the document lookup fails.

    Notes
    -----
    A successfully read document is also passed to ``scp_doc.write()``
    (presumably stored in elsapy's local data directory - confirm).
    Errors raised by the search request itself are not caught here.
    KeyboardInterrupt / SystemExit are deliberately not swallowed, so a
    long-running collection loop can be stopped at any point.
    """
    doc_srch = ElsSearch("DOI(" + doi + ")", 'scopus')
    doc_srch.execute(client, get_all=True)

    # Expected miss: empty result list, or a first entry without a usable
    # "dc:identifier" (e.g. an error placeholder for an empty result set).
    try:
        scopus_id = doc_srch.results[0]["dc:identifier"].split(":")[1]
    except (IndexError, KeyError, AttributeError, TypeError):
        return None

    try:
        scp_doc = AbsDoc(scp_id=scopus_id)
        if scp_doc.read(client):
            scp_doc.write()
        else:
            print ("Read document failed.")
        return scp_doc.data
    except Exception as exc:
        # Stay best-effort for the caller, but say which DOI failed and why
        # instead of hiding the problem completely.
        print ("Scopus lookup failed for DOI " + str(doi) + ": " + repr(exc))
        return None

In [7]:
## Load configuration
# config.json must provide the Scopus API key under "apikey". The context
# manager closes the file even if parsing the JSON fails.
with open("config.json") as con_file:
    config = json.load(con_file)
## Initialize client
client = ElsClient(config['apikey'])
# Example of an equivalent raw request (API key intentionally omitted - never paste real keys into the notebook):
# https://api.elsevier.com/content/search/scopus?query=DOI(10.1109/MCOM.2016.7509373)&apiKey=<YOUR_API_KEY>

In [8]:
# Load the results saved by earlier sessions.
# NOTE(review): the same file is overwritten at the end of the notebook, so re-running
# this cell afterwards loads the already-extended data. Only unpickle files you trust.
df_current_extra_info = pd.read_pickle('extra_info_CS5099.pkl')
df_current_extra_info

Unnamed: 0,affiliation,coredata
0,"[{'affiliation-city': None, 'affilname': 'Pata...","{'srctype': 'j', 'prism:issueIdentifier': '230..."
1,"[{'affiliation-city': 'New Delhi', 'affilname'...","{'srctype': 'j', 'eid': '2-s2.0-85083171050', ..."
2,,
3,"[{'affiliation-city': 'London', 'affilname': '...","{'srctype': 'j', 'eid': '2-s2.0-79953057246', ..."
4,"[{'affiliation-city': 'Baoding', 'affilname': ...","{'srctype': 'j', 'prism:issueIdentifier': '7',..."
...,...,...
55509,"[{'affiliation-city': 'Cuernavaca', 'affilname...","{'srctype': 'j', 'prism:issueIdentifier': '1',..."
55510,,
55511,"[{'affiliation-city': 'Pierre', 'affilname': '...","{'srctype': 'j', 'eid': '2-s2.0-85088066845', ..."
55512,"[{'affiliation-city': 'Oxford', 'affilname': '...","{'srctype': 'j', 'eid': '2-s2.0-85087502180', ..."


In [9]:
# Number of rows already collected; the collection loop starts at this position.
len_df_current_extra_info = len(df_current_extra_info)
len_df_current_extra_info

55514

In [10]:
%%time
i = len_df_current_extra_info
dict_new_extra_info = dict()
len_dois = len(doi_counted)
while i < len_dois:
    print("Position: " + str(i) + " -> " +  doi_counted.index[i])
    dict_new_extra_info[i] = AffiliationsFromScopusByDOI(client, doi_counted.index[i])
    i = i + 1 

Position: 55514 -> 10.1186/s12929-020-00703-5
Position: 55515 -> 10.1007/s13197-020-04731-9
Position: 55516 -> 10.1371/journal.pntd.0007098
Position: 55517 -> 10.4103/ijmm.ijmm_20_178
Position: 55518 -> 10.1016/j.bbamem.2013.07.025
Position: 55519 -> 10.1093/nar/gkaa1100
Position: 55520 -> 10.3390/ijerph17217847
Position: 55521 -> 10.1101/2020.05.04.20090951
Position: 55522 -> 10.1016/j.immuni.2020.07.020
Position: 55523 -> 10.1101/099069
Position: 55524 -> 10.1016/j.neucli.2020.06.001
Position: 55525 -> 10.1111/j.1469-0691.2007.01732.x
Position: 55526 -> 10.1128/mra.00845-20
Position: 55527 -> 10.1007/978-1-59745-181-9_20
Position: 55528 -> 10.1007/s10489-020-01938-3
Position: 55529 -> 10.1093/annhyg/mel073
Position: 55530 -> 10.1007/s00467-020-04584-6
Position: 55531 -> 10.1016/j.vaccine.2009.12.078
Position: 55532 -> 10.1007/s10096-006-0246-4
Position: 55533 -> 10.1111/inr.12607
Position: 55534 -> 10.5935/0103-507x.20200063
Position: 55535 -> 10.1259/bjro.20200026
Position: 55536 ->

Position: 55697 -> 10.1016/j.apenergy.2020.115845
Position: 55698 -> 10.15252/emmm.202013426
Position: 55699 -> 10.1101/2020.04.09.20056325
Position: 55700 -> 10.1007/s00259-020-05027-y
Position: 55701 -> 10.1016/j.rinp.2020.103746
Position: 55702 -> 10.1016/j.rinp.2021.103829
Position: 55703 -> 10.3346/jkms.2020.35.e227
Position: 55704 -> 10.1007/978-94-024-1113-3_4
Position: 55705 -> 10.1101/2020.05.21.109835
Position: 55706 -> 10.3390/ijerph17228535
Position: 55707 -> 10.35241/emeraldopenres.13727.2
Position: 55708 -> 10.1186/s12951-019-0531-x
Position: 55709 -> 10.1101/2020.09.29.20203877
Position: 55710 -> 10.1016/j.clae.2020.04.002
Position: 55711 -> 10.1016/j.compedu.2020.104042
Position: 55712 -> 10.1007/s13340-020-00467-1
Position: 55713 -> 10.1007/s11606-020-06527-1
Position: 55714 -> 10.3389/fmicb.2019.01326
Position: 55715 -> 10.1016/j.antiviral.2015.01.011
Position: 55716 -> 10.1186/s12877-021-02013-3
Position: 55717 -> 10.1101/2020.06.05.134114
Position: 55718 -> 10.1016/

Position: 55878 -> 10.2471/blt.20.252742
Position: 55879 -> 10.7150/ijms.49544
Position: 55880 -> 10.3389/fvets.2019.00293
Position: 55881 -> 10.3389/fmicb.2020.580137
Position: 55882 -> 10.1038/s41372-020-00775-z
Position: 55883 -> 10.1007/978-3-030-45442-5_41
Position: 55884 -> 10.1101/2020.08.09.20171041
Position: 55885 -> 10.1016/j.amjsurg.2020.12.015
Position: 55886 -> 10.1023/a:1010912012932
Position: 55887 -> 10.1016/j.biocon.2021.108952
Position: 55888 -> 10.1101/2020.08.27.20183277
Position: 55889 -> 10.1007/s13205-020-02610-w
Position: 55890 -> 10.1093/infdis/jix209
Position: 55891 -> 10.1002/aisy.202000070
Position: 55892 -> 10.1016/j.injury.2020.05.016
Position: 55893 -> 10.1101/2020.08.14.20170290
Position: 55894 -> 10.1101/2020.06.09.141630
Position: 55895 -> 10.1101/2020.08.04.20163782
Position: 55896 -> 10.1007/s42979-020-00320-x
Position: 55897 -> 10.1093/ecco-jcc/jjaa120
Position: 55898 -> 10.1186/cc2687
Position: 55899 -> 10.7189/jogh.10.020515
Position: 55900 -> 10.

Position: 56061 -> 10.1016/j.ejrad.2020.109209
Position: 56062 -> 10.1371/journal.pone.0243789
Position: 56063 -> 10.1016/j.heliyon.2021.e05957
Position: 56064 -> 10.1101/2020.10.08.20208447
Position: 56065 -> 10.3332/ecancer.2020.ed104
Position: 56066 -> 10.4102/phcfm.v12i1.2449
Position: 56067 -> 10.1016/j.jviromet.2009.07.028
Position: 56068 -> 10.1007/s00464-020-07538-z
Position: 56069 -> 10.1016/j.mayocpiqo.2020.10.005
Position: 56070 -> 10.1128/jvi.00543-20
Position: 56071 -> 10.3390/v12060628
Position: 56072 -> 10.1023/a:1011831902219
Position: 56073 -> 10.1101/2020.06.19.20135996
Position: 56074 -> 10.1016/j.pdisas.2020.100091
Position: 56075 -> 10.1016/j.pain.2012.07.016
Position: 56076 -> 10.1111/j.1440-1843.2011.01937_3.x
Position: 56077 -> 10.1101/617910
Position: 56078 -> 10.1057/s41296-020-00421-5
Position: 56079 -> 10.1016/j.infpip.2019.100029
Position: 56080 -> 10.3390/molecules26010020
Position: 56081 -> 10.1101/2020.12.11.20210419
Position: 56082 -> 10.1016/j.jns.2020

Position: 56243 -> 10.1007/978-3-030-49161-1_33
Position: 56244 -> 10.1101/2020.05.14.20100909
Position: 56245 -> 10.1097/jhm-d-20-00131
Position: 56246 -> 10.1093/cid/ciaa1420
Position: 56247 -> 10.1644/13-mamm-a-185
Position: 56248 -> 10.5041/rmmj.10411
Position: 56249 -> 10.1016/s2213-8587(20)30216-3
Position: 56250 -> 10.1016/j.amjcard.2020.06.053
Position: 56251 -> 10.1007/978-1-0716-0211-9_14
Position: 56252 -> 10.1038/s41467-020-17892-0
Position: 56253 -> 10.1101/2020.04.14.20065664
Position: 56254 -> 10.1016/j.virusres.2020.197974
Position: 56255 -> 10.3390/ijerph17228522
Position: 56256 -> 10.1101/2020.12.19.20248561
Position: 56257 -> 10.1016/j.euf.2020.06.001
Position: 56258 -> 10.1007/978-3-319-98122-2_4
Position: 56259 -> 10.1016/j.chiabu.2020.104756
Position: 56260 -> 10.1007/978-3-030-47436-2_32
Position: 56261 -> 10.1038/s41598-020-60636-9
Position: 56262 -> 10.1101/2020.06.16.155812
Position: 56263 -> 10.1371/journal.pone.0242532
Position: 56264 -> 10.3390/molecules251

Position: 56425 -> 10.1038/s41433-019-0747-x
Position: 56426 -> 10.1186/s12917-016-0711-y
Position: 56427 -> 10.1016/j.meegid.2018.03.010
Position: 56428 -> 10.1007/s13753-020-00294-7
Position: 56429 -> 10.1016/j.evalprogplan.2019.101724
Position: 56430 -> 10.1371/journal.pone.0042343
Position: 56431 -> 10.1016/j.bmc.2020.115466
Position: 56432 -> 10.1021/acs.jpclett.0c02602
Position: 56433 -> 10.1084/jem.20201129
Position: 56434 -> 10.1101/2020.10.08.20209692
Position: 56435 -> 10.3389/fimmu.2020.574029
Position: 56436 -> 10.1016/j.jcv.2019.08.005
Position: 56437 -> 10.1038/s41581-020-0271-z
Position: 56438 -> 10.1186/1743-422x-10-331
Position: 56439 -> 10.1007/s00109-008-0370-y
Position: 56440 -> 10.1016/j.jinf.2020.10.009
Position: 56441 -> 10.1101/2020.09.04.20185645
Position: 56442 -> 10.1016/j.joms.2020.12.006
Position: 56443 -> 10.3390/v12101151
Position: 56444 -> 10.3390/jcm9072315
Position: 56445 -> 10.1093/cvr/cvaa193
Position: 56446 -> 10.1101/2020.04.10.035683
Position: 564

Position: 56608 -> 10.1016/s1473-3099(20)30273-5
Position: 56609 -> 10.1213/ane.0000000000004844
Position: 56610 -> 10.1111/febs.15375
Position: 56611 -> 10.1186/1472-6963-9-94
Position: 56612 -> 10.1016/s2215-0366(20)30150-4
Position: 56613 -> 10.1038/s41598-020-79413-9
Position: 56614 -> 10.1007/978-3-030-50153-2_57
Position: 56615 -> 10.1007/978-3-030-52705-1_37
Position: 56616 -> 10.2147/ijwh.s286088
Position: 56617 -> 10.1016/j.jfma.2021.01.013
Position: 56618 -> 10.1186/s12879-017-2784-z
Position: 56619 -> 10.1089/vim.2019.0177
Position: 56620 -> 10.1007/s10668-020-00867-y
Position: 56621 -> 10.1016/j.intimp.2020.107329
Position: 56622 -> 10.1186/s12985-020-01402-1
Position: 56623 -> 10.1056/nejmoa2035002
Position: 56624 -> 10.1038/s41390-020-01236-1
Position: 56625 -> 10.1038/srep22044
Position: 56626 -> 10.1186/s12931-020-01574-y
Position: 56627 -> 10.1371/journal.pone.0243731
Position: 56628 -> 10.3390/molecules25204725
Position: 56629 -> 10.1186/s13677-020-00211-9
Position: 5

Position: 56791 -> 10.1016/j.tmaid.2019.101504
Position: 56792 -> 10.1186/s12905-021-01177-9
Position: 56793 -> 10.1371/journal.pone.0060595
Position: 56794 -> 10.1007/s10461-020-03155-y
Position: 56795 -> 10.1097/md.0000000000021774
Position: 56796 -> 10.1101/2020.07.01.150805
Position: 56797 -> 10.1103/physrevd.102.064047
Position: 56798 -> 10.1007/s00253-019-10319-x
Position: 56799 -> 10.17269/s41997-020-00425-z
Position: 56800 -> 10.1007/bf03405419
Position: 56801 -> 10.1016/j.conx.2020.100042
Position: 56802 -> 10.1016/j.vetmic.2017.02.007
Position: 56803 -> 10.1007/s42979-020-00373-y
Position: 56804 -> 10.1016/j.chemphyslip.2020.105009
Position: 56805 -> 10.1093/geronb/gbaa186
Position: 56806 -> 10.1016/j.energy.2020.119568
Position: 56807 -> 10.1016/j.ecolmodel.2018.05.013
Position: 56808 -> 10.1093/brain/awaa375
Position: 56809 -> 10.1007/s42438-020-00166-9
Position: 56810 -> 10.1016/j.antiviral.2020.104819
Position: 56811 -> 10.1016/j.jbiotec.2013.12.006
Position: 56812 -> 10.

Position: 56971 -> 10.1007/s11606-020-06266-3
Position: 56972 -> 10.1016/j.jsurg.2020.08.030
Position: 56973 -> 10.1371/journal.pone.0121629
Position: 56974 -> 10.3389/fneur.2020.00360
Position: 56975 -> 10.1016/j.tourman.2020.104163
Position: 56976 -> 10.1101/2020.07.05.20146571
Position: 56977 -> 10.2147/rrtm.s269936
Position: 56978 -> 10.1007/s00296-020-04700-7
Position: 56979 -> 10.1101/2020.11.21.20236083
Position: 56980 -> 10.3390/vaccines8040570
Position: 56981 -> 10.1007/s11121-020-01202-7
Position: 56982 -> 10.1002/jia2.25649
Position: 56983 -> 10.1007/978-3-642-27426-8_15
Position: 56984 -> 10.1186/s12879-014-0617-x
Position: 56985 -> 10.1016/j.apnu.2020.07.026
Position: 56986 -> 10.1093/cid/ciaa468
Position: 56987 -> 10.3389/fimmu.2018.02884
Position: 56988 -> 10.1111/zph.12354
Position: 56989 -> 10.1371/journal.pone.0018928
Position: 56990 -> 10.1016/j.jviromet.2010.07.032
Position: 56991 -> 10.1186/s13756-020-00824-4
Position: 56992 -> 10.3390/antibiotics10010055
Position:

Position: 57155 -> 10.1111/j.1423-0410.2005.00653.x
Position: 57156 -> 10.1007/978-3-030-59833-4_1
Position: 57157 -> 10.1007/s11125-020-09488-9
Position: 57158 -> 10.1101/2020.12.07.415059
Position: 57159 -> 10.1101/2020.03.21.990770
Position: 57160 -> 10.1186/s12919-020-00185-1
Position: 57161 -> 10.1016/bs.host.2017.08.008
Position: 57162 -> 10.1016/j.vetimm.2008.02.003
Position: 57163 -> 10.1007/978-3-030-53288-8_10
Position: 57164 -> 10.1016/j.apm.2020.08.084
Position: 57165 -> 10.1097/pts.0000000000000795
Position: 57166 -> 10.3201/eid1002.030452
Position: 57167 -> 10.3390/ijerph17176117
Position: 57168 -> 10.1016/j.ecns.2020.07.005
Position: 57169 -> 10.1186/s12917-018-1364-9
Position: 57170 -> 10.1101/2020.12.08.20245977
Position: 57171 -> 10.1111/j.1939-1676.2011.00833.x
Position: 57172 -> 10.1101/2020.05.10.20097063
Position: 57173 -> 10.1101/2021.01.07.21249366
Position: 57174 -> 10.1101/2020.06.26.20135715
Position: 57175 -> 10.1016/j.ejmech.2014.09.089
Position: 57176 -> 1

Position: 57337 -> 10.1093/ofid/ofaa319
Position: 57338 -> 10.7554/elife.54497
Position: 57339 -> 10.1101/2019.12.31.891317
Position: 57340 -> 10.1016/j.anai.2020.07.018
Position: 57341 -> 10.1016/j.nmni.2020.100800
Position: 57342 -> 10.1016/j.heliyon.2020.e05482
Position: 57343 -> 10.1177/2374373520968392
Position: 57344 -> 10.1101/2020.04.01.020966
Position: 57345 -> 10.1111/ijcp.13557
Position: 57346 -> 10.1371/journal.pntd.0003143
Position: 57347 -> 10.3389/fbioe.2020.599800
Position: 57348 -> 10.1007/978-3-319-72962-6_2
Position: 57349 -> 10.1186/s12871-021-01241-9
Position: 57350 -> 10.1016/j.aquaculture.2014.10.018
Position: 57351 -> 10.3390/jcm9082639
Position: 57352 -> 10.3390/genes11010044
Position: 57353 -> 10.3389/fimmu.2020.00318
Position: 57354 -> 10.1007/s15007-020-2589-z
Position: 57355 -> 10.1016/j.carbpol.2020.117042
Position: 57356 -> 10.1016/j.jhlste.2020.100265
Position: 57357 -> 10.1016/s0140-6736(21)00234-8
Position: 57358 -> 10.3390/ijms21207702
Position: 57359

Position: 57519 -> 10.1007/978-3-030-45231-5_23
Position: 57520 -> 10.1371/journal.pone.0020656
Position: 57521 -> 10.3390/diagnostics10100769
Position: 57522 -> 10.1016/j.jvoice.2020.11.020
Position: 57523 -> 10.7189/jogh.10.020354
Position: 57524 -> 10.1101/2020.07.13.20152942
Position: 57525 -> 10.1016/j.resuscitation.2020.09.010
Position: 57526 -> 10.1101/2021.01.28.21250673
Position: 57527 -> 10.2147/dddt.s166051
Position: 57528 -> 10.1101/2020.07.25.20151225
Position: 57529 -> 10.3390/ijerph17197151
Position: 57530 -> 10.1016/j.scs.2021.102738
Position: 57531 -> 10.1007/s10273-020-2743-y
Position: 57532 -> 10.1101/2020.09.22.20199471
Position: 57533 -> 10.3390/jcm9113494
Position: 57534 -> 10.1016/j.pdisas.2020.100115
Position: 57535 -> 10.1186/s12917-018-1720-9
Position: 57536 -> 10.1016/j.micpath.2020.104236
Position: 57537 -> 10.1007/978-3-319-78723-7_25
Position: 57538 -> 10.1111/j.1442-9993.2005.01535.x
Position: 57539 -> 10.3390/ijms21114095
Position: 57540 -> 10.1016/j.jhe

Position: 57701 -> 10.3390/microorganisms8091287
Position: 57702 -> 10.1371/journal.ppat.1007710
Position: 57703 -> 10.1007/s10880-017-9514-y
Position: 57704 -> 10.1016/j.rbmo.2020.05.010
Position: 57705 -> 10.1016/j.mehy.2020.110027
Position: 57706 -> 10.1186/s12889-021-10207-y
Position: 57707 -> 10.1016/j.ecolecon.2020.106734
Position: 57708 -> 10.1016/j.vaccine.2020.10.059
Position: 57709 -> 10.1371/journal.pone.0242212
Position: 57710 -> 10.1186/1471-2350-12-141
Position: 57711 -> 10.1007/s42438-020-00124-5
Position: 57712 -> 10.1016/j.annemergmed.2020.07.022
Position: 57713 -> 10.3201/eid2010.140338
Position: 57714 -> 10.1186/s12889-016-3893-0
Position: 57715 -> 10.1186/1746-6148-8-208
Position: 57716 -> 10.3390/cells4040569
Position: 57717 -> 10.1016/j.jinf.2020.04.026
Position: 57718 -> 10.3390/cells9092028
Position: 57719 -> 10.1007/s10742-020-00223-7
Position: 57720 -> 10.1101/2020.07.02.20145391
Position: 57721 -> 10.3389/fmicb.2020.00320
Position: 57722 -> 10.1016/j.jstrokec

Position: 57883 -> 10.3389/fncel.2020.596072
Position: 57884 -> 10.1016/j.dib.2020.106176
Position: 57885 -> 10.1007/978-3-030-45002-1_14
Position: 57886 -> 10.1007/s42399-020-00614-4
Position: 57887 -> 10.1053/j.gastro.2020.09.033
Position: 57888 -> 10.1007/s11262-011-0570-3
Position: 57889 -> 10.1007/s12038-020-00069-8
Position: 57890 -> 10.1007/s00112-009-2064-0
Position: 57891 -> 10.1016/j.bjid.2020.07.010
Position: 57892 -> 10.1093/jalm/jfaa188
Position: 57893 -> 10.1016/j.vetmic.2009.04.025
Position: 57894 -> 10.1038/s41569-020-00503-2
Position: 57895 -> 10.1101/2020.06.03.20120337
Position: 57896 -> 10.1016/j.jpba.2020.113806
Position: 57897 -> 10.1101/2020.12.15.20248264
Position: 57898 -> 10.1101/2020.04.16.20067835
Position: 57899 -> 10.1016/j.virol.2012.07.005
Position: 57900 -> 10.1177/0020764020941567
Position: 57901 -> 10.1016/j.ijinfomgt.2020.102185
Position: 57902 -> 10.1007/s11262-019-01678-8
Position: 57903 -> 10.1016/j.bbagen.2011.06.018
Position: 57904 -> 10.1016/j.

Position: 58064 -> 10.1038/sj.bmt.1705484
Position: 58065 -> 10.1101/2020.08.26.267914
Position: 58066 -> 10.1016/j.vaccine.2016.02.067
Position: 58067 -> 10.1016/j.vetpar.2006.11.016
Position: 58068 -> 10.1038/s41374-020-00514-0
Position: 58069 -> 10.3389/fneur.2020.589901
Position: 58070 -> 10.3390/ijerph17197264
Position: 58071 -> 10.1016/j.acra.2020.07.034
Position: 58072 -> 10.1016/j.ijrobp.2020.12.011
Position: 58073 -> 10.1101/2021.01.11.426227
Position: 58074 -> 10.1016/j.amj.2020.06.010
Position: 58075 -> 10.1017/ipm.2020.115
Position: 58076 -> 10.1111/ajt.16167
Position: 58077 -> 10.1101/2020.08.20.259598
Position: 58078 -> 10.21203/rs.3.rs-94634/v1
Position: 58079 -> 10.1101/2020.10.20.20215756
Position: 58080 -> 10.1007/s10877-020-00576-x
Position: 58081 -> 10.1186/s13071-015-1070-4
Position: 58082 -> 10.1016/s2665-9913(20)30347-7
Position: 58083 -> 10.1007/s00773-020-00794-7
Position: 58084 -> 10.2147/rmhp.s254928
Position: 58085 -> 10.1016/j.tvjl.2018.11.008
Position: 580

KeyboardInterrupt: 

In [11]:
# Disabled helper (kept for reference): copies the entries for positions from
# len_df_current_extra_info up to a hard-coded cutoff into a fresh dict and then
# replaces dict_new_extra_info with it.
# NOTE(review): the cutoff 47683 looks like a leftover from an earlier session - confirm before re-enabling.
# dict_new_extra_info_saver = dict()
# i = len_df_current_extra_info
# while i < 47683:
#     #print("Position: " + str(i) + " -> " +  doi_counted.index[i])
#     dict_new_extra_info_saver[i] = dict_new_extra_info[i]
#     i = i + 1 

# dict_new_extra_info = dict_new_extra_info_saver

In [12]:
# Inspect the raw results gathered so far (position -> lookup result, None where the lookup returned nothing).
dict_new_extra_info

{55514: {'affiliation': [{'affiliation-city': 'Athens',
    'affilname': 'University of Athens Medical School',
    'affiliation-country': 'Greece'},
   {'affiliation-city': 'Athens',
    'affilname': 'National and Kapodistrian University of Athens',
    'affiliation-country': 'Greece'},
   {'affiliation-city': 'Frederick',
    'affilname': 'National Cancer Institute at Frederick',
    'affiliation-country': 'United States'}],
  'coredata': {'srctype': 'j',
   'prism:issueIdentifier': '1',
   'eid': '2-s2.0-85099258626',
   'pubmed-id': '33435929',
   'prism:coverDate': '2021-12-01',
   'prism:aggregationType': 'Journal',
   'prism:url': 'https://api.elsevier.com/content/abstract/scopus_id/85099258626',
   'subtypeDescription': 'Review',
   'dc:creator': {'author': [{'ce:given-name': 'Ioannis P.',
      'preferred-name': {'ce:given-name': 'Ioannis P.',
       'ce:initials': 'I.P.',
       'ce:surname': 'Trougakos',
       'ce:indexed-name': 'Trougakos I.P.'},
      '@seq': '1',
      '

In [13]:
# Build a frame from the nested dict: each position becomes a column and the
# record fields ('affiliation', 'coredata') become the rows.
df_new_extra_info = pd.DataFrame(dict_new_extra_info)

In [14]:
# Show the frame (wide layout: one column per DOI position).
df_new_extra_info

Unnamed: 0,55514,55515,55516,55517,55518,55519,55520,55521,55522,55523,...,58210,58211,58212,58213,58214,58215,58216,58217,58218,58219
affiliation,"[{'affiliation-city': 'Athens', 'affilname': '...","[{'affiliation-city': 'Turin', 'affilname': 'U...","[{'affiliation-city': 'Riyadh', 'affilname': '...","[{'affiliation-city': 'New Delhi', 'affilname'...","[{'affiliation-city': 'Shanghai', 'affilname':...","[{'affiliation-city': 'Geneva', 'affilname': '...","[{'affiliation-city': 'Austin', 'affilname': '...",,"[{'affiliation-city': 'Cambridge', 'affilname'...",,...,"{'affiliation-city': 'Baltimore', 'affilname':...",,"[{'affiliation-city': 'Bethesda', 'affilname':...","{'affiliation-city': 'Tokyo', 'affilname': 'Na...","[{'affiliation-city': 'Quebec', 'affilname': '...",,"{'affiliation-city': 'Sao Paulo', 'affilname':...",,"[{'affiliation-city': 'Silchar', 'affilname': ...","[{'affiliation-city': 'Campbelltown', 'affilna..."
coredata,"{'srctype': 'j', 'prism:issueIdentifier': '1',...","{'srctype': 'j', 'prism:issueIdentifier': '6',...","{'srctype': 'j', 'prism:issueIdentifier': '4',...","{'srctype': 'j', 'eid': '2-s2.0-85088811629', ...","{'srctype': 'j', 'eid': '2-s2.0-84895512368', ...","{'srctype': 'j', 'eid': '2-s2.0-85099417502', ...","{'srctype': 'j', 'eid': '2-s2.0-85094195063', ...",,"{'srctype': 'j', 'eid': '2-s2.0-85089300175', ...",,...,"{'srctype': 'j', 'eid': '2-s2.0-85064195317', ...",,"{'srctype': 'j', 'eid': '2-s2.0-85085314156', ...","{'srctype': 'j', 'prism:issueIdentifier': '2',...","{'srctype': 'j', 'eid': '2-s2.0-85098238371', ...","{'srctype': 'j', 'eid': '2-s2.0-85085192672', ...","{'srctype': 'j', 'eid': '2-s2.0-79952191861', ...",,"{'srctype': 'j', 'eid': '2-s2.0-85098660877', ...","{'srctype': 'j', 'eid': '2-s2.0-85092295537', ..."


In [15]:
# Flip the frame so each DOI position is a row and the record fields are columns,
# matching the layout of the previously saved results.
df_new_extra_transposed = df_new_extra_info.transpose()
df_new_extra_transposed

Unnamed: 0,affiliation,coredata
55514,"[{'affiliation-city': 'Athens', 'affilname': '...","{'srctype': 'j', 'prism:issueIdentifier': '1',..."
55515,"[{'affiliation-city': 'Turin', 'affilname': 'U...","{'srctype': 'j', 'prism:issueIdentifier': '6',..."
55516,"[{'affiliation-city': 'Riyadh', 'affilname': '...","{'srctype': 'j', 'prism:issueIdentifier': '4',..."
55517,"[{'affiliation-city': 'New Delhi', 'affilname'...","{'srctype': 'j', 'eid': '2-s2.0-85088811629', ..."
55518,"[{'affiliation-city': 'Shanghai', 'affilname':...","{'srctype': 'j', 'eid': '2-s2.0-84895512368', ..."
...,...,...
58215,,"{'srctype': 'j', 'eid': '2-s2.0-85085192672', ..."
58216,"{'affiliation-city': 'Sao Paulo', 'affilname':...","{'srctype': 'j', 'eid': '2-s2.0-79952191861', ..."
58217,,
58218,"[{'affiliation-city': 'Silchar', 'affilname': ...","{'srctype': 'j', 'eid': '2-s2.0-85098660877', ..."


In [16]:
# Append the new rows to the previously saved ones. ignore_index=True renumbers
# the result 0..n-1 instead of keeping the original row labels.
df_combined_extra_info = pd.concat([df_current_extra_info, df_new_extra_transposed],ignore_index=True)

In [17]:
# Persist the combined results and display them.
# NOTE(review): this overwrites the very file the notebook loaded earlier; there is
# no backup, so check the combined frame before running this cell.
df_combined_extra_info.to_pickle('extra_info_CS5099.pkl')
df_combined_extra_info

Unnamed: 0,affiliation,coredata
0,"[{'affiliation-city': None, 'affilname': 'Pata...","{'srctype': 'j', 'prism:issueIdentifier': '230..."
1,"[{'affiliation-city': 'New Delhi', 'affilname'...","{'srctype': 'j', 'eid': '2-s2.0-85083171050', ..."
2,,
3,"[{'affiliation-city': 'London', 'affilname': '...","{'srctype': 'j', 'eid': '2-s2.0-79953057246', ..."
4,"[{'affiliation-city': 'Baoding', 'affilname': ...","{'srctype': 'j', 'prism:issueIdentifier': '7',..."
...,...,...
58215,,"{'srctype': 'j', 'eid': '2-s2.0-85085192672', ..."
58216,"{'affiliation-city': 'Sao Paulo', 'affilname':...","{'srctype': 'j', 'eid': '2-s2.0-79952191861', ..."
58217,,
58218,"[{'affiliation-city': 'Silchar', 'affilname': ...","{'srctype': 'j', 'eid': '2-s2.0-85098660877', ..."


In [18]:
# Spot check: fetch the Scopus record for one specific DOI and show it.
entry = AffiliationsFromScopusByDOI(client, '10.1086/605034')
entry

{'affiliation': [{'affiliation-city': 'Chiba',
   'affilname': 'Chiba University',
   'affiliation-country': 'Japan'},
  {'affiliation-city': 'Hanoi',
   'affilname': 'National Hospital of Pediatrics Hanoi',
   'affiliation-country': 'Viet Nam'},
  {'affiliation-city': 'Tokyo',
   'affilname': 'National Institute of Infectious Diseases',
   'affiliation-country': 'Japan'},
  {'affiliation-city': 'Tokyo',
   'affilname': 'National Center for Global Health and Medicine',
   'affiliation-country': 'Japan'},
  {'affiliation-city': 'Miyazaki',
   'affilname': 'University of Miyazaki Faculty of Medicine',
   'affiliation-country': 'Japan'},
  {'affiliation-city': 'Tokyo',
   'affilname': 'Tokai University',
   'affiliation-country': 'Japan'}],
 'coredata': {'srctype': 'j',
  'prism:issueIdentifier': '4',
  'eid': '2-s2.0-69149099607',
  'pubmed-id': '19591579',
  'prism:coverDate': '2009-08-15',
  'prism:aggregationType': 'Journal',
  'prism:url': 'https://api.elsevier.com/content/abstract/s

In [19]:
# Pretty-print the sample record as indented JSON, keeping non-ASCII characters readable.
entry_json = json.dumps(entry, indent=4, ensure_ascii=False)
print(entry_json)

{
    "affiliation": [
        {
            "affiliation-city": "Chiba",
            "affilname": "Chiba University",
            "affiliation-country": "Japan"
        },
        {
            "affiliation-city": "Hanoi",
            "affilname": "National Hospital of Pediatrics Hanoi",
            "affiliation-country": "Viet Nam"
        },
        {
            "affiliation-city": "Tokyo",
            "affilname": "National Institute of Infectious Diseases",
            "affiliation-country": "Japan"
        },
        {
            "affiliation-city": "Tokyo",
            "affilname": "National Center for Global Health and Medicine",
            "affiliation-country": "Japan"
        },
        {
            "affiliation-city": "Miyazaki",
            "affilname": "University of Miyazaki Faculty of Medicine",
            "affiliation-country": "Japan"
        },
        {
            "affiliation-city": "Tokyo",
            "affilname": "Tokai University",
            "affiliatio

In [20]:
# Turn the affiliation part of the sample record into a table, one row per affiliation.
# In the collected results 'affiliation' is a list of dicts for some records, a single
# bare dict for others, and missing/empty for some. Normalise all of these to a list
# of records so the DataFrame constructor always receives rows (a bare dict of scalars
# would otherwise raise, and a missing key would be a KeyError).
affiliations = (entry or {}).get('affiliation') or []
if isinstance(affiliations, dict):
    affiliations = [affiliations]
df_test = pd.DataFrame(affiliations)
df_test

Unnamed: 0,affiliation-city,affilname,affiliation-country
0,Chiba,Chiba University,Japan
1,Hanoi,National Hospital of Pediatrics Hanoi,Viet Nam
2,Tokyo,National Institute of Infectious Diseases,Japan
3,Tokyo,National Center for Global Health and Medicine,Japan
4,Miyazaki,University of Miyazaki Faculty of Medicine,Japan
5,Tokyo,Tokai University,Japan


In [21]:
# The 'coredata' part of the sample record (identifiers, cover date, creator, links, ...).
entry['coredata']

{'srctype': 'j',
 'prism:issueIdentifier': '4',
 'eid': '2-s2.0-69149099607',
 'pubmed-id': '19591579',
 'prism:coverDate': '2009-08-15',
 'prism:aggregationType': 'Journal',
 'prism:url': 'https://api.elsevier.com/content/abstract/scopus_id/69149099607',
 'subtypeDescription': 'Article',
 'dc:creator': {'author': [{'ce:given-name': 'Shoji',
    'preferred-name': {'ce:given-name': 'Shoji',
     'ce:initials': 'S.',
     'ce:surname': 'Kawachi',
     'ce:indexed-name': 'Kawachi S.'},
    '@seq': '1',
    'ce:initials': 'S.',
    '@_fa': 'true',
    'affiliation': {'@id': '60022886',
     '@href': 'https://api.elsevier.com/content/affiliation/affiliation_id/60022886'},
    'ce:surname': 'Kawachi',
    '@auid': '7006325573',
    'author-url': 'https://api.elsevier.com/content/author/author_id/7006325573',
    'ce:indexed-name': 'Kawachi S.'}]},
 'link': [{'@_fa': 'true',
   '@rel': 'self',
   '@href': 'https://api.elsevier.com/content/abstract/scopus_id/69149099607'},
  {'@_fa': 'true',
 

In [22]:
## ScienceDirect (full-text) document example using DOI
# Read one full-text document by DOI; on success print its title and hand it to
# elsapy's write(), otherwise report the failed read. The document object is shown last.
doi_doc = FullDoc(doi='10.1016/S1525-1578(10)60571-5')
if not doi_doc.read(client):
    print ("Read document failed.")
else:
    print ("doi_doc.title: ", doi_doc.title)
    doi_doc.write()
doi_doc

doi_doc.title:  Sensitive Sequencing Method for KRAS Mutation Detection by Pyrosequencing 


<elsapy.elsdoc.FullDoc at 0x1f4114190d0>