
## Which journals have published articles on RCR and metadata?
We are interested in ranking journals by their previous interest in reproducible research and metadata.

In [5]:
from eutils import Client
from secrets import api #your secret eutils api key
API_KEY = api.ncbo_key

In [35]:
ec = Client(api_key=api.apikey)
term='"reproducible research"[Title] or "reproducibility"[Title] or "reproducible"[Title]'

In [7]:
title_search=ec.esearch(db='pubmed',term='"reproducible research"[Title] or "reproducibility"[Title] or "reproducible"[Title]')



In [29]:
paset = ec.efetch(db='pubmed', id=title_search.ids, retstart=251)


TypeError: efetch() got an unexpected keyword argument 'retstart'

In [17]:
pagen=iter(paset)

In [28]:
for res in pagen:
    print(res.jrnl)

PLoS ONE
Pediatr. Pulmonol.
J. Microbiol. Methods
Am. J. Clin. Pathol.
Knee Surg Sports Traumatol Arthrosc
BMC Ophthalmol
Radiat Oncol
Environ Health
Metab. Clin. Exp.
World Neurosurg
Cardiol J
Pract Lab Med
J Clin Med
Nutrients
PLoS Comput. Biol.
Acta Radiol Open
Biostatistics
Toxicol. Sci.
Cardiovasc Diagn Ther
Acad Radiol
Nature
J Exp Orthop
Ultrasonics
ILAR J
J Opt Soc Am A Opt Image Sci Vis
Med Ultrason
Nat Commun
Open J Obstet Gynecol
Appl Spectrosc
Insights Imaging
Physiol Meas
Front Neurosci
J Nucl Cardiol
Neuroscience
Biostatistics
MAGMA
JMIR Med Educ
Int J Rehabil Res
Ophthalmol Retina
J. Cardiovasc. Electrophysiol.
Clin Neurophysiol Pract
Graefes Arch. Clin. Exp. Ophthalmol.
Soft Matter
Hum Brain Mapp
Pacing Clin Electrophysiol
Technol Health Care
J. Assist. Reprod. Genet.
J Ultrasound
J Hepatobiliary Pancreat Sci
Diagn. Cytopathol.
NMR Biomed
J. Cell Biol.
Acta Radiol
J Appl Clin Med Phys
Indoor Air
J Nucl Cardiol
Pediatr. Res.
J Bras Pneumol
Elife
Urol. Oncol.
F1000Res
Bra

In [26]:
res.jrnl

'SLAS Discov'

In [33]:
import entrezpy

In [48]:
import entrezpy.conduit
w = entrezpy.conduit.Conduit('leipzig@gmail.com')
fetch_influenza = w.new_pipeline()


In [49]:
sid = fetch_influenza.add_search({'db' : 'pubmed', 'term' : term, 'rettype':'count', 'sort' : 'Date Released', 'datetype' : 'pdat'})

In [53]:
fid = fetch_influenza.add_fetch({'retstart':100,'retmax' : 10, 'retmode' : 'text', 'rettype': 'fasta'}, dependency=sid)

In [55]:
res=w.run(fetch_influenza,quiet=True)

TypeError: run() got an unexpected keyword argument 'quiet'

In [52]:
import entrezpy.esearch.esearcher.Esearcher

e = entrezpy.esearch.esearcher.Esearcher(tool,
                                         "leipzig@gmail.com",
                                         apikey=API_KEY,
                                         apikey_var=None,
                                         threads=None,
                                         qid=None)
analyzer = e.inquire({'db' : 'pubmed',
                      'id' : [17284678, 9997],
                      'retmode' : 'text',
                      'rettype' : 'abstract'})
print(analyzer.count, analyzer.retmax, analyzer.retstart, analyzer.uids)

<entrezpy.efetch.efetch_analyzer.EfetchAnalyzer at 0x10702b6d0>

In [57]:
import entrezpy.esearch.esearcher.Esearcher

ModuleNotFoundError: No module named 'entrezpy.esearch.esearcher.Esearcher'; 'entrezpy.esearch.esearcher' is not a package

In [58]:
import entrezpy.esearch

In [59]:
import entrezpy.esearch.esearcher

In [60]:
import entrezpy.esearch.esearcher.Esearcher

ModuleNotFoundError: No module named 'entrezpy.esearch.esearcher.Esearcher'; 'entrezpy.esearch.esearcher' is not a package

Lens ID,Title,Date Published,Publication Year,Publication Type,Source Title,ISSNs,Publisher,Source Country,Author/s,Abstract,Volume,Issue Number,Start Page,End Page,Fields of Study,Keywords,MeSH Terms,Chemicals,Funding,Source URLs,External URL,PMID,DOI,Microsoft Academic ID,PMCID,Patent Citation Count,References,Scholarly Citation Count

In [27]:
import pandas as pd
pd.set_option('display.max_rows', None)

In [28]:
metadata=pd.read_csv('../../data/lens/metadata.lens.csv')
rcr=pd.read_csv('../../data/lens/rcr.lens.csv')

In [29]:
#pandas is kind of awkward compared to dplyr, let's count unique lens ids
#https://nbviewer.jupyter.org/gist/TomAugspurger/6e052140eaa5fdb6e8c0
meta_jrnls = metadata.groupby(['Source Title']).agg({"Lens ID": "count"}).rename(columns={"Lens ID": "metadata_cnt"})
rcr_jrnls = rcr.groupby(['Source Title']).agg({"Lens ID": "count"}).rename(columns={"Lens ID": "rcr_cnt"})


## Top journals for metadata in Lens
Let's use a scaled rank to make metadata and rcr comparable, as rcr has more hits

In [59]:
meta_jrnls['meta_rank']=meta_jrnls['metadata_cnt'].rank()
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
meta_jrnls['meta_scaled'] = scaler.fit_transform(meta_jrnls['meta_rank'].values.reshape(-1,1))
meta_jrnls.sort_values("metadata_cnt",ascending=False).head(n=100)

Unnamed: 0_level_0,metadata_cnt,meta_rank,meta_scaled
Source Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lecture Notes in Computer Science,854,10615.0,1.0
"International Journal of Metadata, Semantics and Ontologies",252,10614.0,0.999861
Communications in Computer and Information Science,244,10613.0,0.999722
Scientific Data,231,10612.0,0.999584
Metadata and Semantic Research,222,10611.0,0.999445
Procedia Computer Science,203,10610.0,0.999306
Journal of Library Metadata,199,10609.0,0.999167
Research and Advanced Technology for Digital Libraries,194,10608.0,0.999028
Cataloging & Classification Quarterly,176,10607.0,0.998889
D-lib Magazine,124,10605.5,0.998681


## Top journals for reproducible research in Lens

In [63]:
rcr_jrnls['rcr_rank']=rcr_jrnls['rcr_cnt'].rank()
rcr_jrnls['rcr_scaled'] = scaler.fit_transform(rcr_jrnls['rcr_rank'].values.reshape(-1,1))
rcr_jrnls.sort_values("rcr_cnt",ascending=False).head(n=100)

Unnamed: 0_level_0,rcr_cnt,rcr_rank,rcr_scaled
Source Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Nature,425,14581.0,1.0
bioRxiv,249,14580.0,0.9999
PLOS ONE,222,14579.0,0.999801
Methods of Molecular Biology,187,14578.0,0.999701
Journal of Chromatography A,154,14576.5,0.999552
Journal of Geophysical Research,154,14576.5,0.999552
Cancer Research,143,14575.0,0.999402
Journal of the Acoustical Society of America,140,14574.0,0.999303
Geophysical Research Letters,138,14573.0,0.999203
Lishizhen Medicine and Materia Medica Research,114,14572.0,0.999103


In [64]:
jrnls = meta_jrnls.merge(rcr_jrnls, how='inner', 
                                         left_on='Source Title', 
                                         right_on='Source Title')

In [65]:
jrnls['total'] = jrnls['rcr_scaled'] + jrnls['meta_scaled']

In [45]:
jrnls['default_rank'] = df['Number_legs'].rank()

Unnamed: 0_level_0,metadata_cnt,rcr_cnt,total
Source Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"""ICIC Express Letters, Part B: Applications An International Journal of Research and Surveys""",2,,
#DLFTeach Toolkit: Lesson Plans for Digital Library Instruction,1,,
.NET IL Assembler,6,,
"10th IEEE International Conference on Electronics, Circuits and Systems, 2003. ICECS 2003. Proceedings of the 2003",2,,
10th IEEE International Conference on Engineering of Complex Computer Systems (ICECCS'05),1,,
"10th International Conference on Information Science, Signal Processing and their Applications (ISSPA 2010)",1,1.0,2.0
"10th International Conference on Telecommunications, 2003. ICT 2003.",1,,
"10th International Multimedia Modelling Conference, 2004. Proceedings.",1,,
10th International Symposium on the Conservation of Monuments in the Mediterranean Basin,1,,
11th International Multimedia Modelling Conference,2,,
