## GFIF papers

In [1]:
import pandas as pd

Make a GFIF query

In [52]:
# Search expression for the legacy INSPIRE endpoint; spaces are turned into '+'
# so the expression can be dropped straight into the URL query string.
q='(fc p or fc e) and aff antioquia u. and collection:published'
q='+'.join(q.split(' '))
# NOTE(review): rg=250 presumably caps the number of returned records and
# of=recjson selects the JSON record format -- confirm against the API docs.
df=pd.read_json(
    'https://inspirehep.net/search?p=find+{}&rg=250&of=recjson'.format(q)
)

In [53]:
# (rows, columns) of the frame returned by the query.
df.shape

(195, 40)

Explore the row of one published paper returned by the previous query

In [54]:
# Select the record(s) whose stringified title contains the paper name and
# dump them as a plain dict so every field of the record can be inspected.
(
    df.loc[
        df['title'].astype(str).str.contains(
            'Singlet-Doublet Dirac Dark Matter and Neutrino Masses')
    ]
    .reset_index(drop=True)
    .to_dict()
)

{'FIXME_OAI': {0: {'id': 'oai:inspirehep.net:1741056',
   'set': ['INSPIRE:HEP', 'ForSCOAP3']}},
 'abbreviated_title': {0: nan},
 'abstract': {0: [{'number': 'APS',
    'summary': 'We examine an extension of the Standard Model that addresses the dark matter puzzle and generates Dirac neutrino masses through the radiative seesaw mechanism. The new field content includes a scalar field that plays an important role in setting the relic abundance of dark matter. We analyze the phenomenology in the light of direct, indirect, and collider searches of dark matter. In this framework, the dark matter candidate is a Dirac particle that is a mixture of new singlet-doublet fields with mass mχ10≲1.1\u2009\u2009TeV. We find that the allowed parameter space of this model is broader than the well-known Majorana dark matter scenario.'},
   {'number': 'arXiv',
    'summary': 'We examine an extension of the Standard Model that addresses the dark matter puzzle and generates Dirac neutrinos masses through 

Filter published papers (the query already restricts the results to the published collection!)

In [55]:
# Keep only the rows that carry publication info and renumber them from 0.
dfp=df.loc[df['publication_info'].notna()].reset_index(drop=True)

Get the journal info from the Crossref API

In [292]:
# Cache of journal metadata, keyed by abbreviated journal title.
# get_journal_info captures this very object as a default argument when it is
# defined, so re-running this cell afterwards rebinds the name but does not
# reset the dict the function keeps writing to.
journal=dict()

In [293]:
import time
def _crossref_first(di, field):
    """Return the first item of the list-valued ``field`` in ``di``, or None.

    ``di`` is the frame ``pd.read_json`` builds from a Crossref ``/works``
    response. The payload is addressed by the ``message`` column *label*
    rather than by position, so the lookup does not depend on column order.
    """
    try:
        return di.loc[field, 'message'][0]
    except (KeyError, IndexError, TypeError):
        # Field/column absent (e.g. empty frame after a failed request),
        # empty list, or a value that cannot be subscripted (NaN).
        return None


def get_journal_info(row,
                     journal=journal,
                     abrv_journal='publication_info',
                     abrv_journal_title='title',
                     doi='doi',
                     container_title='container_title',
                     issn='ISSN'):
    """Fill the ``journal`` cache with Crossref metadata for the row's journal.

    Parameters
    ----------
    row : object with ``.get`` (DataFrame row or plain dict)
        Record holding the publication info and the DOI.
    journal : dict
        Cache ``{abbreviated title: {container_title: ..., issn: ...}}``,
        updated in place. The default is the module-level ``journal`` object
        that exists when this function is defined.
    abrv_journal, abrv_journal_title : str
        Keys used to read the abbreviated journal title from ``row``.
    doi : str
        Key of the DOI in ``row``.
    container_title, issn : str
        Keys under which the full journal name and the ISSN are cached.

    Returns
    -------
    pandas.DataFrame or None
        The Crossref response frame (empty if the request failed) when a
        request was made; ``None`` when nothing was requested because the
        journal is already fully cached or the row has no usable DOI.

    Notes
    -----
    A request is made while *either* cached field is missing, so a journal
    whose first DOI only yielded one of the two fields is completed by a later
    row. Each request is followed by a one second pause.
    """
    try:
        j = row.get(abrv_journal).get(abrv_journal_title)
    except Exception:
        # Publication info missing or not dict-like (e.g. NaN).
        j = None
    if j and not journal.get(j):
        journal.update({j: {}})

    cached = journal.get(j) or {}
    if cached.get(container_title) and cached.get(issn):
        return None

    strdoi = row.get(doi)
    # NOTE(review): some records may carry several DOIs as a list -- the first
    # non-empty string is used; confirm against the record schema.
    if isinstance(strdoi, (list, tuple)):
        strdoi = next((d for d in strdoi if isinstance(d, str) and d), None)
    # NaN is truthy, so test for a real string instead of plain truthiness.
    if not isinstance(strdoi, str) or not strdoi:
        return None

    try:
        di = pd.read_json('https://api.crossref.org/works/{}'.format(strdoi))
    except Exception:
        # Best effort: an unreachable or unknown DOI just leaves the cache as is.
        di = pd.DataFrame()

    if j in journal:
        for key, field in ((container_title, 'container-title'), (issn, 'ISSN')):
            if not journal[j].get(key):
                value = _crossref_first(di, field)
                if value:
                    journal[j][key] = value

    # Throttle consecutive requests to the Crossref API.
    time.sleep(1)
    return di

In [None]:
# Walk the frame row by row to populate the `journal` cache. Every row that
# triggers a Crossref request also waits one second, so this cell is slow.
di=dfp.apply(get_journal_info,axis='columns')

In [296]:
# Inspect the journal cache after the row-by-row lookup.
journal

{'Eur.Phys.J.': {'ISSN': '1434-6044',
  'container_title': 'The European Physical Journal C'},
 'Europhys.Lett.': {'ISSN': '0295-5075',
  'container_title': 'Europhysics Letters (EPL)'},
 'Int.J.Mod.Phys.': {'ISSN': '0217-751X',
  'container_title': 'International Journal of Modern Physics A'},
 'J.Phys.': {'ISSN': '0954-3899',
  'container_title': 'Journal of Physics G: Nuclear and Particle Physics'},
 'JCAP': {'ISSN': '1475-7516',
  'container_title': 'Journal of Cosmology and Astroparticle Physics'},
 'JHEP': {'ISSN': '1029-8479',
  'container_title': 'Journal of High Energy Physics'},
 'JINST': {'ISSN': '1748-0221',
  'container_title': 'Journal of Instrumentation'},
 'Mod.Phys.Lett.': {'ISSN': '0217-7323',
  'container_title': 'Modern Physics Letters A'},
 'Nucl.Phys.': {'ISSN': '0550-3213', 'container_title': 'Nuclear Physics B'},
 'Phys.Lett.': {'ISSN': '0370-2693', 'container_title': 'Physics Letters B'},
 'Phys.Rept.': {'ISSN': '0370-1573', 'container_title': 'Physics Reports'

Normalize data

In [315]:
# Flatten the title field to a single value: a list is reduced to its first
# element, then a dict is reduced to its 'title' entry; anything else is kept.
dfp['title_TeX']=(
    dfp['title']
    .apply(lambda entry: entry[0] if isinstance(entry,list) else entry)
    .apply(lambda entry: entry.get('title') if isinstance(entry,dict) else entry)
)

Add Journal metadata

In [371]:
# Publication year as an int; rows without a string year get 0 so that numeric
# filters on the column keep working.
dfp['Year']=dfp['publication_info'].apply(
    lambda d: d.get('year') if isinstance(d,dict) else None).apply(
    lambda y: int(y) if isinstance(y,str) else 0)

# Full journal name and ISSN from the `journal` cache. A journal that is not
# in the cache (or a record without a journal title) yields None instead of
# raising AttributeError on the missing cache entry.
dfp['Journal_name']=dfp['publication_info'].apply(
    lambda d: (journal.get(d.get('title')) or {}).get('container_title')
    if isinstance(d,dict)
    else None)
dfp['ISSN']=dfp['publication_info'].apply(
    lambda d: (journal.get(d.get('title')) or {}).get('ISSN')
    if isinstance(d,dict)
    else None)

In [372]:
# Full names ("first last") of the authors whose affiliation text mentions
# 'Antioquia U.', in the order they appear in the author list.
dfp['udea_authors']=dfp['authors'].apply(
    lambda author_list: [
        '{} {}'.format(author.get('first_name'), author.get('last_name'))
        for author in author_list
        if 'Antioquia U.' in str(author.get('affiliation'))
    ])
# First matching author of each paper (missing when the list is empty).
dfp['first_udea_author']=dfp['udea_authors'].str[0]

In [373]:
# Summary table with one row per paper.
sost=dfp.loc[:,['title_TeX','Journal_name','first_udea_author','Year','ISSN','doi']]

In [376]:
# Papers published after 2017.
sost.loc[sost['Year']>2017]

Unnamed: 0,title_TeX,Journal_name,first_udea_author,Year,ISSN,doi
0,Observation of the $\Lambda_\mathrm{b}^0 \to$ ...,Physics Letters B,Jhovanny Mejia Guisao,2020,0370-2693,10.1016/j.physletb.2020.135203
1,Search for supersymmetry with a compressed mas...,Physical Review Letters,Jhovanny Mejia Guisao,2020,0031-9007,10.1103/PhysRevLett.124.041803
2,Dirac neutrino mass generation from Majorana m...,Physical Review D,Julian Calle,2020,2470-0010,10.1103/PhysRevD.101.035004
3,Search for long-lived particles using delayed ...,Physical Review D,Jhovanny Mejia Guisao,2019,2470-0010,10.1103/PhysRevD.100.112003
4,Evidence for WW production from double-parton ...,The European Physical Journal C,Jhovanny Mejia Guisao,2020,1434-6044,10.1140/epjc/s10052-019-7541-6
5,Measurements of differential Z boson productio...,Journal of High Energy Physics,Jhovanny Mejia Guisao,2019,1029-8479,10.1007/JHEP12(2019)061
6,Search for low mass vector resonances decaying...,Physical Review D,Jhovanny Mejia Guisao,2019,2470-0010,10.1103/PhysRevD.100.112007
7,Searches for physics beyond the standard model...,The European Physical Journal C,Jhovanny Mejia Guisao,2020,1434-6044,10.1140/epjc/s10052-019-7493-x
8,Search for a charged Higgs boson decaying into...,Journal of High Energy Physics,Jhovanny Mejia Guisao,2020,1029-8479,10.1007/JHEP01(2020)096
9,Search for supersymmetry using Higgs boson to ...,Journal of High Energy Physics,Jhovanny Mejia Guisao,2019,1029-8479,10.1007/JHEP11(2019)109


In [230]:
# Scratch check: prints 1 when the abbreviation has no (or an empty) cache entry.
j='Phys.Lett.'
if not journal.get(j):
    print(1)

1


In [177]:
        # Scratch fragment: per-field update of the journal cache entry for `j`.
        # NOTE(review): `j`, `di`, `container_title` and `issn` are not defined in
        # this cell; it only runs if they already exist in the kernel namespace.
        if not journal.get(j).get(container_title):
            try:
                # NOTE(review): integer key on a label-indexed row -- presumably
                # relies on positional fallback to reach the payload; confirm.
                ct=di.loc['container-title'][0]
                journal[j].update({container_title:ct})
            except:
                pass
        if not journal.get(j).get(issn):
            try:
                # First entry of the 'ISSN' payload taken from the 'message' column.
                gis=di.loc['ISSN'].get('message')[0]
                journal[j].update({issn:gis})
            except:
                pass
        

Unnamed: 0,message,message-type,message-version,status
DOI,10.1016/j.physletb.2020.135203,work,1.0.0,ok
ISSN,[0370-2693],work,1.0.0,ok
URL,http://dx.doi.org/10.1016/j.physletb.2020.135203,work,1.0.0,ok
alternative-id,[S0370269320300071],work,1.0.0,ok
article-number,135203,work,1.0.0,ok
assertion,"[{'name': 'publisher', 'value': 'Elsevier', 'l...",work,1.0.0,ok
author,"[{'family': 'Sirunyan', 'given': 'A.M.', 'affi...",work,1.0.0,ok
container-title,[Physics Letters B],work,1.0.0,ok
content-domain,"{'domain': ['elsevier.com', 'sciencedirect.com...",work,1.0.0,ok
created,"{'date-time': '2020-01-09T16:36:24Z', 'timesta...",work,1.0.0,ok


In [147]:
# Scratch call with a plain dict that has no DOI: no Crossref request is made,
# the abbreviation 'B' is only registered in the cache.
get_journal_info( {'publication_info':{'title':'B'}})

In [148]:
# Inspect the cache after the scratch call.
journal

{'A': {}, 'B': {}}

In [139]:
# NOTE(review): `tmp` is not defined in any cell shown here; this cell depends
# on leftover kernel state.
tmp.title

0    {'title': 'Observation of the $\Lambda_\mathrm...
Name: title, dtype: object

In [155]:
# Inspect the current content of the journal cache.
journal

{'Phys.Lett.': {}}

In [79]:

# Scratch prototype of the Crossref lookup for a single DOI.
# NOTE(review): `doi` and `j` are not defined in this cell; they must already
# exist in the kernel namespace.
t=0.1
di=pd.read_json( 'https://api.crossref.org/works/{}'.format(doi) )
# The cache key is 'container_title' (underscore), the key used by
# get_journal_info and by the cells that read the cache; the Crossref field
# itself is 'container-title'. The payload is read by the 'message' label so
# the lookup does not depend on column order.
if not journal[j].get('container_title'):
    try:
        ct=di.loc['container-title','message'][0]
    except (KeyError,IndexError,TypeError):
        ct=''
    journal[j]['container_title']=ct
if not journal[j].get('ISSN'):
    try:
        issn=di.loc['ISSN','message'][0]
    except (KeyError,IndexError,TypeError):
        issn=''
    journal[j]['ISSN']=issn
# Call the function: assigning to time.sleep would replace it with a float and
# break every later time.sleep(...) call in the kernel.
time.sleep(t)

In [78]:
# Show the frame currently bound to `di`.
di

Unnamed: 0,message,message-type,message-version,status
DOI,10.1016/j.physletb.2020.135203,work,1.0.0,ok
ISSN,[0370-2693],work,1.0.0,ok
URL,http://dx.doi.org/10.1016/j.physletb.2020.135203,work,1.0.0,ok
alternative-id,[S0370269320300071],work,1.0.0,ok
article-number,135203,work,1.0.0,ok
assertion,"[{'name': 'publisher', 'value': 'Elsevier', 'l...",work,1.0.0,ok
author,"[{'family': 'Sirunyan', 'given': 'A.M.', 'affi...",work,1.0.0,ok
container-title,[Physics Letters B],work,1.0.0,ok
content-domain,"{'domain': ['elsevier.com', 'sciencedirect.com...",work,1.0.0,ok
created,"{'date-time': '2020-01-09T16:36:24Z', 'timesta...",work,1.0.0,ok
