In [362]:
%%writefile doi.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Search doi by title and fist author surname
    based on https://github.com/torfbolt/DOI-finder
    See: http://www.crossref.org/guestquery/#textsearch
'''

lower_first_char = lambda s: s[:1].lower() + s[1:] if s else ''
def search_doi(surname='Florez',\
    title=r'Baryonic violation of R-parity from anomalous $U(1)_H$',other=''):
    '''
    Search doi from http://search.crossref.org/ 
    '''
    import re
    import requests
    doi={}
    search=''
    if surname:
        search=surname
    if title:
        if len(search)>0:
            search=search+', '+title
    if other:
        if len(search)>0:
            search=search+', '+other
            
    r=requests.get('http://search.crossref.org/?q=%s' %search)
    urldoi='http://dx.doi.org/'
    doitmp=r.text.split(urldoi)[1].split("\'>")[0].replace('&lt;','<').replace('&gt;','>')
    #check doi is right by searching for all words in doi -> output title
    if doitmp:
        json='https://api.crossref.org/v1/works/'
        rr=requests.get( json+urldoi+doitmp )
        if rr.status_code==200:
            if rr.json().has_key('message'):
                chktitle = re.sub(r"\$.*?\$","",title) # better remove all math expressions
                chktitle = re.sub(r"[^a-zA-Z0-9 ]", " ", chktitle).split(' ')
                if chktitle:
                    if not -1 in [(rr.json()["message"]['title'][0]).find(w)  for w in chktitle]:
                        doi=rr.json()["message"]
                    
    return doi
    
def general_search_doi(surname='Florez',\
    title=r'Baryonic violation of R-parity from anomalous $U(1)_H$'):
    '''
    Search doi from http://search.crossref.org/ with special format
    '''
    doi=search_doi(surname,title)
    if doi.has_key('author'):
        doi['Author']=doi['author'][0]['family']
    if doi.has_key('title'):                
        doi['Article Title']=doi['title'][0]                                            
    if doi.has_key('container-title') and len(doi['container-title'])==2:                    
        doi['Journal Title']=doi['container-title'][1]                        
    if doi.has_key('published-online'):                    
        doi['Year']=str(doi['published-online']['date-parts'][0][0])
    for k in ['Volume','Issue','Page']:                    
        if doi.has_key(lower_first_char(k)):                        
            doi[k]=doi['volume']                                                 
                        
    return doi

def searchdoi(title='a model of  leptons', surname='Weinberg'):
    """
    based on https://github.com/torfbolt/DOI-finder
    See: http://www.crossref.org/guestquery/
    
    Search for the metadata of given a title; e.g.  "A model of  leptons" 
   (case insensitive), and the Surname (only) for the first author, 
    e.g. Weinberg 
                      
    returns a dictionary with the keys:

       ['Article Title','Author','ISSN','Volume','Persistent Link','Year',
        'Issue','Page','Journal Title'],

       where 'Author' is really the surname of the first author
    """
    import mechanize
    import re
    from bs4 import BeautifulSoup
    
    browser = mechanize.Browser()
    browser.set_handle_robots(False)
    browser.addheaders = [('User-agent', 'Firefox')] 
    browser.open("http://www.crossref.org/guestquery/")
    assert browser.viewing_html()
    browser.select_form(name="form2")
    # use only surname of first author
    browser["auth2"] =  surname
    browser["atitle2"] = title
    response = browser.submit()
    sourcecode = response.get_data()
    result = re.findall(r"\<table cellspacing=1 cellpadding=1 width=600 border=0\>.*?\<\/table\>" ,sourcecode, re.DOTALL)
    if len(result) > 0:
        html=result[0] 
        if re.search('No DOI found',html):
            html='<table><tr><td>No DOI found<td></tr></table>'
    else:
        doi={}
        #return {}         

    soup = BeautifulSoup(html)
    table = soup.find("table")

    dataset = []
    for row in table.find_all("tr"):
        for tdi in row.find_all("td"):
            dataset.append(tdi.get_text())
            
    if len(dataset)==20:
        headings=dataset[:9]
        datasets=dataset[10:-1]
        doi=dict(zip(headings,datasets))
        
    else:
        doi={}
        
    if doi:
        if doi.has_key('ISSN') and doi.has_key('Persistent Link'):
            doi[u'URL']=doi['Persistent Link']
            doi[u'DOI']=doi['Persistent Link'].split('http://dx.doi.org/')[-1]

            
    return doi

if __name__ == "__main__":
    import sys
    import re
    title='';first_author_surname=''
    if sys.argv[1]:
        title=sys.argv[1]
    if sys.argv[2]:
        first_author_surname=sys.argv[2]
        
    d=searchdoi(title,first_author_surname)
    if not d:
        print 'General search:<br/>'
        d=general_search_doi(first_author_surname,title)
        
    ref='';sep=','
    for k in ['Author','Article Title','Journal Title','Volume','Issue','Page','Year']:
        if d.has_key(k):
            if k=='Author':
                d['Author'] = re.sub(r"[^a-zA-Z0-9 ]", " ",d['Author'] ) #remove non standard characters 
            if k=='Volume':
                d[k]='<strong>%s</strong>' %d[k]#.decode('utf-8')
            if k=='Year':
                sep=''
            ref=ref+d[k]+sep

    if d.has_key('URL'):
        print '''
            <br/>DOI: <a href="%s">%s</a><br/>
            Ref: %s<br/>
            <br>
            CODE at <a href="https://github.com/restrepo/webpy">GitHub</a>: doi.py<br/><br/>
        ''' %(d['URL'],d['URL'],ref) #.encode('utf-8'))
        print '''Official search at <a href="http://www.crossref.org/guestquery/#textsearch">crossref</a><br/>
        or <a href="http://search.crossref.org/">Search Crossref</a><br/>'''
    else:
        print '<br/>DOI lookup failed: try in <a href="http://search.crossref.org/">Crossref</a><br/>'
    

Overwriting doi.py


In [361]:
run doi.py "Formation, habitability, and detection of extrasolar moons" "Heller"


            <br/>DOI: <a href="http://dx.doi.org/10.1089/ast.2014.1147">http://dx.doi.org/10.1089/ast.2014.1147</a><br/>
            Ref: Heller,Formation, Habitability, and Detection of Extrasolar Moons,Astrobiology,<strong>14</strong>,9,798,2014<br/>
            <br>
            CODE at <a href="https://github.com/restrepo/webpy">GitHub</a>: doi.py<br/><br/>
        
Official search at <a href="http://www.crossref.org/guestquery/#textsearch">crossref</a><br/>
        or <a href="http://search.crossref.org/">Search Crossref</a><br/>


In [20]:
d=searchdoi("Formation, habitability, and detection of extrasolar moons","Heller")
d

{u'Article Title': u'Formation, Habitability, and Detection of Extrasolar Moons',
 u'Author': u'Heller',
 u'DOI': u'http://dx.doi.org/10.1089/ast.2014.1147',
 u'ISSN': u'1531-1074',
 u'Issue': u'9',
 u'Journal Title': u'Astrobiology',
 u'Page': u'798',
 u'Persistent Link': u'http://dx.doi.org/10.1089/ast.2014.1147',
 u'Volume': u'14',
 u'Year': u'2014'}

In [23]:
if 1==1:
    ref='';sep=','
    for k in ['Author','Article Title','Journal Title','Volume','Issue','Page','Year']:
        if d.has_key(k):
            if k=='Volume':
                d[k]='<strong>%s</strong>' %d[k]
            if k=='Year':
                sep=''
            ref=ref+d[k]+sep

    print '''
        DOI: %s<br/>
        Ref: %s<br/>
        <br>
        CODE at https://github.com/restrepo/webpy: doi.py
    ''' %(d['DOI'],ref)


        DOI: http://dx.doi.org/10.1089/ast.2014.1147<br/>
        Ref: Heller,Formation, Habitability, and Detection of Extrasolar Moons,Astrobiology,<strong>14</strong>,9,798,2014<br/>
        <br>
        CODE at https://github.com/restrepo/webpy: doi.py
    


In [None]:
 conn = httplib.HTTPConnection("www.crossref.org:80")
    conn.request("POST", "/guestquery/", params, headers)

In [77]:
import  requests

title="Formation, habitability, and detection of extrasolar moons"; surname="Heller"
headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html",\
               "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
payload = dict(titlesearch="titlesearch", auth2 = surname, atitle = title, multi_hit = "on",\
                   article_title_search = "Search", queryType = "author-title")
r = requests.post('http://www.crossref.org/guestquery/', data=payload,headers=headers)
r.text.find('http://dx.doi.org/10.1089/ast.2014.1147')

-1

In [76]:
import urllib
import httplib
title="Formation, habitability, and detection of extrasolar moons"; surname="Heller"
params = urllib.urlencode({"titlesearch":"titlesearch", "auth2" : surname, "atitle2" : title, "multi_hit" : "on", "article_title_search" : "Search", "queryType" : "author-title"})
headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html", "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
conn = httplib.HTTPConnection("www.crossref.org:80")
conn.request("POST", "/guestquery/", params, headers)
response = conn.getresponse()
r = response.read()
r.find('http://dx.doi.org/10.1089/ast.2014.1147')

25620

In [11]:
sourcecode.find('http://dx.doi.org/10.1089/ast.2014.1147')

25627

In [4]:
def searchdoi(title='a model of  leptons', surname='Weinberg'):
    """
    based on https://github.com/torfbolt/DOI-finder
    See: http://www.crossref.org/guestquery/
    
    Search for the metadata of given a title; e.g.  "A model of  leptons" 
   (case insensitive), and the Surname (only) for the first author, 
    e.g. Weinberg 
                      
    returns a dictionary with the keys:

       ['Article Title','Author','ISSN','Volume','Persistent Link','Year',
        'Issue','Page','Journal Title'],

       where 'Author' is really the surname of the first author
    """
    import mechanize
    import re
    from bs4 import BeautifulSoup
    
    title = re.sub(r"\$.*?\$","",title) # better remove all math expressions
    title = re.sub(r"[^a-zA-Z0-9 ]", " ", title) #remove non standard characters
    surname = re.sub(r"[{}'\\]","", surname) #remove non standard characters

    browser = mechanize.Browser()
    browser.set_handle_robots(False)
    browser.addheaders = [('User-agent', 'Firefox')] 
    browser.open("http://www.crossref.org/guestquery/")
    assert browser.viewing_html()
    browser.select_form(name="form2")
    # use only surname of first author
    browser["auth2"] =  surname
    browser["atitle2"] = title
    response = browser.submit()
    sourcecode = response.get_data()
    result = re.findall(r"\<table cellspacing=1 cellpadding=1 width=600 border=0\>.*?\<\/table\>" ,sourcecode, re.DOTALL)
    if len(result) > 0:
        html=result[0] 
        if re.search('No DOI found',html):
            html='<table><tr><td>No DOI found<td></tr></table>'
    else:
        doi={}
        #return {}         

    soup = BeautifulSoup(html)
    table = soup.find("table")

    dataset = []
    for row in table.find_all("tr"):
        for tdi in row.find_all("td"):
            dataset.append(tdi.get_text())
            
    if len(dataset)==20:
        headings=dataset[:9]
        datasets=dataset[10:-1]
        doi=dict(zip(headings,datasets))
        
    else:
        doi={}
        
    if doi:
        if doi.has_key('ISSN') and doi.has_key('Persistent Link'):
            doi['ISSN']=re.sub('([a-zA-Z0-9]{4})([a-zA-Z0-9]{4})','\\1-\\2',doi['ISSN'])
            doi[u'DOI']=doi['Persistent Link']
            
    return doi

In [5]:
searchdoi('Formation, Habitability, and Detection of Extrasolar Moons','Heller')

{u'Article Title': u'Formation, Habitability, and Detection of Extrasolar Moons',
 u'Author': u'Heller',
 u'DOI': u'http://dx.doi.org/10.1089/ast.2014.1147',
 u'ISSN': u'1531-1074',
 u'Issue': u'9',
 u'Journal Title': u'Astrobiology',
 u'Page': u'798',
 u'Persistent Link': u'http://dx.doi.org/10.1089/ast.2014.1147',
 u'Volume': u'14',
 u'Year': u'2014'}

In [90]:
import mechanize
browser = mechanize.Browser()
browser.set_handle_robots(False)
browser.addheaders = [('User-agent', 'Firefox')] 
browser.open("http://gfif.udea.edu.co/python/doi.php")
assert browser.viewing_html()
browser.select_form(name="form")
# use only surname of first author
browser["title"] = "Formation, habitability, and detection of extrasolar moons" #re.sub(r'[^a-zA-Z0-9 ]+', ' ', title)
browser["surname"] =  "Heller" #re.sub(r'[A-Z] ', ' ',
#re.sub(r'[^a-zA-Z0-9 ]+', ' ', author).split("and")[0])
response = browser.submit()
sourcecode = response.get_data()

FormNotFoundError: no form matching name 'form'

In [28]:
import requests

URL = 'http://gfif.udea.edu.co/python/revista.php'
payload = {
    'doi':'http://dx.doi.org/10.1089/ast.2014.1147',
}

session = requests.session()
r = requests.post(URL, params=payload)
r.content

'DOI (and hit ENTER):\n<form>\n<input name="doi" type="text" />\n</form>\n\nCopy the next table and paste into the Copy sheet of:\n               <a href="https://goo.gl/WnSY7M">"Formato revista"</a>,<br/>\n               and fill the empy fields in that Copy sheet.<br/>\n               Fill (or fix) for ISSN Colciencias, journal country, city and language at: \n               <a href="https://goo.gl/5nfX7c">https://goo.gl/5nfX7c</a><br/><table border="1"><tr><td><strong>T\xc3\xadtulo del art\xc3\xadculo</strong></td><td> Formation, Habitability, and Detection of Extrasolar Moons </td></tr>\n<tr><td><strong>Nombre de la revista</strong></td><td> Astrobiology </td></tr>\n<tr><td><strong>DOI</strong></td><td> 10.1089/ast.2014.1147 </td></tr>\n<tr><td><strong>Instituci\xc3\xb3n que publica</strong></td><td> Mary Ann Liebert Inc </td></tr>\n<tr><td><strong>Pa\xc3\xads</strong></td><td>  </td></tr>\n<tr><td><strong>Ciudad</strong></td><td>  </td></tr>\n<tr><td><strong>ISSN Colciencias</stro

In [26]:
import requests

URL = 'http://gfif.udea.edu.co/python/doi.php'
payload = {
    'surname':"Heller",
    'title': "Formation, habitability, and detection of extrasolar moons",
}

session = requests.session()
r = requests.get(URL, params=payload)

In [27]:
r.content

'<html>\n<head></head>\n<body>\n<form>\nTitle:<br/>\n<input name="title" type="text" size=\'60\'><br/>\nFirst author surname:<br/>\n<input name="surname" type="text"><br/>\n<input type="submit" value="Submit">\n</form><br><br/>\n\n\n            <br/>DOI: <a href="http://dx.doi.org/10.1089/ast.2014.1147">http://dx.doi.org/10.1089/ast.2014.1147</a><br/>\n            Ref: Heller,Formation, Habitability, and Detection of Extrasolar Moons,Astrobiology,<strong>14</strong>,9,798,2014<br/>\n            <br>\n            CODE at <a href="https://github.com/restrepo/webpy">GitHub</a>: doi.py<br/><br/>\n        \nOfficial search at <a href="http://www.crossref.org/guestquery/#textsearch">crossref</a>\n</body>\n</html>'

In [19]:
import mechanize
br = mechanize.Browser()
# Browser options
br.set_handle_robots(False) # ignore robots
br.open("http://gfif.udea.edu.co/python/revista.php")
br.select_form(nr=0)
br['doi'] = 'http://dx.doi.org/10.1089/ast.2014.1147'
res = br.submit()

In [17]:
res.read()

'DOI (and hit ENTER):\n<form>\n<input name="doi" type="text" />\n</form>\n\nCopy the next table and paste into the Copy sheet of:\n               <a href="https://goo.gl/WnSY7M">"Formato revista"</a>,<br/>\n               and fill the empy fields in that Copy sheet.<br/>\n               Fill (or fix) for ISSN Colciencias, journal country, city and language at: \n               <a href="https://goo.gl/5nfX7c">https://goo.gl/5nfX7c</a><br/><table border="1"><tr><td><strong>T\xc3\xadtulo del art\xc3\xadculo</strong></td><td> Formation, Habitability, and Detection of Extrasolar Moons </td></tr>\n<tr><td><strong>Nombre de la revista</strong></td><td> Astrobiology </td></tr>\n<tr><td><strong>DOI</strong></td><td> 10.1089/ast.2014.1147 </td></tr>\n<tr><td><strong>Instituci\xc3\xb3n que publica</strong></td><td> Mary Ann Liebert Inc </td></tr>\n<tr><td><strong>Pa\xc3\xads</strong></td><td>  </td></tr>\n<tr><td><strong>Ciudad</strong></td><td>  </td></tr>\n<tr><td><strong>ISSN Colciencias</stro

In [18]:
res.get_data()

'DOI (and hit ENTER):\n<form>\n<input name="doi" type="text" />\n</form>\n\nCopy the next table and paste into the Copy sheet of:\n               <a href="https://goo.gl/WnSY7M">"Formato revista"</a>,<br/>\n               and fill the empy fields in that Copy sheet.<br/>\n               Fill (or fix) for ISSN Colciencias, journal country, city and language at: \n               <a href="https://goo.gl/5nfX7c">https://goo.gl/5nfX7c</a><br/><table border="1"><tr><td><strong>T\xc3\xadtulo del art\xc3\xadculo</strong></td><td> Formation, Habitability, and Detection of Extrasolar Moons </td></tr>\n<tr><td><strong>Nombre de la revista</strong></td><td> Astrobiology </td></tr>\n<tr><td><strong>DOI</strong></td><td> 10.1089/ast.2014.1147 </td></tr>\n<tr><td><strong>Instituci\xc3\xb3n que publica</strong></td><td> Mary Ann Liebert Inc </td></tr>\n<tr><td><strong>Pa\xc3\xads</strong></td><td>  </td></tr>\n<tr><td><strong>Ciudad</strong></td><td>  </td></tr>\n<tr><td><strong>ISSN Colciencias</stro

In [47]:
title="Formation, habitability, and detection of extrasolar moons"
surname="Heller"    
import requests

URL = 'http://www.crossref.org/guestquery/'
payload = {
    'auth2':"Heller",
    'atitle2': "Formation, habitability, and detection of extrasolar moons"
}

session = requests.session()
r = requests.get(URL, params=payload)


In [41]:
r.content.find('http://dx.doi.org/10.1089/ast.2014.1147') # for binary uot

-1

In [48]:
r.text.find('http://dx.doi.org/10.1089/ast.2014.1147')

-1

In [43]:
r=requests.post(URL, data=payload)

In [44]:
r.content.find('http://dx.doi.org/10.1089/ast.2014.1147')

-1

In [46]:
r.text.find('http://dx.doi.org/10.1089/ast.2014.1147')

-1

In [112]:
query='a restrepo,d'
URL = "http://inspirehep.net/search?p='%s'&of=hx" %query
session = requests.session()
r = requests.get(URL)
res=r.text.split('<pre>')[1:-1]+[r.text.split('<pre>')[-1].split('<div ')[0]]
for l in res:
    print l.split(r'</pre>')[0]



%%% contains utf-8, see: http://inspirehep.net/info/faq/general#utf8
%%% add \usepackage[utf8]{inputenc} to your latex preamble

@article{Restrepo:2015sjs,
      author         = "Restrepo, D. and Rivera, A. and Sánchez, M. and Zapata,
                        O.",
      title          = "{A model with a viable dark matter candidate and massive
                        neutrinos}",
      booktitle      = "{Proceedings, 10th Latin American Symposium on High
                        Energy Physics (SILAFAE 2014)}",
      journal        = "Nucl. Part. Phys. Proc.",
      volume         = "267-269",
      year           = "2015",
      pages          = "367-369",
      doi            = "10.1016/j.nuclphysbps.2015.10.132",
      SLACcitation   = "%%CITATION = INSPIRE-1415059;%%"
}


@article{Sierra:2015zma,
      author         = "Aristizabal Sierra, Diego and Herrero-Garcia, J. and
                        Restrepo, D. and Vicente, A.",
      title          = "{Diboson anomaly: Heavy Higgs r

In [109]:
res=r.text.split('<pre>')[1:-1]+[r.text.split('<pre>')[-1].split('<div ')[0]]

[u'\n\n%%% contains utf-8, see: http://inspirehep.net/info/faq/general#utf8\n%%% add \\usepackage[utf8]{inputenc} to your latex preamble\n\n@article{Restrepo:2015sjs,\n      author         = "Restrepo, D. and Rivera, A. and S\xe1nchez, M. and Zapata,\n                        O.",\n      title          = "{A model with a viable dark matter candidate and massive\n                        neutrinos}",\n      booktitle      = "{Proceedings, 10th Latin American Symposium on High\n                        Energy Physics (SILAFAE 2014)}",\n      journal        = "Nucl. Part. Phys. Proc.",\n      volume         = "267-269",\n      year           = "2015",\n      pages          = "367-369",\n      doi            = "10.1016/j.nuclphysbps.2015.10.132",\n      SLACcitation   = "%%CITATION = INSPIRE-1415059;%%"\n}\n</pre>\n',
 u'\n@article{Sierra:2015zma,\n      author         = "Aristizabal Sierra, Diego and Herrero-Garcia, J. and\n                        Restrepo, D. and Vicente, A.",\n      title 

In [107]:
r.text.split('<pre>')[-1].split('<div ')[0]

u'\n@inproceedings{Allanach:1999bf,\n      author         = "Allanach, B. and others",\n      title          = "{Searching for R parity violation at Run II of the\n                        Tevatron}",\n      booktitle      = "{Physics at Run II: Workshop on Supersymmetry / Higgs:\n                        Summary Meeting Batavia, Illinois, November 19-21, 1998}",\n      collaboration  = "R parity Working Group",\n      url            = "http://lss.fnal.gov/cgi-bin/find_paper.pl?pub-00-387",\n      year           = "1999",\n      eprint         = "hep-ph/9906224",\n      archivePrefix  = "arXiv",\n      primaryClass   = "hep-ph",\n      reportNumber   = "SLAC-REPRINT-1998-046, ANL-HEP-CP-99-74,\n                        FERMILAB-PUB-00-387-T",\n      SLACcitation   = "%%CITATION = HEP-PH/9906224;%%"\n}\n</pre>\n\n                  '

In [88]:
result=r.text

In [89]:
result

u'[{"reference": [{"volume": "1311", "title": "JHEP,1311,011", "year": "2013", "order_number": "1", "authors": ["Restrepo, D.", "Zapata, O.", "Yaguna, C."]}, {"volume": "1207", "title": "JHEP,1207,153", "year": "2012", "order_number": "2", "authors": ["Bonnet, F.", "Hirsch, M.", "Ota, T.", "Winter, W."]}, {"volume": "1402", "title": "JCAP,1402,011", "year": "2014", "order_number": "3", "authors": ["Cheung, C.", "Sanford, D."]}, {"order_number": "4", "year": "2013", "misc": "SARAH 4: A tool for (not only SUSY) model builders", "authors": "F. Staub"}, {"volume": "618", "title": "Nucl.Phys,B618,717", "year": "2001", "order_number": "5", "authors": ["Casas, J.A.", "Ibarra, A."]}, {"volume": "90", "title": "Phys.Rev.,D90,093006", "year": "2014", "order_number": "6", "authors": ["Forero, D.V.", "Tortola, M.", "Valle, J.W.F."]}, {"order_number": "7", "misc": "Planckresults. XVI Cosmological parameters", "year": "2013"}, {"order_number": "8", "authors": "P. Cushman, et al.", "misc": "E", "repo

In [90]:
jr=json.loads(result)

In [91]:
len(jr)

25

In [93]:
jr[0]

{u'FIXME_OAI': {u'id': u'oai:inspirehep.net:1415059', u'set': u'INSPIRE:HEP'},
 u'abstract': {u'number': u'Elsevier',
  u'summary': u'By extending the SM with two scalar singlets, a vector-like fermion doublet and a fermion singlet, all odd under a Z 2 symmetry, it is possible to explain the correct dark matter relic density and also fulfill the main constraints for neutrino physics. In this work, we only consider the case of fermionic dark matter. We study the parameter space, first of all taking into account the relic density constraint, which gives us a set of parameters that can be used as inputs for the Casas-Ibarra parametrization in order to get the remaining parameters that also match neutrino physics. We also analyze possible constraints from lepton flavor violation processes such as \u03bc\u2192e\u03b3 .'},
 u'authors': [{u'affiliation': u'Antioquia U.',
   u'first_name': u'D.',
   u'full_name': u'Restrepo, D.',
   u'last_name': u'Restrepo'},
  {u'affiliation': u'Antioquia U.

In [73]:
import pandas as pd
pd.DataFrame({1:1,2:2})

In [60]:
r.text.find('SIL')

-1

In [125]:
surname='Florez'
title='Baryonic violation of R-parity from anomalous $U(1)_H$'
if 1==1:
    import mechanize
    import re
    from bs4 import BeautifulSoup
    
    #title = re.sub(r"\$.*?\$","",title) # better remove all math expressions
    #title = re.sub(r"[^a-zA-Z0-9 ]", " ", title) #remove non standard characters
    #surname = re.sub(r"[{}'\\]","", surname) #remove non standard characters

    browser = mechanize.Browser()
    browser.set_handle_robots(False)
    browser.addheaders = [('User-agent', 'Firefox')] 
    browser.open("http://www.crossref.org/guestquery/")
    assert browser.viewing_html()
    browser.select_form(name="form2")
    # use only surname of first author
    browser["auth2"] =  surname
    browser["atitle2"] = title
    response = browser.submit()
    sourcecode = response.get_data()
    result = re.findall(r"\<table cellspacing=1 cellpadding=1 width=600 border=0\>.*?\<\/table\>" ,sourcecode, re.DOTALL)
    if len(result) > 0:
        html=result[0] 

In [126]:
html.find('10.1103/physrevd.87.095010')

-1

In [127]:
r=requests.get('http://search.crossref.org/?q=%s, %s' %(title,surname))

In [132]:
r.text.split('http://dx.doi.org/')[1].split('>')[0]

u"10.1103/physrevd.87.095010'"

In [133]:
surname=u'Ordóñez-Lazo'
title='Molecular structure of one-electron diatomic molecules subject to plasma screening and its effect on the dynamics'
r=requests.get('http://search.crossref.org/?q=%s, %s' %(title,surname))
r.text.split('http://dx.doi.org/')[1].split('>')[0]

u"10.1088/1742-6596/635/7/072084'"

In [135]:
r.text.split('http://dx.doi.org/')[1].split('>')[0]

u"10.1088/1742-6596/635/7/072084'"

In [148]:
json='https://api.crossref.org/v1/works/'
rr=requests.get( json+r.text.split(json)[1].split("\'>")[0] )

In [155]:
chktitle = re.sub(r"\$.*?\$","",title) # better remove all math expressions
chktitle = re.sub(r"[^a-zA-Z0-9 ]", " ", chktitle).split(' ')[0]
if chktitle:
    if rr.json().has_key('message'):
        if rr.json()["message"]['title'][0].find(chktitle)!=-1:
            chk=True

0


In [154]:
rr.json()["message"]['title']

[u'Molecular structure of one-electron diatomic molecules subject to plasma screening and its effect on the dynamics']

In [317]:
lower_first_char = lambda s: s[:1].lower() + s[1:] if s else ''
def search_doi(surname='Florez',\
    title=r'Baryonic violation of R-parity from anomalous $U(1)_H$',other=''):
    '''
    Search doi from http://search.crossref.org/ 
    '''
    import re
    import requests
    doi={}
    search=''
    if surname:
        search=surname
    if title:
        if len(search)>0:
            search=search+', '+title
    if other:
        if len(search)>0:
            search=search+', '+other
            
    r=requests.get('http://search.crossref.org/?q=%s' %search)
    urldoi='http://dx.doi.org/'
    doitmp=r.text.split(urldoi)[1].split("\'>")[0]
    if doitmp:
        json='https://api.crossref.org/v1/works/'
        rr=requests.get( json+urldoi+doitmp )
        if rr.json().has_key('message'):
            chktitle = re.sub(r"\$.*?\$","",title) # better remove all math expressions
            chktitle = re.sub(r"[^a-zA-Z0-9 ]", " ", chktitle).split(' ')
            if chktitle:
                if not -1 in [(rr.json()["message"]['title'][0]).find(w)  for w in chktitle]:
                    doi=rr.json()["message"]
                    
    return doi
    
def general_search_doi(surname='Florez',\
    title=r'Baryonic violation of R-parity from anomalous $U(1)_H$'):
    '''
    Search doi from http://search.crossref.org/ with special format
    '''
    doi=search_doi(surname,title)
    if doi.has_key('author'):
        doi['Author']=doi['author'][0]['family']
    if doi.has_key('title'):                
        doi['Article Title']=doi['title'][0]                                            
    if doi.has_key('container-title') and len(doi['container-title'])==2:                    
        doi['Journal Title']=doi['container-title'][1]                        
    if doi.has_key('published-online'):                    
        doi['Year']=str(doi['published-online']['date-parts'][0][0])
    for k in ['Volume','Issue','Page']:                    
        if doi.has_key(lower_first_char(k)):                        
            doi[k]=doi['volume']                                                 
                        
    return doi

In [322]:
general_search_doi('Florez','Baryonic violation of  R  parity from anomalous  U ( 1  ) H')

{'Article Title': u' Baryonic violation of  R  parity from anomalous  U ( 1  ) H   ',
 'Author': u'Florez',
 u'DOI': u'10.1103/physrevd.87.095010',
 u'ISSN': [u'1550-7998', u'1550-2368'],
 'Issue': u'87',
 'Journal Title': u'Phys. Rev. D',
 u'URL': u'http://dx.doi.org/10.1103/physrevd.87.095010',
 'Volume': u'87',
 'Year': 2013,
 u'article-number': u'095010',
 u'author': [{u'affiliation': [], u'family': u'Florez', u'given': u'Andres'},
  {u'affiliation': [], u'family': u'Restrepo', u'given': u'Diego'},
  {u'affiliation': [], u'family': u'Velasquez', u'given': u'Mauricio'},
  {u'affiliation': [], u'family': u'Zapata', u'given': u'Oscar'}],
 u'container-title': [u'Physical Review D', u'Phys. Rev. D'],
 u'created': {u'date-parts': [[2013, 5, 17]],
  u'date-time': u'2013-05-17T15:18:44Z',
  u'timestamp': 1368803924000},
 u'deposited': {u'date-parts': [[2015, 3, 26]],
  u'date-time': u'2015-03-26T01:43:48Z',
  u'timestamp': 1427334228000},
 u'indexed': {u'date-parts': [[2015, 12, 20]],
  u'

In [321]:
surname=u'Ordóñez-Lazo'
title='Molecular structure of one-electron diatomic molecules subject to plasma screening and its effect on the dynamics'
general_search_doi(surname,title)

{'Article Title': u'Molecular structure of one-electron diatomic molecules subject to plasma screening and its effect on the dynamics',
 'Author': u'Ord\xf3\xf1ez-Lasso',
 u'DOI': u'10.1088/1742-6596/635/7/072084',
 u'ISSN': [u'1742-6588', u'1742-6596'],
 'Issue': u'635',
 'Journal Title': u'Journal of Physics: Conference Series',
 'Page': u'635',
 u'URL': u'http://dx.doi.org/10.1088/1742-6596/635/7/072084',
 'Volume': u'635',
 'Year': 2015,
 u'author': [{u'affiliation': [],
   u'family': u'Ord\xf3\xf1ez-Lasso',
   u'given': u'Andr\xe9s F'},
  {u'affiliation': [], u'family': u'Sanz-Vicario', u'given': u'Jos\xe9 L'},
  {u'affiliation': [], u'family': u'Mart\xedn', u'given': u'Fernando'}],
 u'container-title': [u'J. Phys.: Conf. Ser.',
  u'Journal of Physics: Conference Series'],
 u'created': {u'date-parts': [[2015, 9, 7]],
  u'date-time': u'2015-09-07T11:19:24Z',
  u'timestamp': 1441624764000},
 u'deposited': {u'date-parts': [[2015, 9, 8]],
  u'date-time': u'2015-09-08T08:36:57Z',
  u't

In [249]:
doitmp
print rr.json()["message"]['title'][0].split(' ')
print chktitle

[u'', u'Baryonic', u'violation', u'of', u'', u'R', u'', u'parity', u'from', u'anomalous', u'', u'U', u'(', u'1', u'', u')', u'H', u'', u'', u'']
['Baryonic', 'violation', 'of', 'R', 'parity', 'from', 'anomalous', '']


In [254]:
[(rr.json()["message"]['title'][0]).find(w) for w in chktitle]


[1, 10, 20, 24, 27, 34, 39, 0]

In [252]:
rr.json()["message"]['title'][0].f

u' Baryonic violation of  R  parity from anomalous  U ( 1  ) H   '

In [242]:
    print 1

u'Molecular structure of one-electron diatomic molecules subject to plasma screening and its effect on the dynamics'

In [175]:
rr.json()

{u'message': {u'DOI': u'10.1103/physrevd.87.095010',
  u'ISSN': [u'1550-7998', u'1550-2368'],
  u'URL': u'http://dx.doi.org/10.1103/physrevd.87.095010',
  u'article-number': u'095010',
  u'author': [{u'affiliation': [], u'family': u'Florez', u'given': u'Andres'},
   {u'affiliation': [], u'family': u'Restrepo', u'given': u'Diego'},
   {u'affiliation': [], u'family': u'Velasquez', u'given': u'Mauricio'},
   {u'affiliation': [], u'family': u'Zapata', u'given': u'Oscar'}],
  u'container-title': [u'Physical Review D', u'Phys. Rev. D'],
  u'created': {u'date-parts': [[2013, 5, 17]],
   u'date-time': u'2013-05-17T15:18:44Z',
   u'timestamp': 1368803924000},
  u'deposited': {u'date-parts': [[2015, 3, 26]],
   u'date-time': u'2015-03-26T01:43:48Z',
   u'timestamp': 1427334228000},
  u'indexed': {u'date-parts': [[2015, 12, 20]],
   u'date-time': u'2015-12-20T15:35:41Z',
   u'timestamp': 1450625741157},
  u'issue': u'9',
  u'issued': {u'date-parts': [[2013, 5, 16]]},
  u'license': [{u'URL': u'htt

In [334]:
run ./doi.py 'Molecular structure of one-electron diatomic molecules subject to plasma screening and its effect on the dynamics' 'Ordóñez-Lazo'

General search<br/>

            <br/>DOI: <a href="http://dx.doi.org/10.1088/1742-6596/635/7/072084">http://dx.doi.org/10.1088/1742-6596/635/7/072084</a><br/>
            Ref: Ordóñez-Lasso,Molecular structure of one-electron diatomic molecules subject to plasma screening and its effect on the dynamics,Journal of Physics: Conference Series,<strong>635</strong>,635,635,2015<br/>
            <br>
            CODE at <a href="https://github.com/restrepo/webpy">GitHub</a>: doi.py<br/><br/>
        
Official search at <a href="http://www.crossref.org/guestquery/#textsearch">crossref</a><br/>
        or <a href="http://search.crossref.org/">Search Crossref</a><br/>


In [335]:
run ./doi.py 'Molecular structure of one electron diatomic molecules subject' 'Lazo'

General search<br/>

            <br/>DOI: <a href="http://dx.doi.org/10.1088/1742-6596/635/7/072084">http://dx.doi.org/10.1088/1742-6596/635/7/072084</a><br/>
            Ref: Ordóñez-Lasso,Molecular structure of one-electron diatomic molecules subject to plasma screening and its effect on the dynamics,Journal of Physics: Conference Series,<strong>635</strong>,635,635,2015<br/>
            <br>
            CODE at <a href="https://github.com/restrepo/webpy">GitHub</a>: doi.py<br/><br/>
        
Official search at <a href="http://www.crossref.org/guestquery/#textsearch">crossref</a><br/>
        or <a href="http://search.crossref.org/">Search Crossref</a><br/>


In [363]:
run ./doi.py "Molecular structure of one electron diatomic molecules subject" "Lasso"

General search:<br/>

            <br/>DOI: <a href="http://dx.doi.org/10.1088/1742-6596/635/7/072084">http://dx.doi.org/10.1088/1742-6596/635/7/072084</a><br/>
            Ref: Ord  ez Lasso,Molecular structure of one-electron diatomic molecules subject to plasma screening and its effect on the dynamics,Journal of Physics: Conference Series,<strong>635</strong>,635,635,2015<br/>
            <br>
            CODE at <a href="https://github.com/restrepo/webpy">GitHub</a>: doi.py<br/><br/>
        
Official search at <a href="http://www.crossref.org/guestquery/#textsearch">crossref</a><br/>
        or <a href="http://search.crossref.org/">Search Crossref</a><br/>


In [348]:
print ref.encode('utf-8')

Ordóñez-Lasso,Molecular structure of one-electron diatomic molecules subject to plasma screening and its effect on the dynamics,Journal of Physics: Conference Series,<strong>635</strong>,635,635,2015


In [340]:
print ref.decode('utf-8')

UnicodeEncodeError: 'ascii' codec can't encode characters in position 3-4: ordinal not in range(128)