<a href="https://colab.research.google.com/github/restrepo/lens/blob/master/example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lens
Example of a Lens search. Check against:

In [172]:
# Lens record to look up (another sample id: '024-307-056-731-628').
lens_id = '034-341-431-911-594'
# Build the lens.org search URL for this record so the results can be checked by hand.
search_url = 'https://www.lens.org/lens/scholar/search/results?q=lens_id:{}'.format(lens_id)
print(search_url)

https://www.lens.org/lens/scholar/search/results?q=lens_id:034-341-431-911-594


In [173]:
import pandas as pd

In [174]:
# Download the JSON export of the selected record and load it into a DataFrame.
data_url = 'https://raw.githubusercontent.com/restrepo/lens/master/data/lens_{}.json'.format(lens_id)
ln = pd.read_json(data_url)

In [175]:
ln.columns

Index(['_id', 'abstract', 'authors', 'chemicals', 'citation_ids',
       'clinical_trials', 'date_published', 'fields_of_study', 'funding',
       'issue', 'journal', 'keywords', 'languages', 'mesh_terms', 'pages_end',
       'pages_start', 'publication_supplementary_type', 'publication_type',
       'record_lens_id', 'referenced_by_count', 'referenced_by_patent_count',
       'references', 'source_urls', 'title', 'volume', 'year_published'],
      dtype='object')

## Explanation of each column and its parts
Nested JSON fields are extracted as new columns where necessary, e.g. DOIs.

In [176]:
i=0  # row label of ln used by the ln.loc[i, ...] lookups in the cells below

### Article identification in several databases:

lens identification number:

In [177]:
ln.loc[i,'record_lens_id']

'034-341-431-911-594'

Also available in `_id`.
Reported bug in the JSON identifier: the last digit is missing.

In [178]:
#https://stackoverflow.com/a/15164639/2268280
# Zero-pad the numeric _id to 14 digits and append a hyphen after every
# complete 3-digit group, mimicking the hyphenated lens id format.
# regex=True is required: since pandas 2.0 Series.str.replace treats the
# pattern as a literal string by default, which would leave the id unchanged.
ln['_id'].apply(lambda x: "%014d"%x).str.replace(r'([0-9]{3})',r'\g<1>-',regex=True)

0    034-341-431-911-59
Name: _id, dtype: object

Other identification numbers

In [179]:
ln.citation_ids.apply(len)

0    3
Name: citation_ids, dtype: int64

In [180]:
ln.citation_ids[0]

[{'type': 'core', 'value': 'core45448247'},
 {'type': 'doi', 'value': '10.1103/physrevd.93.015012'},
 {'type': 'magid', 'value': 'mag2270814667'}]

In [220]:
# For each identifier type, collect the matching 'value' entries of every
# record's citation_ids list and keep the first one (NaN when there is none).
for id_type in ('magid', 'doi', 'core'):
    matches = ln.citation_ids.apply(
        # wanted=id_type binds the current type to the lambda explicitly
        lambda entries, wanted=id_type: [entry.get('value') for entry in entries
                                         if entry.get('type') == wanted])
    ln[id_type] = matches.str[0]

In [221]:
# Read the three identifiers of row i once; missing values become '' so the
# string operations and the truthiness checks below behave uniformly.
ids = ln[['magid', 'doi', 'core']].fillna('').loc[i]
magid = ids['magid'].split('mag')[-1]    # keep the part after the 'mag' prefix
doi = ids['doi']
core = ids['core'].split('core')[-1]     # keep the part after the 'core' prefix
if magid:
    print('Microsoft academic link for article: https://academic.microsoft.com/#/detail/{}'.format(magid))
if doi:
    print('DOI:                                 https://oadoi.org/{}'.format(doi))
# Guard on core (the original tested doi here), so the CORE link is printed
# exactly when a CORE id exists.
if core:
    print('core:                                https://core.ac.uk/display/{}'.format(core))

Microsoft academic link for article: https://academic.microsoft.com/#/detail/2270814667
DOI:                                 https://oadoi.org/10.1103/physrevd.93.015012
core:                                https://core.ac.uk/display/45448247


### Open Access.
If a link to CORE exists, then the article is Open Access.

In [222]:
# Flag a record as open access when its 'core' identifier is present (not null).
ln['Open_Access'] = ln['core'].notnull()

In [223]:
ln.loc[i,'Open_Access']

True

### Authors

In [183]:
ln.loc[i,'authors']

[{'affiliations': [{'grid': {'addresses': [{'city': 'Liège',
       'country_code': 'BE',
       'lat': 50.583291,
       'lon': 5.5590530000000005,
       'state_code': ''}],
     'email_address': None,
     'id': 'grid.4861.b',
     'links': ['https://www.uliege.be/cms/c_8699436/fr/portail-uliege'],
     'name': 'University of Liège',
     'status': 'active',
     'types': ['Education'],
     'wikipedia_url': 'http://en.wikipedia.org/wiki/University_of_Li%C3%A8ge'},
    'ids': [{'type': 'magid', 'value': 'mag157674565'},
     {'type': 'grid', 'value': 'grid.4861.b'},
     {'type': 'isni', 'value': '0000000108057253'},
     {'type': 'fundref', 'value': '501100005627'},
     {'type': 'fundref', 'value': '501100006673'},
     {'type': 'orgref', 'value': '2273279'},
     {'type': 'wikidata', 'value': 'q1334582'}],
    'name': 'University of Liège',
    'name_raw': "Université de Liège > Département d'astrophys., géophysique et océanographie (AGO) > Inter. fondamentales en physique et ast

Full names

In [184]:
# Build "first last" for every author of each record. The name parts are read
# with .get(), so a missing part is None; joining only the non-empty parts
# avoids the TypeError that concatenating None with a string would raise.
ln['full_names']=ln['authors'].apply(lambda authors: [
    ' '.join(part for part in (author.get('first_name'), author.get('last_name')) if part)
    for author in authors])

In [185]:
ln.loc[i,'full_names']

['Diego Aristizabal Sierra',
 'Avelino Vicente',
 'Juan Herrero-Garcia',
 'Diego Restrepo']

Affiliation info. Each author can have several affiliations.

In [186]:
# One inner list per author, holding the 'name' of each of that author's affiliations.
ln['affiliations'] = ln['authors'].apply(
    lambda authors: [[aff.get('name') for aff in author.get('affiliations')]
                     for author in authors])

In [187]:
ln.loc[i,'affiliations']

[['University of Liège'],
 ['Spanish National Research Council'],
 ['Royal Institute of Technology'],
 ['University of Antioquia']]

In [188]:
# One inner list per author, holding the grid 'addresses' list of each affiliation.
ln['affiliations_details'] = ln['authors'].apply(
    lambda authors: [[aff.get('grid').get('addresses') for aff in author.get('affiliations')]
                     for author in authors])

In [189]:
# Project one address field at a time out of the nested structure
# (authors -> affiliations -> addresses), keeping the nesting intact.
address_fields = {
    'affiliations_details_lat': 'lat',
    'affiliations_details_lon': 'lon',
    'affiliations_details_cc': 'country_code',
}
for column, field in address_fields.items():
    ln[column] = ln['affiliations_details'].apply(
        # key=field binds the current field name to the lambda explicitly
        lambda per_author, key=field: [
            [[address.get(key) for address in addresses] for addresses in author_affs]
            for author_affs in per_author])

Sample of Affiliations details

In [190]:
ln.loc[i,'affiliations_details'][0][0]

[{'city': 'Liège',
  'country_code': 'BE',
  'lat': 50.583291,
  'lon': 5.5590530000000005,
  'state_code': ''}]

Extraction of relevant details

In [191]:
ln.loc[i,'affiliations_details_cc']

[[['BE']], [['ES']], [['SE']], [['CO']]]

In [192]:
ln.loc[i,'affiliations_details_lat']

[[[50.583291]], [[40.595884]], [[59.34748]], [[6.267417]]]

In [193]:
ln.loc[i,'affiliations_details_lon']

[[[5.5590530000000005]], [[-3.688175]], [[18.073526]], [[-75.568389]]]

### Journal

In [194]:
ln['journal'].loc[0]

{'country': 'United States',
 'issn': [{'type': 'print', 'value': '24700010'},
  {'type': 'electronic', 'value': '24700029'}],
 'publisher': 'American Physical Society (APS)',
 'title_full': 'Physical Review D'}

In [195]:
# Flatten the nested 'journal' dict into one top-level column per field.
journal_fields = {
    'journal_name': 'title_full',
    'issn': 'issn',
    'journal_country': 'country',
    'publisher': 'publisher',
}
for column, field in journal_fields.items():
    # key=field binds the current field name to the lambda explicitly
    ln[column] = ln['journal'].apply(lambda journal, key=field: journal.get(key))

In [196]:
ln[['journal_name','issn','journal_country','publisher']]

Unnamed: 0,journal_name,issn,journal_country,publisher
0,Physical Review D,"[{'type': 'print', 'value': '24700010'}, {'typ...",United States,American Physical Society (APS)


### Citations

Scholarly Citations:

In [197]:
ln.loc[i,'referenced_by_count']

14

Patent Citations

In [198]:
ln.loc[i,'referenced_by_patent_count']

0

### Fields of study

In [199]:
# Keep only the 'name' of every field-of-study entry.
ln['fields_of_study_name'] = ln['fields_of_study'].apply(
    lambda fields: [field.get('name') for field in fields])

In [200]:
ln.loc[i,'fields_of_study_name']

['Particle physics',
 'Quantum chromodynamics',
 'Physics',
 'Higgs boson',
 'Physics beyond the Standard Model',
 'Two-Higgs-doublet model',
 'Z-channel',
 'Quantum electrodynamics',
 'Gauge boson',
 'Standard Model',
 'Large Hadron Collider']

### PDFs

In [201]:
# List the URLs of all sources whose type is 'pdf'.
ln['PDFs'] = ln['source_urls'].apply(
    lambda sources: [source.get('url') for source in sources
                     if source.get('type') == 'pdf'])

In [202]:
# Take the first PDF link that contains 'arxiv.org' and reduce it to the bare
# arXiv identifier. Raw strings avoid invalid-escape warnings for \. and \g,
# the dots are escaped so they match literally, and regex=True is needed
# because pandas >= 2.0 defaults to literal (non-regex) replacement.
ln['arXiv']=ln['source_urls'].apply(
    lambda sources: [source.get('url') for source in sources
                     if source.get('type')=='pdf' and 'arxiv.org' in source.get('url')]
    ).str[0].str.replace(r'http://arxiv\.org/pdf/([0-9]+\.[0-9]+)\.pdf', r'\g<1>', regex=True)

In [203]:
ln.loc[i,'PDFs']

['http://arxiv.org/pdf/1510.03437.pdf',
 'http://digital.csic.es/bitstream/10261/131047/1/FPA2014-58183-P-Diboson.pdf']

In [204]:
ln.loc[i,'arXiv']

'1510.03437'

### Self-explanatory fields

In [205]:
# Show the remaining columns for the row labelled 0; .loc[[0]] (list selector)
# keeps the result a one-row DataFrame instead of a Series.
ln[['abstract', 'date_published', 'funding',
       'issue', 'keywords', 'languages', 'mesh_terms', 'pages_end',
       'pages_start', 'publication_supplementary_type', 'publication_type',
       'title', 'volume', 'year_published']].loc[[0]]

Unnamed: 0,abstract,date_published,funding,issue,keywords,languages,mesh_terms,pages_end,pages_start,publication_supplementary_type,publication_type,title,volume,year_published
0,The ATLAS Collaboration (and also CMS) has rec...,2016-01-21T00:00:00+00:00,"[{'org_id': '10.13039/501100002661', 'funding_...",1,[],[en],[],,15012,[],journal article,Diboson anomaly: Heavy Higgs resonance and QCD...,93,2016


In [206]:
ln.loc[i,'references']

[{'lens_id': 157243461017},
 {'lens_id': 511908356197},
 {'lens_id': 554765889905},
 {'lens_id': 625262539586},
 {'lens_id': 656423764547},
 {'lens_id': 1326319377136},
 {'lens_id': 1514844071549},
 {'lens_id': 2493872779822},
 {'lens_id': 2494092339066},
 {'lens_id': 2566123636854},
 {'lens_id': 2825915349771},
 {'lens_id': 3277397450819},
 {'lens_id': 3498731332786},
 {'lens_id': 3596585624382},
 {'lens_id': 3610408284260},
 {'lens_id': 3637284790067},
 {'lens_id': 3759492979011},
 {'lens_id': 4081061587669},
 {'lens_id': 4131186365653},
 {'lens_id': 4231001148730},
 {'lens_id': 4807770246664},
 {'lens_id': 4828167432673},
 {'lens_id': 4836482296561},
 {'lens_id': 5007459714612},
 {'lens_id': 5141667571468},
 {'lens_id': 5568654687238},
 {'lens_id': 5723578187014},
 {'lens_id': 5776840922724},
 {'lens_id': 6191093647811},
 {'lens_id': 6400858984624},
 {'lens_id': 6600218844479},
 {'lens_id': 6692074167290},
 {'lens_id': 6918307828900},
 {'lens_id': 7006910898580},
 {'lens_id': 743654