In [4]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

# Standard library
import json

# Module specific
#import ads
import ads.sandbox as ads
import pandas as pd
import markovify

In [5]:
# Which metadata fields do we want to retrieve from the ADS API?
# Only these six fields are requested, which keeps the response small
# (bulky fields such as 'aff' and 'body' are not in the list).
FIELDS = ['pub', 'citation_count', 'year', 'first_author_norm',
          'title', 'property'
         ]

In [7]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

# Standard library
import json
import re
import argparse

# Module specific
import ads
#import ads.sandbox as ads
import pandas as pd

# Which metadata fields do we want to retrieve from the ADS API?
# This list is handed to ads.SearchQuery as its `fl` argument in getPapers().
# NOTE: it repeats the values assigned to FIELDS in the earlier cell.
FIELDS = ['pub', 'citation_count', 'year', 'first_author_norm',
          'title', 'property'
         ]

def getPapers(year=1991,rows=200000, mincite=2):
    """Build the ADS search query for one publication year.

    Parameters
    ----------
    year : publication year passed to the query.
    rows : number of rows requested from the service.
    mincite : lower bound (inclusive) used in the citation_count filter.

    Returns
    -------
    The ``ads.SearchQuery`` object itself (not a list of papers); callers
    iterate over it to obtain the individual records.

    NOTE(review): how many records are actually returned for a large `rows`
    value depends on the ads client / service limits -- confirm.
    """
    # Filter queries: refereed astronomy articles with at least `mincite`
    # citations.
    citation_filter = 'citation_count:[{} TO *]'.format(mincite)
    filter_queries = ['database:astronomy',
                      'property:refereed',
                      'property:article',
                      citation_filter]

    return ads.SearchQuery(
        rows=rows,
        year=year,
        fl=FIELDS,
        database="astronomy",
        sort='citation_count desc',
        fq=filter_queries,
    )

def makeDataframe(year=1991,rows=200000, mincite=2):
    """Collect the query results for `year` into a two-column DataFrame.

    The arguments are forwarded unchanged to getPapers(). Every record the
    query yields is reduced to a [lastname, title] pair by
    returnLastnameTitle().

    Returns
    -------
    pandas.DataFrame with the columns 'lastname' and 'title'.
    """
    # Exhaust the query first so all records are in hand before conversion.
    records = list(getPapers(year=year, rows=rows, mincite=mincite))

    name_title_pairs = [returnLastnameTitle(record) for record in records]

    return pd.DataFrame(name_title_pairs, columns=['lastname', 'title'])

def returnLastnameTitle(q):
    """Extract a cleaned [lastname, title] pair from one search result.

    Parameters
    ----------
    q : object exposing ``first_author_norm`` (a "Last, F" style string) and
        ``title`` (a sequence whose first element is the title string).

    Returns
    -------
    list
        ``[lastname, title]`` with every character that is not whitespace or
        a word character (and every underscore) stripped out. If the author
        or the title is missing/unusable, the placeholder ``['none', 'none']``
        is returned instead.
    """
    # last name: text before the first comma of the normalised author string
    try:
        lastname = q.first_author_norm.split(',')[0]
        lastname = re.sub(r'([^\s\w]|_)+', '', lastname)
    except AttributeError:
        # first_author_norm is absent or None
        return ['none','none']

    # paper title
    try:
        title = q.title[0]
        title = re.sub(r'([^\s\w]|_)+', '', title)
    except (TypeError, IndexError):
        # TypeError: title is None (or its first element is not a string);
        # IndexError: title is an empty list. Either way there is no usable
        # title, so fall back to the placeholder rather than aborting the
        # whole harvest on a single odd record.
        return ['none','none']

    return [lastname,title]

def toJson(year=1991,rows=200000, mincite=2):
    """Query one year of papers and write them to ``data/<year>.json``.

    The arguments are forwarded unchanged to makeDataframe(). The output
    directory ``data`` is created if it does not exist yet; this is done
    before the query runs so a missing directory cannot discard the results
    of a long download.
    """
    import os  # local import: only this function needs it

    # isdir + makedirs instead of makedirs(exist_ok=True), because the
    # notebook still targets Python 2 (see the __future__ imports).
    if not os.path.isdir('data'):
        os.makedirs('data')

    df = makeDataframe(year=year,rows=rows, mincite=mincite)
    df.to_json('data/{}.json'.format(year))



In [51]:
def printReference(year):
    """Print a randomly generated "<author> et al., <title> (<year>)" line.

    Reads ``data/<year>.json`` (columns 'lastname' and 'title'), builds a
    first-order Markov model from all titles, generates a short title of at
    most 90 characters, and pairs it with a randomly drawn surname.

    Parameters
    ----------
    year : year whose JSON file is read and which is printed in the output.

    Raises
    ------
    ValueError
        If the file contains no usable (non-placeholder) rows.
    """
    df = pd.read_json('data/{}.json'.format(year)).sort_index()

    # returnLastnameTitle() stores ['none', 'none'] for records it could not
    # parse. Drop those rows so the placeholder neither enters the Markov
    # corpus nor gets picked as the "author".
    df = df[(df['lastname'] != 'none') & (df['title'] != 'none')]
    if df.empty:
        raise ValueError('no usable papers in data/{}.json'.format(year))

    textstr = '. '.join(df['title'])
    text_model = markovify.Text(textstr, state_size=1, )

    # make_short_sentence() returns None when it cannot build a sentence
    # within its internal number of tries, so give it a few more chances.
    outtitle = None
    for _ in range(5):
        outtitle = text_model.make_short_sentence(90)
        if outtitle is not None:
            break
    if outtitle is None:
        print('Could not generate a title for {}'.format(year))
        return

    # Prefer surnames that occur more than once; if there are none, fall
    # back to every surname instead of failing on an empty sample.
    name_counts = df['lastname'].value_counts()
    repeated = name_counts[name_counts > 1]
    candidates = repeated if len(repeated) > 0 else name_counts
    author = candidates.sample().index[0]

    print('{} et al., {} ({})'.format(author,outtitle,year))

In [53]:
# Generate and print one synthetic reference from data/2012.json.
printReference(2012)

Capozziello et al., Conformal and Physical Evolution of Candidate Cluster of our Galaxy. (2012)


Sudilovsky Cyclic Adenosine 3',5'-Monophosphate during Glucose Repression in the Rat Liver




Unnamed: 0,lastname,title
0,Sudilovsky,"Cyclic Adenosine 3',5'-Monophosphate during Gl..."
1,Sudilovsky,GRB120404A: GROND observations show steeply de...
2,Sudilovsky,GROND observations of GRB 120311A
3,Sudilovsky,GRB 120324A: GROND observations
4,Sudilovsky,GRB 120401A: GROND detection of an optical/NIR...
5,Sudilovsky,GRB 131002B: GROND upper limits
6,Sudilovsky,GRB 131024A: GROND upper limits
7,Sudilovsky,GRB 130727A: GROND observations
8,Sudilovsky,GRB 130831B: GROND upper limits
9,Sudilovsky,GRB 130211A: retraction of afterglow candidate


[]

[[u'Sudilovsky',
  u'Cyclic Adenosine 35Monophosphate during Glucose Repression in the Rat Liver'],
 [u'Sudilovsky',
  u'GRB120404A GROND observations show steeply decaying afterglow'],
 [u'Sudilovsky', u'GROND observations of GRB 120311A'],
 [u'Sudilovsky', u'GRB 120324A GROND observations'],
 [u'Sudilovsky',
  u'GRB 120401A GROND detection of an opticalNIR afterglow candidate'],
 [u'Sudilovsky', u'GRB 131002B GROND upper limits'],
 [u'Sudilovsky', u'GRB 131024A GROND upper limits'],
 [u'Sudilovsky', u'GRB 130727A GROND observations'],
 [u'Sudilovsky', u'GRB 130831B GROND upper limits'],
 [u'Sudilovsky', u'GRB 130211A retraction of afterglow candidate'],
 [u'Sudilovsky', u'GRB 130903A GROND upper limits'],
 [u'Sudilovsky',
  u'GRB 110223B GROND detection of optical afterglow candidate'],
 [u'Sudilovsky', u'GRB 110825A GROND observations'],
 [u'Sudilovsky', u'GRB 120804A GROND upper limits'],
 [u'Sudilovsky', u'GROND observations of GRB 120327A'],
 [u'Sudilovsky', u'GRB 130925A GROND a

In [45]:
# Load the 2015 records and order them by index.
# DataFrame.sort() is deprecated (and removed in later pandas releases);
# sort_index() is the supported way to sort by the index, and returning a
# new frame avoids in-place mutation so the cell can be re-run safely.
df = pd.read_json('data/2015.json', ).sort_index()

  from ipykernel import kernelapp as app


(2000, 2)

Calibration of the mass in three dimensions: II.
Time Lags and 250/350 μm surface compositions of live <SUP>244</SUP>Pu in HL Tau.
The uranium isotopic anomalies in the EAGLE simulation.
Gas Giant Extrasolar System OGLE-2014-BLG-1050L.


u'Alonso'

  app.launch_new_instance()


A Search of a parallel collisionless shock. Saha et al. (1992)


In [119]:
# Inspect the (rows, columns) dimensions of `df`.
df.shape

(2000, 2)