In [1]:
# %pip install transformers
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
import re
import numpy as np
import ipywidgets as widgets
import requests
import json
from dataclasses import dataclass
from typing import Dict, List

In [10]:
# Parameters
# Depth bound for the recursive crawl; get_relevant_papers compares its
# current depth against this value before recursing.
max_depth = 2
# Set to True to skip each paper's `related` list during the crawl.
ignore_related = False
# Set to True to skip each paper's `references` list during the crawl.
ignore_referenced = False
# Base endpoint used for both the title search and single-work lookups.
base_works_url = "https://api.openalex.org/works"

In [3]:
@dataclass
class Article:
    """The subset of a paper's details that the notebook keeps track of.

    Attributes are documented inline below.
    """
    # Work identifier (the last path segment of the work's URL).
    id: str
    # Paper title; also used as the string form of the article.
    title: str
    # Maps each word of the abstract to the positions at which it occurs.
    # May be None or empty when no abstract is available for the work.
    inverted_abstract: Dict[str, List[int]]
    # Identifiers of the works this paper references.
    references: List[str]
    # Identifiers of the works listed as related to this paper.
    related: List[str]

    def get_abstract(self):
        """Rebuild the plain-text abstract from the inverted index.

        Returns:
            The abstract's words in position order, separated by single
            spaces, or an empty string when there is no abstract.
        """
        # Guard against a missing abstract instead of failing on `.items()`.
        if not self.inverted_abstract:
            return ""

        # Invert the index: position -> word.
        words_by_position = {
            position: word
            for word, positions in self.inverted_abstract.items()
            for position in positions
        }
        return " ".join(
            words_by_position[position] for position in sorted(words_by_position)
        )

    def __str__(self):
        return self.title

# Search for Article Title
Edit the `title` variable below to search for a paper. If the title is not an exact match, the search returns up to 25 of the most relevant papers in the OpenAlex dataset. Select the correct paper from the dropdown menu.

In [4]:
title = "Attention is all"
# Let requests build and percent-encode the query string so that titles
# containing characters such as '&', '#' or '+' are sent correctly
# (replacing only spaces by hand is not enough).
req = requests.get(base_works_url, params={"filter": f"title.search:{title}"})
# Fail with a clear HTTP error rather than a confusing KeyError further down.
req.raise_for_status()
response = req.json()

relevant_titles = [result['title'] for result in response['results']]
if not relevant_titles:
    # Without this guard, `relevant_titles[0]` raises an opaque IndexError.
    raise ValueError(f"No OpenAlex works match the title search {title!r}.")

title_selector = widgets.Dropdown(
    options=relevant_titles,
    value=relevant_titles[0],
    description="Title: "
)
display(title_selector)

Dropdown(description='Title: ', options=('Attention is All you Need', 'Attention Is All You Need', 'All That G…

In [5]:
# Deliberate stop: halts "Run All" at this point so a title can be selected in
# the dropdown above before the cells below are executed.
raise Exception("Please select correct title above. If done, run all cells below this one.")

Exception: Please select correct title above. If done, run all cells below this one.

In [18]:
def fetch_article(work_id):
    """Download a single work from the works endpoint and wrap it in an Article.

    Args:
        work_id: Identifier of the work, appended to `base_works_url`.

    Returns:
        An Article whose `id` is `work_id` and whose references / related
        lists hold the last path segment of each linked work's URL.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    req = requests.get(base_works_url + '/' + work_id)
    req.raise_for_status()
    result = req.json()

    # Linked works arrive as full URLs; keep only the trailing identifier.
    # `or []` tolerates a missing or null list.
    referenced_ids = [work.split('/')[-1] for work in result.get('referenced_works') or []]
    related_ids = [work.split('/')[-1] for work in result.get('related_works') or []]

    return Article(
        work_id,  # was the builtin `id`, which is not the work's identifier
        result['title'],
        result['abstract_inverted_index'],
        referenced_ids,
        related_ids
    )

In [21]:
# Map the selected title back to its search result.
# NOTE(review): if two results share an identical title, `.index` picks the
# first one — confirm that is acceptable.
index = relevant_titles.index(title_selector.value)
papers = dict()
# The result's id is a URL; keep only its last path segment.
root_id = response['results'][index]['id'].split('/')[-1]

# Fetch the root paper by its own identifier (previously the builtin `id`
# was passed here, which fails on a fresh kernel).
papers[root_id] = fetch_article(root_id)

In [22]:
# Show the identifiers of the papers collected so far.
papers.keys()

dict_keys(['W2963403868'])

In [23]:
# Inspect the root paper via `root_id` so this cell keeps working when a
# different title is selected (a hard-coded identifier raises KeyError then).
papers[root_id]

Article(id='W2963403868', title='Attention is All you Need', inverted_abstract={'The': [0, 19], 'dominant': [1], 'sequence': [2], 'transduction': [3], 'models': [4, 23, 59], 'are': [5], 'based': [6, 41], 'on': [7, 52], 'complex': [8], 'recurrent': [9], 'orconvolutional': [10], 'neural': [11], 'networks': [12], 'in': [13], 'an': [14, 31], 'encoder': [15, 27], 'and': [16, 28, 49, 68], 'decoder': [17, 29], 'configuration.': [18], 'best': [20, 90], 'performing': [21], 'such': [22], 'also': [24], 'connect': [25], 'the': [26, 88, 102], 'through': [30], 'attentionm': [32], 'echanisms.': [33], 'We': [34], 'propose': [35], 'a': [36, 111], 'novel,': [37], 'simple': [38], 'network': [39], 'architecture': [40], 'solely': [42], 'onan': [43], 'attention': [44], 'mechanism,': [45], 'dispensing': [46], 'with': [47, 77, 105], 'recurrence': [48], 'convolutions': [50], 'entirely.Experiments': [51], 'two': [53], 'machine': [54], 'translation': [55], 'tasks': [56], 'show': [57], 'these': [58], 'to': [60], 

In [24]:
# Turn the "ignore_*" parameters into positive flags read by the crawl.
use_references = not ignore_referenced
use_related = not ignore_related

# Papers newly discovered at each crawl depth, keyed by depth.
related_works: Dict[int, List[Article]] = {}

def get_relevant_papers(current_depth: int, previous: List[Article]):
    """Recursively collect the papers linked from `previous`.

    For every article in `previous`, each referenced and/or related work
    (depending on `use_references` / `use_related`) that is not yet in
    `papers` is fetched, stored in `papers` under its identifier, and
    appended to `related_works[current_depth]`. The function then recurses
    on the newly found papers until `current_depth` reaches `max_depth`.

    Args:
        current_depth: Depth of the level being filled in by this call.
        previous: Articles found at the previous level.
    """
    related_works[current_depth] = []
    print(current_depth)
    for parent in previous:
        # References first, then related works — one shared loop below.
        candidate_ids = []
        if use_references:
            candidate_ids.extend(parent.references)
        if use_related:
            candidate_ids.extend(parent.related)

        for paper_id in candidate_ids:
            if paper_id not in papers:
                article = fetch_article(paper_id)
                # `papers` is a dict keyed by identifier, so assign by key
                # (dicts have no `.append`).
                papers[paper_id] = article
                related_works[current_depth].append(article)

    # Strict comparison: this level has already been processed, so `<=`
    # would crawl one level beyond max_depth.
    if current_depth < max_depth:
        get_relevant_papers(current_depth + 1, related_works[current_depth])