In [46]:
import json
import pandas as pd
import uuid
import pprint as pp
# Bind `pprint` to the pprint method of a PrettyPrinter configured with
# indent=4, so later cells can call pprint(obj) directly.
pprint = pp.PrettyPrinter(indent=4).pprint

In [18]:
# Scratch check: generate one UUID with uuid.uuid4() and display its repr.
uuid.uuid4()

UUID('05e17252-7d6f-4efd-afa1-07d388a60d9e')

In [20]:
# Sketch of a project record: a freshly generated UUID rendered as a string,
# a display name, and an empty list of papers.
{
    "id": str(uuid.uuid4()),
    "name": "Sample Project",
    "papers": [],
}

{'id': '6e7d44c0-6033-4d2d-9fa0-aec24981ba35',
 'name': 'Sample Project',
 'papers': []}

In [21]:
# Paper general information: sketch of a paper record with a string UUID id,
# a title, a list of author names, an abstract, and a year (an int in this
# sample).
{
    "id": str(uuid.uuid4()),
    "title": "Sample Paper",
    "authors": ["Author 1", "Author 2"],
    "abstract": "This is a sample abstract",
    "year": 2020,
}

{'id': '5372b106-228e-4a32-846d-2f323a19b4cc',
 'title': 'Sample Paper',
 'authors': ['Author 1', 'Author 2'],
 'abstract': 'This is a sample abstract',
 'year': 2020}

In [53]:
import requests
import xml.etree.ElementTree as ET



def get_semantic_scholar_info(arxiv_id, timeout=10):
    """Return the number of citations Semantic Scholar lists for an arXiv paper.

    Args:
        arxiv_id: arXiv identifier; it is appended to the ``arXiv:`` lookup URL.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up.

    Returns:
        int: Length of the ``citations`` list in the JSON response, or 0 when
        that key is absent or null.
    """
    base_url = "https://api.semanticscholar.org/v1/paper/arXiv:"

    # Fetch data from Semantic Scholar. Without a timeout, requests would wait
    # indefinitely on a stalled connection.
    response = requests.get(base_url + arxiv_id, timeout=timeout)
    data = response.json()

    # Semantic Scholar doesn't have a generic "impact score", so the citation
    # count is the only metric returned. `or []` also covers an explicit null.
    citations = data.get("citations") or []
    return len(citations)



def get_paper_base_data(paper_id, timeout=10):
    """Fetch a paper's metadata from the arXiv API plus its citation count.

    Args:
        paper_id: arXiv identifier, sent as the ``id_list`` query parameter.
        timeout: Seconds to wait for the arXiv HTTP response.

    Returns:
        dict with the keys ``paper_id``, ``title``, ``authors`` (list of the
        ``name`` text of every ``author`` element), ``abstract``, ``year``
        (the text before the first ``-`` of ``published``, kept as a string)
        and ``citations`` (whatever ``get_semantic_scholar_info`` returns).

    Raises:
        ValueError: if the feed has no ``entry`` element, or the entry lacks
            a ``title``, ``summary`` or ``published`` text.
        Whatever ``response.raise_for_status()`` raises for an unsuccessful
        request.
    """
    base_url = "http://export.arxiv.org/api/query"

    # Let requests build and URL-encode the query string instead of
    # concatenating it by hand; bound the wait with a timeout.
    response = requests.get(base_url, params={"id_list": paper_id}, timeout=timeout)

    # Raise an exception if the request was unsuccessful
    response.raise_for_status()

    # Parse the XML response
    root = ET.fromstring(response.text)

    # Extract information from XML
    ns = {'default': 'http://www.w3.org/2005/Atom'}
    entry = root.find('default:entry', ns)
    if entry is None:
        # Fail with a readable message rather than an AttributeError on None.
        raise ValueError("arXiv returned no entry for paper id {!r}".format(paper_id))

    def required_text(tag):
        """Return the text of the entry's child ``tag`` or raise ValueError."""
        node = entry.find('default:' + tag, ns)
        if node is None or node.text is None:
            raise ValueError(
                "arXiv entry for paper id {!r} has no <{}> text".format(paper_id, tag)
            )
        return node.text

    title = required_text('title').strip()
    authors = [author.find('default:name', ns).text for author in entry.findall('default:author', ns)]
    abstract = required_text('summary').strip()
    year = required_text('published').split('-')[0]

    # Return the extracted information in the desired format
    return {
        "paper_id": paper_id,
        "title": title,
        "authors": authors,
        "abstract": abstract,
        "year": year,
        "citations": get_semantic_scholar_info(paper_id)
    }

# Test: fetch one paper and pretty-print the resulting dict.
# NOTE: get_paper_base_data issues HTTP requests, so this cell needs network
# access when it is run.
paper_id = "1710.03599"
# paper_id = "2101.12345"
pprint(get_paper_base_data(paper_id))


{   'abstract': 'Quantum computing allows for the potential of significant '
                'advancements in\n'
                'both the speed and the capacity of widely used machine '
                'learning techniques.\n'
                'Here we employ quantum algorithms for the Hopfield network, '
                'which can be used\n'
                'for pattern recognition, reconstruction, and optimization as '
                'a realization of a\n'
                'content-addressable memory system. We show that an '
                'exponentially large network\n'
                'can be stored in a polynomial number of quantum bits by '
                'encoding the network\n'
                'into the amplitudes of quantum states. By introducing a '
                'classical technique for\n'
                'operating the Hopfield network, we can leverage quantum '
                'algorithms to obtain a\n'
                'quantum computational complexity that is logarit

In [44]:
# Sample of a per-project paper record: it ties a paper_id to a project_id and
# carries a rating, annotations, engagement counters and saved/read flags.
# NOTE(review): the engagement counter is called "view_count" here, while
# add_paper_to_project and update_engagement use "click_count" -- confirm
# which key is intended.
paper_in_project = {
    "paper_id": "1710.03599",
    "project_id": "1234567890",
    "rating": 1,
    "annotations": {},
    "engagement": {
        "view_count": 0,
        "view_duration": 0,
    },
    "saved": True,
    "read": False
}

In [48]:
import pysondb

In [62]:
def add_project(name):
    """Store a new project called ``name`` with no papers yet.

    The record is handed to ``projects.add`` and that call's result (the id
    of the new record) is returned unchanged.
    """
    new_id = projects.add({"name": name, "papers": []})
    return new_id

utf-8


823153983130900377

In [84]:
def add_paper_to_project(project_id, paper_id):
    """Link a paper to a project and record the link on the project.

    A link record with default rating, annotations, engagement counters and
    saved/read flags is added to ``projPapers``; its id is then appended to
    the project's ``papers`` list in ``projects``.

    Args:
        project_id: id of the record in ``projects`` to attach the paper to.
        paper_id: identifier of the paper being attached.

    Returns:
        The id that ``projPapers.add`` returned for the new link record.
    """
    # Read the project first so that an unknown project_id fails before
    # anything is written, instead of leaving an orphaned link record behind.
    linked_ids = projects.getById(project_id)["papers"]

    project_paper = {
        "project_id": project_id,
        "paper_id": paper_id,
        "rating": 1,
        "annotations": {},
        "engagement": {
            "click_count": 0,
            "view_duration": 0,
        },
        "saved": False,
        "read": False
    }
    # Named link_id rather than `id` to avoid shadowing the builtin.
    link_id = projPapers.add(project_paper)

    projects.updateById(project_id, {"papers": linked_ids + [link_id]})
    return link_id

In [82]:
def update_engagement(metric, value, project_id, paper_id):
    """Update one engagement field of a paper's link record in a project.

    Args:
        metric: One of ``"click"`` (adds 1 to ``click_count``; ``value`` is
            ignored), ``"view"`` (adds ``value`` to ``view_duration``),
            ``"read"`` (sets ``read`` to ``value``) or ``"save"`` (sets
            ``saved`` to ``value``).
        value: Amount or flag to apply, depending on ``metric``.
        project_id: ``project_id`` of the link record to update.
        paper_id: ``paper_id`` of the link record to update.

    Raises:
        ValueError: if ``metric`` is not one of the four supported names.
        IndexError: if no link record matches the paper and project.
    """
    # Reject typos up front; previously an unknown metric was a silent no-op.
    if metric not in ("click", "view", "read", "save"):
        raise ValueError("unknown engagement metric: {!r}".format(metric))

    matches = projPapers.getByQuery({"paper_id": paper_id, "project_id": project_id})
    if not matches:
        raise IndexError(
            "paper {!r} is not linked to project {!r}".format(paper_id, project_id)
        )

    # Only the first match is updated. Its fields are reused directly instead
    # of fetching the same record again by id.
    record = matches[0]
    db_paper_id = record["id"]

    if metric in ("click", "view"):
        engagement = dict(record["engagement"])
        if metric == "click":
            engagement["click_count"] += 1
        else:
            engagement["view_duration"] += value
        changes = {"engagement": engagement}
    elif metric == "read":
        changes = {"read": value}
    else:  # metric == "save"
        changes = {"saved": value}

    projPapers.updateById(db_paper_id, changes)


In [87]:
# Handles to the three pysondb stores used by add_project,
# add_paper_to_project and update_engagement.
papers = pysondb.db.getDb("data/papers")
projects = pysondb.db.getDb("data/projects")
projPapers = pysondb.db.getDb("data/project_papers")

# WARNING: destructive. deleteAll() is called on every store before seeding,
# so re-running this cell discards whatever was stored there.
papers.deleteAll()
projects.deleteAll()
projPapers.deleteAll()

# testing: this is initial data

# NOTE(review): besides the exact repeat of '1709.02779', the list contains
# both bare and versioned forms of some ids (e.g. '1611.09347' and
# '1611.09347v2'); confirm whether those should count as the same paper.
paper_ids = ['1709.02779',
 '1611.09347',
 '1611.09347v2',
 '1708.09757',
 '1709.02779',
 '1707.08561v1',
 '1708.09757v1',
 '2208.08068',
 '1710.03599v1',
 '1908.04480v2']

proj_id = add_project("Sample Project")

# dict.fromkeys drops exact repeats while keeping first-seen order, so a
# paper id listed twice is fetched and linked only once.
for paper_id in dict.fromkeys(paper_ids):
    papers.add(get_paper_base_data(paper_id))
    add_paper_to_project(proj_id, paper_id)

# Exercise every engagement metric once on the last listed paper. The target
# is named explicitly instead of relying on the loop variable left over from
# the loop above.
engaged_paper_id = paper_ids[-1]

update_engagement("click", 1, proj_id, engaged_paper_id)
update_engagement("view", 15, proj_id, engaged_paper_id)
update_engagement("view", 15, proj_id, engaged_paper_id)
update_engagement("read", True, proj_id, engaged_paper_id)
update_engagement("save", True, proj_id, engaged_paper_id)


utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
utf-8
