# Example Notebook for Integrating with Hugging Face

# Setup

In [None]:
import requests
import huggingface_hub as hfhub

In [None]:
from sciterra import Atlas
from sciterra import Cartographer
from sciterra.librarians import SemanticScholarLibrarian # or ADSLibrarian
from sciterra.vectorization import SciBERTVectorizer # among others

### Settings

In [None]:
# Notebook configuration.
# Hugging Face model id to query.
# Alternative model id (currently disabled): "Falconsai/text_summarization"
model = "liminerity/Phigments12"
# Directory the atlas is loaded from.
atlas_dirpath = "../atlas"
# Number of top-ranked publications whose abstracts are combined.
n_summarized = 10

### Sciterra

In [None]:
# Load the atlas from the directory configured in `atlas_dirpath`.
atl = Atlas.load(atlas_dirpath)

In [None]:
# Build the cartographer from its two components: a Semantic Scholar
# librarian and a SciBERT vectorizer.
librarian = SemanticScholarLibrarian()
vectorizer = SciBERTVectorizer()
crt = Cartographer(librarian=librarian, vectorizer=vectorizer)

### HFHub

In [None]:
# Login: reuse a token already known to huggingface_hub; otherwise start the
# interactive login flow and look the token up again.
token = hfhub.get_token()
if token is None:
    hfhub.login()
    token = hfhub.get_token()

# Fail early instead of silently building the header "Bearer None".
# NOTE(review): in notebooks login() may return before a token has been
# entered (widget-based flow) — confirm. Re-running this cell after logging
# in picks up the stored token.
if token is None:
    raise RuntimeError(
        "No Hugging Face token available. Complete the login prompt, "
        "then re-run this cell."
    )

# Format for Inference API
headers = {"Authorization": f"Bearer {token}"}

In [None]:
def query(payload, timeout=120):
    """Send `payload` to the Hugging Face Inference API for `model`.

    The target URL is built from the module-level `model` setting and the
    request is authorized with the module-level `headers`.

    Parameters
    ----------
    payload
        JSON-serializable object sent as the request body.
    timeout
        Seconds to wait for the server before `requests` raises a timeout
        error. Without a timeout the call could block indefinitely.

    Returns
    -------
    The `requests` response object, unmodified. The HTTP status is not
    checked here; callers decide how to handle error responses.
    """
    api_url = f"https://api-inference.huggingface.co/models/{model}"
    return requests.post(api_url, headers=headers, json=payload, timeout=timeout)

# Exploration

In [None]:
# Find the publications most-similar to the original.
# `crt.sort` is called with the atlas center as the reference point and its
# result is unpacked into two sequences.
# NOTE(review): presumably `sorted_keys` holds publication identifiers ordered
# by similarity and `sorted_values` the matching scores — confirm against
# Cartographer.sort.
sorted_keys, sorted_values = crt.sort(atl, center=atl.center)

In [None]:
# Build a single prompt from the abstracts of the most-similar publications.
# The header ends with a newline so it does not run into the first label, and
# abstracts are numbered from 1 to avoid ordinals such as "0th" or "1th".
prompt_parts = ["Please summarize the following abstracts:\n"]
for number, identifier in enumerate(sorted_keys[:n_summarized], start=1):
    prompt_parts.append(f"This is abstract {number}:\n")
    prompt_parts.append(atl.publications[identifier].abstract)
    prompt_parts.append("\n")

# Join once instead of growing the string with repeated `+=`.
combined_abstracts = "".join(prompt_parts)

print(combined_abstracts)

In [None]:
# Concatenate the abstracts of the top-ranked publications, each preceded by
# a blank-line separator (so the result starts with two newlines).
combined_abstracts = "".join(
    "\n\n" + atl.publications[key].abstract
    for key in sorted_keys[:n_summarized]
)

print(combined_abstracts)

In [None]:
# The Inference API reads the model input from the "inputs" key of the JSON
# payload (not "input").
response = query({"inputs": combined_abstracts})
response

In [None]:
# Load the "rouge" evaluation module through the `evaluate` package's `load`.
from evaluate import load
eval_module = load("rouge")

In [None]:
# `prediction` is not defined anywhere else in the notebook, so derive it from
# the model response before scoring.
# NOTE(review): assumes the Inference API answered with a non-empty list of
# dicts carrying "summary_text" (summarization) or "generated_text"
# (text generation) — confirm for the configured model.
outputs = response.json()
if not isinstance(outputs, list) or not outputs or not isinstance(outputs[0], dict):
    raise RuntimeError(f"Unexpected Inference API response: {outputs!r}")
prediction = outputs[0].get("summary_text") or outputs[0].get("generated_text")
if not prediction:
    raise RuntimeError(f"No generated text found in response: {outputs!r}")

# Score the generated text against the combined source abstracts.
eval_module.compute(predictions=[prediction], references=[combined_abstracts])