In [20]:
import time
import json
import pickle
import gradio as gr
from annoy import AnnoyIndex
from transformers import pipeline

from database import Database, ADSEntry
from text_to_vec import spacy_tokenizer

# load database settings and open the database described by them
with open("settings.json", 'r') as settings_file:
    settings = json.load(settings_file)
ADSDatabase = Database( settings=settings['database'], dtype=ADSEntry )

# ranks words by importance/occurrence
# NOTE: unpickling can execute arbitrary code; only load pickle files you trust.
with open("vectorizer.pkl", "rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

# reduce dimensionality
with open("pca.pkl", "rb") as pca_file:
    pca = pickle.load(pca_file)

def process_input(x):
    """Convert raw input text into the reduced feature vector used for neighbor lookup.

    The text is passed through ``spacy_tokenizer``, vectorized as a
    single-document batch with ``vectorizer``, densified with ``.toarray()``
    and projected with ``pca``.

    Parameters
    ----------
    x : str
        Input text (handed unchanged to ``spacy_tokenizer``).

    Returns
    -------
    The first (and only) row of ``pca.transform(...)`` for this text.
    """
    tokens = spacy_tokenizer(x)
    # the vectorizer works on a batch, so wrap the single document in a list
    features = vectorizer.transform([tokens]).toarray()
    return pca.transform(features)[0]

class TextAI:
    """Text continuation and similar-paper lookup for a prompt.

    Wraps a ``transformers`` text-generation pipeline and an Annoy index.
    Calling an instance with a prompt returns generated continuations together
    with ``[title, bibcode, abstract]`` rows for the database entries whose ids
    the index returns for the prompt's vector.
    """

    def __init__(self, db, model_checkpoint='./models/checkpoint-90000', neighbor_file='test_514.ann'):
        """Load the generation pipeline and the neighbor index.

        Parameters
        ----------
        db :
            Database object exposing ``query(...)`` and ``dtype.id.in_(...)``.
        model_checkpoint : str
            Model path/name given to ``pipeline('text-generation', ...)``.
        neighbor_file : str
            Annoy index file. Its name must end in ``_<ndim>.<ext>`` because
            the vector dimension is parsed from it.

        Raises
        ------
        ValueError
            If the dimension cannot be parsed from ``neighbor_file``.
        """
        self.db = db
        # NOTE(review): `config` is passed as a plain dict; confirm the installed
        # transformers version honors `max_length` when given this way.
        self.model = pipeline('text-generation', model=model_checkpoint, tokenizer='gpt2', config={'max_length':2000})
        # the index dimensionality is encoded in the file name, e.g. 'test_514.ann' -> 514
        ndim = int(neighbor_file.split('_')[-1].split('.')[0])
        self.neighbors = AnnoyIndex(ndim, 'angular')
        self.neighbors.load(neighbor_file) # super fast, will just mmap the file

    def __call__(self, x, n_suggestions=4, n_neighbors=10):
        """Generate continuations for ``x`` and look up similar entries.

        Parameters
        ----------
        x : str
            Prompt text.
        n_suggestions : int
            Number of independent generation calls to make (default 4).
        n_neighbors : int
            Number of nearest neighbors requested from the index (default 10).

        Returns
        -------
        (text_suggestions, paper_recs)
            ``text_suggestions`` is a list of generated strings with the
            leading prompt removed and surrounding whitespace stripped;
            ``paper_recs`` is a list of ``[title, bibcode, abstract]`` lists,
            ordered nearest-first where entry ids can be matched.
        """
        # generate text
        text_suggestions = []
        for _ in range(n_suggestions):
            generated = self.model(x)[0]['generated_text']
            # Drop only the leading prompt: a blanket replace would also delete
            # any later repetition of the prompt inside the generated text.
            if generated.startswith(x):
                generated = generated[len(x):]
            text_suggestions.append(generated.strip())

        # alternative sampling settings that were tried here:
        #   max_length=1000, do_sample=True, top_k=50, top_p=0.95, num_return_sequences=1

        # get similar abstracts
        nids = self.neighbors.get_nns_by_vector(process_input(x), n_neighbors, search_k=-1, include_distances=False)
        entrys = self.db.query(self.db.dtype.id.in_(nids))

        # An `in_` filter does not preserve the order of `nids`, so restore the
        # nearest-first ranking. Entries whose id is not found keep their
        # relative order at the end (sorted() is stable).
        rank = {nid: position for position, nid in enumerate(nids)}
        entrys = sorted(entrys, key=lambda entry: rank.get(getattr(entry, 'id', None), len(rank)))

        paper_recs = [[entry.title, entry.bibcode, entry.abstract] for entry in entrys]
        return text_suggestions, paper_recs

# single shared TextAI instance backed by the database loaded above
textai = TextAI(db=ADSDatabase)

In [23]:
# example prompt, also reused as the default text of the demo UI
custom_text = "One of the key drivers of the Mars Exploration Program is the search for evidence of past or present life. In this context, the most relevant martian environments to search for extant life are those associated with liquid water, and locations that experience"

# one call returns both the generated continuations and the related papers
text, papers = textai(custom_text)
print(text)

# each recommendation is a [title, bibcode, abstract] triple
for title, bibcode, abstract in papers:
    print(f"{title} ({bibcode}) {abstract} \n")

['extreme fluv', 'intense martian', 'large fluv', 'both outflow']
Life in extreme environments (2001Natur.409.1092R) Each recent report of liquid water existing elsewhere in the Solar System has reverberated through the international press and excited the imagination of humankind. Why? Because in the past few decades we have come to realize that where there is liquid water on Earth, virtually no matter what the physical conditions, there is life. What we previously thought of as insurmountable physical and chemical barriers to life, we now see as yet another niche harbouring 'extremophiles'. This realization, coupled with new data on the survival of microbes in the space environment and modelling of the potential for transfer of life between celestial bodies, suggests that life could be more common than previously thought. Here we examine critically what it means to be an extremophile, and the implications of this for evolution, biotechnology and especially the search for life in the U

In [None]:

def generate(input_text, radio=None):
    """Gradio callback: return generated continuations for ``input_text``.

    Parameters
    ----------
    input_text : str
        Prompt typed by the user.
    radio :
        Value of the radio input; currently unused.

    Returns
    -------
    str
        The generated suggestions joined with newlines, or ``''`` when the
        prompt is empty or only whitespace.
    """
    # The interface runs live, so this fires while the box is empty too;
    # skip the expensive model call when there is nothing to continue.
    if not input_text or not input_text.strip():
        return ''
    text, _ = textai(input_text)
    return '\n'.join(text)

# input widgets: the prompt box (pre-filled with the example text) and a radio selector
prompt_box = gr.Textbox(lines=2, value=custom_text)
choice_radio = gr.Radio(["1","2","3"])

# live=True re-runs `generate` whenever an input changes
demo = gr.Interface(fn=generate, inputs=[prompt_box, choice_radio], outputs="text", live=True)
demo.launch()