In [None]:
from huggingface_hub import snapshot_download
from pathlib import Path

# Local folder (under the user's home directory) that holds the model files.
mistral_models_path = Path.home() / 'mistral_models' / 'Nemo-Instruct'
mistral_models_path.mkdir(parents=True, exist_ok=True)

# Fetch only the three files requested below from the Hugging Face repo
# into the local folder.
snapshot_download(
    repo_id="mistralai/Mistral-Nemo-Instruct-2407",
    allow_patterns=["params.json", "consolidated.safetensors", "tekken.json"],
    local_dir=mistral_models_path,
)

from mistral_inference.transformer import Transformer
from mistral_inference.generate import generate

from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest

# Build the tokenizer from the downloaded tekken.json and load the model
# from the same local folder; both objects are used by the cells below.
tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tekken.json")
model = Transformer.from_folder(mistral_models_path)

In [None]:
def promptModel(prompt, max_tokens=512, temperature=0.0):
    """Send a single user prompt to the loaded model and return the decoded reply.

    Uses the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: Text placed in a single user message.
        max_tokens: Forwarded to ``generate`` as ``max_tokens``.
        temperature: Forwarded to ``generate`` as ``temperature``.

    Returns:
        The decoded text of the first (and only) generated sequence.
    """
    request = ChatCompletionRequest(messages=[UserMessage(content=prompt)])
    prompt_tokens = tokenizer.encode_chat_completion(request).tokens

    # The underlying tokenizer supplies both the end-of-sequence id and decoding.
    base_tokenizer = tokenizer.instruct_tokenizer.tokenizer

    # generate() takes a batch of token lists; a batch of one is submitted here.
    generated, _ = generate(
        [prompt_tokens],
        model,
        max_tokens=max_tokens,
        temperature=temperature,
        eos_id=base_tokenizer.eos_id,
    )
    return base_tokenizer.decode(generated[0])
#prompt = "How expensive would it be to ask a window cleaner to clean all windows in Paris. Make a reasonable guess in US Dollar."
#completion_request = ChatCompletionRequest(messages=[UserMessage(content=prompt)])
#tokens = tokenizer.encode_chat_completion(completion_request).tokens
#out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.35, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
#result = tokenizer.decode(out_tokens[0])
#print(result)
# Quick check of promptModel on a single question, using its default
# max_tokens and temperature.
promptModel('How many "r"s are there in strawberry?')

In [42]:
#
# Evaluation samples. Each tuple below is
#   (text, original citation, hand-shortened citation)
# and the tuples are split column-wise into three parallel lists.
#
texts, original_citations, shortened_citations = map(list, zip(
    #
    # Source For Original Citations: https://en.wikipedia.org/wiki/Apollo_13
    #
    (
        'Apollo 13 was led by J. Lovell.',
        'The mission was commanded by Jim Lovell, with Jack Swigert as command module (CM) pilot and Fred Haise as lunar module (LM) pilot. Swigert was a late replacement for Ken Mattingly, who was grounded after exposure to rubella. ',
        'The mission was commanded by Jim Lovell',
    ),
    (
        'Apollo looped around the Moon instead.',
        'The crew, supported by backup systems on the lunar module (LM), instead looped around the Moon in a circumlunar trajectory and returned safely to Earth on April 17.',
        'instead looped around the Moon',
    ),
    (
        'They returned safely to Earth.',
        'The crew, supported by backup systems on the lunar module (LM), instead looped around the Moon in a circumlunar trajectory and returned safely to Earth on April 17.',
        'returned safely to Earth',
    ),
    #
    # Source for Original Citations:  https://en.wikipedia.org/wiki/War_and_Peace
    #
    (
        'Authors did not always agree upon what comprised a novel -- for example, Tolstoy indicated that War and Peace was not a novel.',
        'Tolstoy said that the best Russian literature does not conform to standards and hence hesitated to classify War and Peace, saying it is "not a novel, even less is it a poem, and still less a historical chronicle."',
        'Tolstoy said that the best Russian literature does not conform to standards and hence hesitated to classify War and Peace, saying it is "not a novel',
    ),
    # To add a sample, append another
    # (text, original citation, shortened citation) tuple here.
))

def cleanResponse(s):
    """Normalize a model response by removing whitespace and one pair of wrapping quotes.

    Leading and trailing whitespace is removed first, so a response such as
    ``"'quoted'\\n"`` is still recognized as quoted. One matching pair of
    single or double quotes is then removed from the ends.

    Args:
        s: Raw response text.

    Returns:
        The trimmed text without its wrapping quote pair. Text that is not
        wrapped in a matching pair is returned trimmed but otherwise unchanged.
    """
    s = s.strip()
    # Require at least two characters: a lone quote mark must not be treated
    # as both the opening and the closing quote (which would erase it).
    if len(s) >= 2 and s[0] == s[-1] and s[0] in ("'", '"'):
        return s[1:-1]
    return s

def formatPrompt(text, original_citation):
    """Placeholder prompt builder that ignores its inputs.

    Always returns the fixed marker string below; it only provides a default
    binding for the name ``formatPrompt``.
    """
    placeholder = 'not defined yet'
    return placeholder

def runPrompts():
    """Query the model for every sample and print one comparison row per sample.

    Reads the notebook-level lists ``texts``, ``original_citations`` and
    ``shortened_citations`` and calls whichever ``formatPrompt`` is currently
    bound. Each printed row contains, in order: the cleaned model response in
    quotes, padding so the following columns line up, whether the response
    occurs verbatim inside the original citation, and the hand-shortened
    citation in quotes. Returns nothing.
    """
    # Pass 1: gather every cleaned response so the widest one is known
    # before anything is printed.
    answers = []
    for idx, text in enumerate(texts):
        prompt = formatPrompt(text, original_citations[idx])
        answers.append(cleanResponse(promptModel(prompt)))
    widest = max((len(answer) for answer in answers), default=0)

    # Pass 2: print the rows, padding each response out to the widest one.
    for idx, answer in enumerate(answers):
        padding = ' ' * (widest - len(answer))
        is_verbatim = answer in original_citations[idx]
        print(f'"{answer}" {padding} {is_verbatim}  "{shortened_citations[idx]}"')

In [None]:
def formatPrompt(text, original_citation):
    """Build the hand-written "shorten this citation" prompt.

    Args:
        text: The phrase the citation must support; inserted in single quotes.
        original_citation: The full citation; inserted in single quotes.

    Returns:
        The instruction sentences followed by the phrase and the supplied
        citation, each introduced by a blank line.
    """
    segments = (
        "My citations are too long.  ",
        "Shorten the following citation to just the part that supports the phrase.  ",
        "The citation should use the exact words in the supplied citation.",
        f"\n\nPhrase: '{text}'",
        f"\n\nSupplied Citation: '{original_citation}'",
    )
    return "".join(segments)
# Evaluate the prompt defined just above against every sample and print the comparison rows.
runPrompts()

In [None]:
# ChatGPT Recommended Prompt "make me a prompt for a language model that takes a citation that is too long and shortens it to only the necessary part for a given text."
def formatPrompt(text, original_citation):
    """Build the ChatGPT-suggested prompt for shortening a citation.

    Args:
        text: The text the citation should support.
        original_citation: The full citation to be shortened.

    Returns:
        The instruction paragraph, the text, and the original citation as
        labelled sections separated by blank lines, ending with the
        ``Shortened Citation:`` label.
    """
    sections = [
        'Given a citation that is too lengthy, shorten it to include only the essential information needed to support the main point of the following text. Ensure the shortened citation retains the key details, remains accurate, and provides sufficient context.',
        'Text:',
        f'{text}',
        'Original Citation:',
        f'{original_citation}',
        'Shortened Citation:',
    ]
    # Every section is separated from the next by exactly one blank line.
    return '\n\n'.join(sections)
# Evaluate the prompt defined just above against every sample and print the comparison rows.
runPrompts()

In [None]:
# Claude Recommendation (heavily modified to remove formal citation language)
def formatPrompt(text, original_citation):
    """Build the role-style prompt for shortening a citation.

    Args:
        text: The text the citation should support; inserted in single quotes.
        original_citation: The full citation; inserted in single quotes.

    Returns:
        The role description, the quoted text, and the quoted citation as
        labelled sections separated by blank lines, ending with the
        ``Shortened Citation:`` label.
    """
    sections = [
        "You are an AI assistant specialized in academic writing and citation management. Your task is to analyze a given piece of text and a lengthy citation, then shorten the citation to include only the parts that are directly relevant to the text.",
        f"Text:\n'{text}'",
        f"Lengthy Citation:\n'{original_citation}'",
        "Shortened Citation:",
    ]
    # Every section is separated from the next by exactly one blank line.
    return "\n\n".join(sections)
# Evaluate the prompt defined just above against every sample and print the comparison rows.
runPrompts()