In [None]:
class LLM(object):
    """Minimal common interface for the language-model backends in this notebook.

    Subclasses load a concrete model and override ``promptModel``.
    """
    def promptModel(self, prompt, system_prompt="You are a helpful AI assistant.", max_tokens=512, temperature=0.0):
        """Send ``prompt`` to the model and return the generated text.

        Args:
            prompt: The user message.
            system_prompt: Instruction text intended for the system role.
            max_tokens: Upper bound on the number of generated tokens.
            temperature: Sampling temperature requested by the caller.

        Raises:
            NotImplementedError: Always; the base class has no model attached.
        """
        # NotImplementedError is still an Exception, so existing handlers keep working.
        raise NotImplementedError('LLM Object Needs Subclassed')

#
# MISTRAL NEMO Instruct
#
class LLMNemoInstruct(LLM):
    """Mistral Nemo Instruct (2407) run locally through ``mistral_inference``."""
    def __init__(self):
        """Fetch the model files into ``~/mistral_models/Nemo-Instruct`` and load them."""
        from pathlib import Path
        from huggingface_hub import snapshot_download
        # Import the inference stack before downloading so a missing package
        # is reported before the model files are fetched.
        from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
        from mistral_inference.transformer import Transformer
        mistral_models_path = Path.home().joinpath('mistral_models', 'Nemo-Instruct')
        mistral_models_path.mkdir(parents=True, exist_ok=True)
        snapshot_download(
            repo_id="mistralai/Mistral-Nemo-Instruct-2407",
            allow_patterns=["params.json", "consolidated.safetensors", "tekken.json"],
            local_dir=mistral_models_path,
        )
        self.tokenizer = MistralTokenizer.from_file(str(mistral_models_path / "tekken.json"))
        self.model     = Transformer.from_folder(mistral_models_path)

    def promptModel(self, prompt, system_prompt="You are a helpful AI assistant.", max_tokens=512, temperature=0.0):
        """Generate a completion for ``prompt`` and return the decoded text.

        Args:
            prompt: The user message.
            system_prompt: Sent as a system message ahead of the user message;
                pass an empty string or ``None`` to omit it.
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature forwarded to ``generate``.

        Returns:
            The decoded output tokens of the first (only) sequence.
        """
        from mistral_common.protocol.instruct.request import ChatCompletionRequest
        from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
        from mistral_inference.generate import generate
        messages = []
        if system_prompt:
            # Previously the system prompt was accepted but never reached the model.
            messages.append(SystemMessage(content=system_prompt))
        messages.append(UserMessage(content=prompt))
        completion_request = ChatCompletionRequest(messages=messages)
        tokens = self.tokenizer.encode_chat_completion(completion_request).tokens
        raw_tokenizer = self.tokenizer.instruct_tokenizer.tokenizer
        out_tokens, _ = generate([tokens], self.model, max_tokens=max_tokens, temperature=temperature, eos_id=raw_tokenizer.eos_id)
        return raw_tokenizer.decode(out_tokens[0])

#
# LLAMA 3.1 8B Instruct
#
class LLMLlama31_8b(LLM):
    """Meta Llama 3.1 8B Instruct served through a ``transformers`` text-generation pipeline."""
    def __init__(self):
        """Build the text-generation pipeline in bfloat16 with automatic device placement."""
        import transformers
        import torch
        model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
        self.pipeline = transformers.pipeline(
            "text-generation",
            model=model_id,
            model_kwargs={"torch_dtype": torch.bfloat16},
            device_map="auto",
        )

    def promptModel(self, prompt, system_prompt="You are a helpful AI assistant.", max_tokens=512, temperature=0.0):
        """Run one system + user chat turn and return the assistant's reply text.

        Args:
            prompt: The user message.
            system_prompt: Content of the system message.
            max_tokens: Maximum number of newly generated tokens.
            temperature: ``0`` (the default) selects greedy decoding; any
                positive value enables sampling at that temperature.

        Returns:
            The ``content`` of the last message in the generated conversation.
        """
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user",   "content": prompt},
        ]
        generation_args = {"max_new_tokens": max_tokens, "do_sample": False}
        if temperature > 0.0:
            # Temperature only has an effect in sampling mode; with sampling
            # hard-wired off the caller's temperature was silently ignored.
            generation_args["do_sample"] = True
            generation_args["temperature"] = temperature
        outputs = self.pipeline(messages, **generation_args)
        return outputs[0]["generated_text"][-1]['content']

#
# Microsoft's Phi 3 Small 8K Instruct
# - fails due to a cudnn library issue
#
class LLMPhi3Small8k(LLM):
    """Microsoft Phi-3 Small 8K Instruct loaded onto the current CUDA device."""
    def __init__(self):
        """Load the model and tokenizer and build the generation pipeline once.

        Raises:
            AssertionError: If no CUDA device is available.
        """
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
        # Check for a GPU before loading the weights so the failure is immediate.
        assert torch.cuda.is_available(), "This model needs a GPU to run ..."
        torch.random.manual_seed(0)
        model_id = "microsoft/Phi-3-small-8k-instruct"
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype="auto",
            trust_remote_code=True,
        )
        self.device    = torch.cuda.current_device()
        self.model     = self.model.to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Built once here instead of on every promptModel call.
        self.pipeline  = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=self.device
        )

    def promptModel(self, prompt, system_prompt="You are a helpful AI assistant.", max_tokens=512, temperature=0.0):
        """Generate a reply to ``prompt`` and return only the newly generated text.

        Args:
            prompt: The user message.
            system_prompt: Accepted for interface compatibility; only the user
                message is sent. NOTE(review): confirm whether this model's
                chat template accepts a system role before wiring it in.
            max_tokens: Maximum number of newly generated tokens.
            temperature: ``0`` (the default) selects greedy decoding; any
                positive value enables sampling at that temperature.
        """
        messages = [
            {"role": "user", "content": prompt}
        ]
        generation_args = {
            "max_new_tokens": max_tokens,
            "return_full_text": False,
            "do_sample": False,
        }
        if temperature > 0.0:
            # Temperature is only meaningful when sampling is enabled.
            generation_args["do_sample"] = True
            generation_args["temperature"] = temperature

        output = self.pipeline(messages, **generation_args)
        return output[0]['generated_text']

# Select the backend bound to the global `llm` that runPrompts() reads;
# leave exactly one of these assignments uncommented.
#llm = LLMNemoInstruct()
llm = LLMLlama31_8b()
#llm = LLMPhi3Small8k() # fails due to cudnn library issue

In [None]:
# Commented-out direct tokenizer/generate calls (they mirror the body of
# LLMNemoInstruct.promptModel) -- presumably an earlier experiment kept for reference.
#prompt = "How expensive would it be to ask a window cleaner to clean all windows in Paris. Make a reasonable guess in US Dollar."
#completion_request = ChatCompletionRequest(messages=[UserMessage(content=prompt)])
#tokens = tokenizer.encode_chat_completion(completion_request).tokens
#out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.35, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
#result = tokenizer.decode(out_tokens[0])
#print(result)
# Quick check that the selected backend answers a prompt.
llm.promptModel('How many "r"s are there in strawberry?')

In [10]:
# Evaluation examples, one tuple per example:
#   (text, original citation, ideal shortened citation)
# The three parallel lists are unzipped from this table.  To add an example,
# add another three-element tuple.
texts, original_citations, shortened_citations = map(list, zip(
    #
    # Source For Original Citations: https://en.wikipedia.org/wiki/Apollo_13
    #
    (
        'Apollo 13 was led by J. Lovell.',
        'The mission was commanded by Jim Lovell, with Jack Swigert as command module (CM) pilot and Fred Haise as lunar module (LM) pilot. Swigert was a late replacement for Ken Mattingly, who was grounded after exposure to rubella. ',
        'The mission was commanded by Jim Lovell',  # ideal answer
    ),
    (
        'Apollo looped around the Moon instead.',
        'The crew, supported by backup systems on the lunar module (LM), instead looped around the Moon in a circumlunar trajectory and returned safely to Earth on April 17.',
        'instead looped around the Moon',  # ideal answer
    ),
    (
        'They returned safely to Earth.',
        'The crew, supported by backup systems on the lunar module (LM), instead looped around the Moon in a circumlunar trajectory and returned safely to Earth on April 17.',
        'returned safely to Earth',  # ideal answer
    ),
    #
    # Source for Original Citations:  https://en.wikipedia.org/wiki/War_and_Peace
    #
    (
        'Authors did not always agree upon what comprised a novel -- for example, Tolstoy indicated that War and Peace was not a novel.',
        'Tolstoy said that the best Russian literature does not conform to standards and hence hesitated to classify War and Peace, saying it is "not a novel, even less is it a poem, and still less a historical chronicle."',
        'Tolstoy said that the best Russian literature does not conform to standards and hence hesitated to classify War and Peace, saying it is "not a novel',  # ideal answer
    ),
))

def cleanResponse(s):
    """Normalize a model reply down to the bare citation text.

    Surrounding whitespace is removed, then any known lead-in phrase, then one
    pair of matching single or double quotes wrapping the whole answer.

    Args:
        s: Raw text returned by the model.

    Returns:
        The cleaned string.
    """
    known_prefixes = ('The shortened citation would be:',
                      'Shortened Citation:',)
    # Replies often carry a leading space or trailing newline; without this the
    # prefix and quote checks below miss otherwise well-formed answers.
    s = s.strip()
    for prefix in known_prefixes:
        if s.startswith(prefix):
            s = s[len(prefix):].strip()
    for quote in ("'", '"'):
        # Require two characters so a lone quote mark is not treated as an
        # empty quoted string.
        if len(s) >= 2 and s.startswith(quote) and s.endswith(quote):
            return s[1:-1]
    return s

def formatPrompt(text, original_citation):
    """Placeholder prompt builder; later cells redefine it with real templates."""
    placeholder = 'not defined yet'
    return placeholder

def runPrompts():
    """Query the model for every example and print a comparison table.

    Reads the module-level ``texts``, ``original_citations``,
    ``shortened_citations`` and ``llm``, and whichever ``formatPrompt`` is
    currently defined.  Each printed row holds the cleaned model answer
    (quoted and space-padded to the longest answer), whether that answer is a
    verbatim substring of the original citation, and the ideal answer.
    """
    answers = []
    for idx, statement in enumerate(texts):
        citation   = original_citations[idx]
        raw_answer = llm.promptModel(formatPrompt(statement, citation))
        answers.append(cleanResponse(raw_answer))

    # Width of the longest answer, used to align the remaining columns.
    column_width = max((len(answer) for answer in answers), default=0)

    for idx, answer in enumerate(answers):
        padding     = ' ' * (column_width - len(answer))
        is_verbatim = answer in original_citations[idx]
        print(f'"{answer}" {padding} {is_verbatim}  "{shortened_citations[idx]}"')

In [None]:
def formatPrompt(text, original_citation):
    """Build the hand-written prompt that asks for an exact-words shortening."""
    # The instruction sentences are separated by two spaces.
    instructions = "  ".join([
        "My citations are too long.",
        "Shorten the following citation to just the part that supports the phrase.",
        "The citation should use the exact words in the supplied citation.",
    ])
    return f"{instructions}\n\nPhrase: '{text}'\n\nSupplied Citation: '{original_citation}'"
# Evaluate the hand-written prompt defined in this cell against every example.
runPrompts()

In [None]:
# ChatGPT Recommended Prompt "make me a prompt for a language model that takes a citation that is too long and shortens it to only the necessary part for a given text."
def formatPrompt(text, original_citation):
    """Build the ChatGPT-suggested prompt: instructions, then labelled sections."""
    instructions = (
        "Given a citation that is too lengthy, shorten it to include only the essential "
        "information needed to support the main point of the following text. "
        "Ensure the shortened citation retains the key details, remains accurate, "
        "and provides sufficient context."
    )
    # Every section is separated from the next by one blank line.
    sections = [
        instructions,
        "Text:",
        f"{text}",
        "Original Citation:",
        f"{original_citation}",
        "Shortened Citation:",
    ]
    return "\n\n".join(sections)
# Evaluate the ChatGPT-suggested prompt defined in this cell against every example.
runPrompts()

In [None]:
# Claude Recommendation (heavily modified to remove formal citation language)
def formatPrompt(text, original_citation):
    """Build the Claude-suggested prompt: a role description, then quoted sections."""
    role_and_task = (
        "You are an AI assistant specialized in academic writing and citation management. "
        "Your task is to analyze a given piece of text and a lengthy citation, then shorten "
        "the citation to include only the parts that are directly relevant to the text."
    )
    # Empty entries produce the blank lines between sections.
    lines = [
        role_and_task,
        "",
        "Text:",
        f"'{text}'",
        "",
        "Lengthy Citation:",
        f"'{original_citation}'",
        "",
        "Shortened Citation:",
    ]
    return "\n".join(lines)
# Evaluate the Claude-suggested prompt defined in this cell against every example.
runPrompts()

In [None]:
def formatPrompt(text, original_citation):
    """Build the terse prompt that asks for only the supporting substring."""
    request = f'Return only the substring from "Lengthy Text" that supports the following statement: "{text}"'
    source  = f'Lengthy text: "{original_citation}"'
    return request + "\n\n" + source
# Evaluate the "return only the substring" prompt defined in this cell against every example.
runPrompts()

In [None]:
def formatPrompt(text, original_citation):
    """Build the prompt that asks for the shortest supporting substring, nothing else."""
    # Paragraphs are separated by one blank line.
    paragraphs = (
        'For the following statement:',
        f'"{text}"',
        'What is the shortest substring from the following that supports that statement:',
        f'"{original_citation}"',
        'Only provide the substring, no reasoning or caveats.',
    )
    return '\n\n'.join(paragraphs)
# Evaluate the "shortest substring" prompt defined in this cell against every example.
runPrompts()