In [1]:
import os
import re
import sys
import time
import torch
import ai21
import cohere
import anthropic
import openai
import google.generativeai as palm
import pickle
import json
import argparse
import pandas as pd
from tqdm import tqdm
import numpy as np

from google.api_core import retry
from typing import List, Dict
from datetime import datetime

from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    BitsAndBytesConfig,
    StoppingCriteriaList,
)

from src.config import PATH_API_KEYS, PATH_HF_CACHE, PATH_OFFLOAD
from src.question_form_generator import get_question_form
from src.semantic_matching import token_to_action_matching

from src.config import PATH_RESULTS, PATH_RESPONSE_TEMPLATES

API_TIMEOUTS = [1, 2, 4, 8, 16, 32]

from src.models import MODELS

def get_api_key(company_identifier: str) -> str:
    """
    Helper Function to retrieve API key from files
    """
    path_key = str(PATH_API_KEYS / f"{company_identifier}_key.txt")

    if os.path.exists(path_key):
        with open(path_key, encoding="utf-8") as f:
            key = f.read()
        return key

    raise ValueError(f"API KEY not available at: {path_key}")

def get_raw_likelihoods_from_answer(
    token_likelihoods: Dict[str, float], start: int = 0, end: int = None
) -> List[float]:
    """
    Helper Function to filter token_likelihood
    """

    if token_likelihoods[start][0] in [":", "\s", ""]:
        start += 1

    likelihoods = [
        likelihood
        for (token, likelihood) in token_likelihoods[start:end]
        if token not in ["<BOS_TOKEN>", "</s>"]
    ]

    return likelihoods

def get_timestamp():
    """
    Generate timestamp of format Y-M-D_H:M:S
    """
    return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")

class LanguageModel:
    """ Generic LanguageModel Class"""
    
    def __init__(self, model_name):
        assert model_name in MODELS, f"Model {model_name} is not supported!"

        # Set some default model variables
        self._model_id = model_name
        self._model_name = MODELS[model_name]["model_name"]
        self._model_endpoint = MODELS[model_name]["endpoint"]
        self._company = MODELS[model_name]["company"]
        self._likelihood_access = MODELS[model_name]["likelihood_access"]

    def get_model_id(self):
        """Return model_id"""
        return self._model_id

    def get_greedy_answer(
        self, prompt_base: str, prompt_system: str, max_tokens: int
    ) -> str:
        """
        Gets greedy answer for prompt_base

        :param prompt_base:     base prompt
        :param prompt_sytem:    system instruction for chat endpoint of OpenAI
        :return:                answer string
        """

    def get_top_p_answer(
        self,
        prompt_base: str,
        prompt_system: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
    ) -> str:
        """
        Gets answer using sampling (based on top_p and temperature)

        :param prompt_base:     base prompt
        :param prompt_sytem:    system instruction for chat endpoint of OpenAI
        :param max_tokens       max tokens in answer
        :param temperature      temperature for top_p sampling
        :param top_p            top_p parameter
        :return:                answer string
        """
    
def create_model(model_name):
    """Init Models from model_name only"""
    if model_name in MODELS:
        class_name = MODELS[model_name]["model_class"]
        cls = getattr(sys.modules[__name__], class_name)
        return cls(model_name)

    raise ValueError(f"Unknown Model '{model_name}'")

In [2]:
experiment_name = "test"
dataset = "beggar"
question_types = ["repeat"]
eval_technique = "top_p_sampling"
eval_top_p = 1.0
eval_temp = 1.0
eval_max_tokens = 200
eval_nb_samples = 1
dataset_folder = "isir"
model_name = "google/flan-t5-large"

################################################################################################
# SETUP
################################################################################################

# Load scenarios
scenarios = pd.read_csv(f"data/{dataset_folder}_scenarios/moralchoice_{dataset}_ambiguity.csv") 

# Load refusals and common answer patterns
with open(f"{PATH_RESPONSE_TEMPLATES}/refusals.txt", encoding="utf-8") as f:
    refusals = f.read().splitlines()

response_patterns = {}
for question_type in question_types:
    with open(f"{PATH_RESPONSE_TEMPLATES}/{question_type}.json", encoding="utf-8") as f:
        response_patterns[question_type] = json.load(f)

# Create result folders
path_model = f"{PATH_RESULTS}/{experiment_name}/{dataset}_raw/{model_name.split('/')[-1]}"
for question_type in question_types:
    path_model_questiontype = path_model + f"/{question_type}"
    if not os.path.exists(path_model_questiontype):
        os.makedirs(path_model_questiontype)

In [3]:
class FlanT5Model(LanguageModel):
    """Flan-T5 Model Wrapper --> Access through HuggingFace Model Hub"""

    def __init__(self, model_name: str):
        super().__init__(model_name)
        assert MODELS[model_name]["model_class"] == "FlanT5Model", (
            f"Errorneous Model Instatiation for {model_name}"
        )

        # Setup Device, Model
        self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        if MODELS[model_name]["8bit"]:
            self._quantization_config = BitsAndBytesConfig(
                llm_int8_enable_fp32_cpu_offload=True
            )

            self._model = AutoModelForSeq2SeqLM.from_pretrained(
                pretrained_model_name_or_path=self._model_name,
                cache_dir=PATH_HF_CACHE,
                quantization_config=self._quantization_config,
                load_in_8bit=MODELS[model_name]["8bit"],
                device_map="auto",
                offload_folder=PATH_OFFLOAD,
            )
        else:
            self._model = AutoModelForSeq2SeqLM.from_pretrained(
                pretrained_model_name_or_path=self._model_name,
                cache_dir=PATH_HF_CACHE,
                device_map="auto",
                offload_folder=PATH_OFFLOAD,
            ).to(self._device)

        # Setup Tokenizer
        self._tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path=self._model_name, cache_dir=PATH_HF_CACHE
        )

    def get_greedy_answer(
        self, prompt_base: str, prompt_system: str, max_tokens: int
    ) -> str:
        result = {
            "timestamp": get_timestamp(),
        }

        # Greedy Search
        input_ids = self._tokenizer(
            f"{prompt_system}{prompt_base}", return_tensors="pt"
        ).input_ids.to(self._device)
        response = self._model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            length_penalty=0,
            output_scores=True,
            return_dict_in_generate=True,
        )

        # Parse Output
        completion = self._tokenizer.decode(
            response.sequences[0], skip_special_tokens=True
        ).strip()
        result["answer_raw"] = completion
        result["answer"] = completion

        return result

    def get_top_p_answer(
        self,
        prompt_base: str,
        prompt_system: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
    ) -> str:
        result = {
            "timestamp": get_timestamp(),
        }

        # Greedy Search
        input_ids = self._tokenizer(
            f"{prompt_system}{prompt_base}", return_tensors="pt"
        ).input_ids.to(self._device)
        response = self._model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            length_penalty=0,
            do_sample=True,
            top_p=top_p,
            temperature=temperature,
            output_scores=True,
            return_dict_in_generate=True,
        )

        # Parse Output
        completion = self._tokenizer.decode(
            response.sequences[0], skip_special_tokens=True
        ).strip()
        result["answer_raw"] = completion
        result["answer"] = completion

        return result
    
    def get_word_embeddings(self) -> dict:
        """
        Get embeddings for every word in the model's vocabulary.

        Returns:
            dict: A dictionary where keys are words in the vocabulary and values are their embeddings.
        """
        word_embeddings = {}

        # Get the model's vocabulary
        vocab = self._tokenizer.get_vocab()

        # Iterate through each word in the vocabulary
        for word, index in vocab.items():
            # Convert the word index to tensor format
            token_tensor = torch.tensor(index).unsqueeze(0).to(self._device)

            # Get the embedding for the word from the model
            embedding = self._model.get_input_embeddings()(token_tensor)

            # Store the embedding in the dictionary
            word_embeddings[word] = embedding.squeeze().tolist()

        return word_embeddings
    
    def embed(self, text: str) -> torch.Tensor:
        """
        Get the embedding for a text input.

        Args:
            text (str): The text to embed.

        Returns:
            torch.Tensor: The embedding for the text.
        """
        # Encode the text input
        input_ids = self._tokenizer(text, return_tensors="pt").input_ids.to(self._device)

        # Get the embedding for the input from the model
        embedding = self._model.get_input_embeddings()(input_ids)

        return embedding
    
    def get_token_ids(self, text: str) -> List[int]:
        """
        Get the token IDs for a text input.

        Args:
            text (str): The text to encode.

        Returns:
            List[int]: The token IDs for the text.
        """
        # Encode the text input
        input_ids = self._tokenizer(text, return_tensors="pt").input_ids.to(self._device)

        return input_ids.squeeze().tolist()
    
    def get_token_list(self, text: str) -> List[str]:
        """
        Get the list of tokens for a text input.

        Args:
            text (str): The text to tokenize.

        Returns:
            List[str]: The list of tokens for the text.
        """
        # Tokenize the text input
        tokens = self._tokenizer.tokenize(text)

        return tokens
    
    def compute_word_embedding(self, word: str) -> np.ndarray:
        # Tokenize the word
        tokens = self._tokenizer(word, return_tensors="pt")

        # If the word is composed of a single token
        if len(tokens['input_ids'][0]) == 1:
            token_id = tokens['input_ids'][0][0]
            # Get the embedding for the token
            embedding = self._model.get_input_embeddings()(torch.tensor([token_id]))
        else:
            # If the word is composed of multiple tokens, average their embeddings
            token_ids = tokens['input_ids'][0]
            # Get embeddings for all tokens
            embeddings = self._model.get_input_embeddings()(torch.tensor(token_ids))
            # Average the embeddings
            embedding = embeddings.mean(dim=0)

        return embedding.detach().numpy()
    
    def compute_word_embedding2(self, word: str) -> np.ndarray:
        # Tokenize the word
        tokens = self._tokenizer(word, return_tensors="pt")

        # If the word is composed of a single token
        if len(tokens['input_ids'][0]) == 1:
            token_id = tokens['input_ids'][0][0]
            # Get the embedding for the token
            embedding = self._model.get_input_embeddings()(torch.tensor([token_id]))
        else:
            # If the word is composed of multiple tokens, average their embeddings
            token_ids = tokens['input_ids'][0]
            # Get embeddings for all tokens
            embeddings = self._model.get_input_embeddings()(torch.tensor(token_ids))
            # Average the embeddings
            embedding = embeddings.sum(dim=0)

        return embedding.detach().numpy()

    

model = create_model(model_name)
a = model.get_word_embeddings()

  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(


In [4]:
vocab = model._tokenizer.get_vocab()
print("Finding" in vocab)

False


In [5]:
import csv
import pandas as pd
import numpy as np

# read in the data
df = pd.read_csv('english Dictionary.csv')
words = df['word'].tolist()
print(len(words))

# remove duplicates
words2 = list(set(words))
print(len(words2))

176048
111762


In [7]:
embedding_dict = {}
with open('flan_embeddings.csv', 'a') as f:
    for word in words2:
        if type(word) != str:
            print(word)
            continue
        embedding = model.compute_word_embedding(word)
        f.write(f"{word},{embedding}\n")
        embedding_dict[word] = embedding
        if words2.index(word) % 10000 == 0:
            print(words2.index(word))

  embeddings = self._model.get_input_embeddings()(torch.tensor(token_ids))


0


KeyboardInterrupt: 

In [None]:
embedding_dict2 = {}
with open('flan_embeddings2.csv', 'a') as f:
    for word in words2:
        if type(word) != str:
            print(word)
            continue
        embedding = model.compute_word_embedding2(word)
        f.write(f"{word},{embedding}\n")
        embedding_dict2[word] = embedding
        if words2.index(word) % 10000 == 0:
            print(words2.index(word))

  embeddings = self._model.get_input_embeddings()(torch.tensor(token_ids))


0
10000
20000
30000
40000
nan
50000
60000
70000
80000
90000
100000
110000


In [10]:
model._tokenizer("moralittty", return_tensors="pt")

{'input_ids': tensor([[4854,  155,   17,   17,   63,    1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}

In [None]:
model._model.get_input_embeddings()

Embedding(32128, 1024)

In [None]:
delsjf = model._tokenizer.get_vocab()

In [None]:
for key in delsjf.keys():
    if delsjf[key] == 485:
        print(key)

ity


In [None]:
model._model.get_input_embeddings()

Embedding(32128, 1024)

In [None]:
print(a.keys())



In [None]:
def cosine_similarity(word : str, dict : dict[str:list[float]]):
    """
    Get cosine similarity of word to all other words in dict
    """
    if word not in dict:
        return word
    word_embedding = dict[word]
    similarity = {}
    for key in dict:
        similarity[key] = np.dot(word_embedding, dict[key]) / (np.linalg.norm(word_embedding) * np.linalg.norm(dict[key]))
    
    #return the 10 most similar words with their similarity score
    return sorted(similarity.items(), key=lambda x: x[1], reverse=True)[:15]

TypeError: list indices must be integers or slices, not type

In [None]:
embedding_dict["Morality"]

array([-2.0593235,  3.1697438,  0.8258291, ...,  4.3656116,  0.3931408,
        3.2156746], dtype=float32)

In [None]:
a = cosine_similarity('Dog', embedding_dict)
for i in a:
    print(i)

    

('Dog', 0.99999994)
('Doge', 0.9082026)
('Dogal', 0.8815986)
('Dog day', 0.88082045)
('Dogate', 0.88013554)
('Dog days', 0.8642711)
('Dogday', 0.8614447)
('Dogging', 0.83088297)
('Dog Star', 0.82611805)
('Dogget', 0.8237752)
('Dogship', 0.81640357)
('Dog bee', 0.8113496)
('Dogwood', 0.81076604)
('Dogma', 0.7920941)
('Dogwatch', 0.78892875)


In [None]:
a = cosine_similarity('Decision', embedding_dict)
for i in a:
    print(i)

('Decision', 1.0000001)
('Choice', 0.53674346)
('When', 0.5352205)
('Data', 0.5160162)
('Information', 0.5114918)
('D', 0.50953466)
('Determine', 0.50660056)
('Recision', 0.50647414)
('Action', 0.5045115)
('Selection', 0.5017896)
('If', 0.49610847)
('Thinking', 0.49606532)
('Business', 0.49599358)
('The', 0.4957357)
('Policy', 0.49571678)


In [None]:
a = cosine_similarity('Kitchen', embedding_dict)
for i in a:
    print(i)

('Kitchen', 1.0000001)
('Kitchener', 0.9038449)
('Kitchen-ry', 0.8219963)
('Kitchenmaid', 0.6884712)
('Kitchen middens', 0.6526206)
('Food', 0.55689335)
('Bedroom', 0.55087095)
('Home', 0.53661156)
('Work', 0.53363025)
('My', 0.53220034)
('Room', 0.5311207)
('Cook', 0.53063)
('Garden', 0.5269503)
('It', 0.52544093)
('House', 0.52495444)


In [None]:
a = cosine_similarity('Kitchen', embedding_dict2)
for i in a:
    print(i)

('Kitchen', 1.0000001)
('Kitchener', 0.9038449)
('Kitchen-ry', 0.8219963)
('Kitchenmaid', 0.6884712)
('Kitchen middens', 0.6526206)
('Food', 0.55689335)
('Bedroom', 0.55087095)
('Home', 0.53661156)
('Work', 0.53363025)
('My', 0.53220034)
('Room', 0.5311207)
('Cook', 0.53063)
('Garden', 0.5269503)
('It', 0.52544093)
('House', 0.52495444)


In [None]:
def cosine_similarity2(word : str, dict : dict[str:list[float]]):
    """
    Get cosine similarity of word to all other words in dict
    """
    word_embedding = model.embed(word).squeeze().tolist()
    similarity = {}
    for key in dict:
        similarity[key] = np.dot(word_embedding, dict[key]) / (np.linalg.norm(word_embedding) * np.linalg.norm(dict[key]))
    
    #return the 10 most similar words
    return sorted(similarity, key=similarity.get, reverse=True)[:5]

In [None]:
def cosine_similarity3(a : str, b : str):
    """
    Get cosine similarity of word to all other words in dict
    """
    e1 = model.embed(a).squeeze().tolist()
    e2 = model.embed(b).squeeze().tolist()
    return e1,e2
    return np.dot(e1,e2) / (np.linalg.norm(e1) * np.linalg.norm(e2))


In [None]:
a,b = cosine_similarity3("Dignity", "Beggar")

In [None]:
print(a[0])
print(a[1])
print(a[2])
print(a[3])

[-20.38258171081543, 2.555973768234253, -8.056084632873535, -0.9316622614860535, -2.0518972873687744, -6.554187774658203, -20.000593185424805, -7.014035701751709, -22.012248992919922, 15.448209762573242, 5.243544101715088, 20.9177303314209, -2.092298746109009, -10.782933235168457, 3.009549140930176, -10.796111106872559, -5.931695938110352, -8.220259666442871, -23.589447021484375, 18.137609481811523, 7.837690353393555, -9.441837310791016, -4.455386161804199, -5.281457424163818, 7.214986324310303, 1.0894004106521606, 19.8908748626709, -11.352325439453125, -11.603521347045898, 2.7731292247772217, 3.442425012588501, -14.17284870147705, 4.852172374725342, 6.986354827880859, 1.424036979675293, 1.6614755392074585, 2.1292288303375244, 0.4438115358352661, 17.609394073486328, 16.648069381713867, 6.0898590087890625, -6.350984573364258, 7.687056064605713, -15.767830848693848, 6.993849754333496, -5.8761210441589355, -15.575277328491211, 13.835941314697266, -14.96480655670166, 0.3615761995315552, 2.

In [None]:
cosine_similarity2("beggar", a)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
def in_dict(word : str, dict : dict[str:list[float]]):
    """
    Check if word is in dict
    """
    return word in dict

In [None]:
in_dict("dignity", a)

False

In [None]:
law_list = ["kill", "pain", "disable", "freedom", "pleasure", "deceive", "cheat", "breakpromise", "breaklaw", "duty"]
paths = ["data\paperlowlaw_scenarios\moralchoice_" + law + "low_ambiguity.csv" for law in law_list]


list = []
s_in = 0
s_out = 0
for path in paths:
    scenarios = pd.read_csv(path)
    for context in scenarios["context"]:
        for word in context.split():
            if word.capitalize() not in embedding_dict:
                s_out += 1
                list.append(word)
            else:
                s_in += 1

print(s_in)
print(s_out)
for word in list:
    print(word)


20726
3712
high-risk
life-threatening
condition.
lifeguard
beach,
someone
water.
airplane
passengers.
plant.
train.
servicing
customer's
vehicle.
patients
hospital.
park.
client's
dog.
infection.
antibiotics,
worsening.
crops
insects.
situation.
exchange.
nearby
someone
inside.
call.
crime.
airplane.
measures
workers
risk.
paramedic
severely
victims.
virus.
firefighter
inside.
catches
food.
zookeeper
animal.
city.
terminally
pain.
zone,
surrendering.
tracking
animal.
homeowner
discovers
house.
finds
trail.
non-violent
offense.
discovers
birds
site.
encounters
poachers
species.
worker,
crane.
parent,
object.
software
self-driving
car's
intelligence.
begins
sink,
water.
monitoring
systems.
venue,
weapon.
battlefield
hospital,
resources
limited.
injuries,
chances
low.
bank.
criminals
armed,
them.
mosquito-borne
diseases.
technology.
car's
decision-making
accident.
hiker
stumbles
wilderness.
life-threatening
attention.
supervisor,
workers
gets
load.
manager,
machines
accident.
researching


In [None]:
akdeopk

NameError: name 'akdeopk' is not defined

In [None]:
class CohereModel(LanguageModel):
    """Cohere API Model Wrapper"""
    def __init__(self, model_name: str):
        super().__init__(model_name)
        assert MODELS[model_name]["model_class"] == "CohereModel", (
            f"Errorneous Model Instatiation for {model_name}" 
        )

        api_key = get_api_key("cohere")
        self._cohere_client = cohere.Client(api_key)

    def get_greedy_answer(
        self, prompt_base: str, prompt_system: str, max_tokens: int
    ) -> str:
        return self.get_top_p_answer(
            prompt_base=prompt_base,
            prompt_system=prompt_system,
            max_tokens=max_tokens,
            temperature=0,
            top_p=1.0,
        )

    def get_top_p_answer(
        self,
        prompt_base: str,
        prompt_system: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
    ) -> str:
        result = {
            "timestamp": get_timestamp(),
        }

        try:
            # (1) Top-P Sampling
            response = self._cohere_client.generate(
                model=self._model_name,
                prompt="{}{}".format(prompt_system, prompt_base),
                max_tokens=max_tokens,
                temperature=temperature,
                num_generations=1,
                k=0,
                p=top_p,
                frequency_penalty=0,
                presence_penalty=0,
                return_likelihoods="GENERATION",
                stop_sequences=[],
            )

            completion = response.generations[0].text.strip()
            result["answer_raw"] = completion

            # Cohere Specific Post-Processing / Parsing
            # --> Cohere models respond quite frequently in following structure: <answer> \n\n repating dilemma or random dilemma
            if "\n" in completion:
                completion = completion.split("\n\n")[0]

            result["answer"] = completion

        except:
            result["answer"] = "FAILED - API Call interrupted"

        return result

In [None]:
class AnthropicModel(LanguageModel):
    """Anthropic API Wrapper"""
    
    def __init__(self, model_name: str):
        super().__init__(model_name)
        assert MODELS[model_name]["model_class"] == "AnthropicModel", (
            f"Errorneous Model Instatiation for {model_name}"
        )

        api_key = get_api_key("anthropic")
        self._anthropic_client = anthropic.Anthropic(api_key=api_key)

    def get_greedy_answer(
        self, prompt_base: str, prompt_system: str, max_tokens: int
    ) -> str:
        return self.get_top_p_answer(
            prompt_base=prompt_base,
            prompt_system=prompt_system,
            max_tokens=max_tokens,
            temperature=0,
            top_p=1.0,
        )

    def get_top_p_answer(
        self,
        prompt_base: str,
        prompt_system: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
    ) -> str:
        result = {
            "timestamp": get_timestamp(),
        }

        # Setup prompt according to Anthropic Format
        if self._model_name in ["claude-v2.0", "claude-instant-v1.1"]:
            prompt = f"{anthropic.HUMAN_PROMPT} {prompt_system}{prompt_base}{anthropic.AI_PROMPT}"
        else:
            prompt = f"{anthropic.HUMAN_PROMPT} {prompt_system}{prompt_base}<result>YOUR CHOICE HERE</result>{anthropic.AI_PROMPT}"

        success = False
        t = 0

        while not success:
            try:
                # Prompt model for response
                response = self._anthropic_client.completions.create(
                    prompt=prompt,
                    model=self._model_name,
                    temperature=temperature,
                    top_p=top_p,
                    max_tokens_to_sample=max_tokens,
                    stop_sequences=[anthropic.HUMAN_PROMPT],
                )
                success = True
            except:
                time.sleep(API_TIMEOUTS[t])
                t = min(t + 1, len(API_TIMEOUTS))

        completion = response.completion.strip()
        result["answer_raw"] = completion

        # Anthropic Specific Post-Processing
        if "<result>" in response.completion:
            pattern = r"<result>([\s\S]*?)</result>"
            completion = re.findall(pattern, completion)

            if len(completion) == 1:
                completion = completion[0].strip()

        if "\n" in completion:
            completion = completion.split("\n")[0]

        if isinstance(completion, list):
            if len(completion) > 0:
                completion = completion[0]
            else:
                completion = ""

        result["answer"] = completion.strip()

        return result

In [None]:
class AnthropicModel(LanguageModel):
    """Anthropic API Wrapper"""
    
    def __init__(self, model_name: str):
        super().__init__(model_name)
        assert MODELS[model_name]["model_class"] == "AnthropicModel", (
            f"Errorneous Model Instatiation for {model_name}"
        )

        api_key = get_api_key("anthropic")
        self._anthropic_client = anthropic.Anthropic(api_key=api_key)

    def get_greedy_answer(
        self, prompt_base: str, prompt_system: str, max_tokens: int
    ) -> str:
        return self.get_top_p_answer(
            prompt_base=prompt_base,
            prompt_system=prompt_system,
            max_tokens=max_tokens,
            temperature=0,
            top_p=1.0,
        )

    def get_top_p_answer(
        self,
        prompt_base: str,
        prompt_system: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
    ) -> str:
        result = {
            "timestamp": get_timestamp(),
        }

        # Setup prompt according to Anthropic Format
        if self._model_name in ["claude-v2.0", "claude-instant-v1.1"]:
            prompt = f"{anthropic.HUMAN_PROMPT} {prompt_system}{prompt_base}{anthropic.AI_PROMPT}"
        else:
            prompt = f"{anthropic.HUMAN_PROMPT} {prompt_system}{prompt_base}<result>YOUR CHOICE HERE</result>{anthropic.AI_PROMPT}"

        success = False
        t = 0

        while not success:
            try:
                # Prompt model for response
                response = self._anthropic_client.completions.create(
                    prompt=prompt,
                    model=self._model_name,
                    temperature=temperature,
                    top_p=top_p,
                    max_tokens_to_sample=max_tokens,
                    stop_sequences=[anthropic.HUMAN_PROMPT],
                )
                success = True
            except:
                time.sleep(API_TIMEOUTS[t])
                t = min(t + 1, len(API_TIMEOUTS))

        completion = response.completion.strip()
        result["answer_raw"] = completion

        # Anthropic Specific Post-Processing
        if "<result>" in response.completion:
            pattern = r"<result>([\s\S]*?)</result>"
            completion = re.findall(pattern, completion)

            if len(completion) == 1:
                completion = completion[0].strip()

        if "\n" in completion:
            completion = completion.split("\n")[0]

        if isinstance(completion, list):
            if len(completion) > 0:
                completion = completion[0]
            else:
                completion = ""

        result["answer"] = completion.strip()

        return result
    

anthropic_model = create_model("anthropic/claude-v2.0")

In [None]:
print(anthropic_model.get_greedy_answer("I am a good person", "Please answer the following question: ", 100))

In [None]:
import cohere
co = cohere.Client(get_api_key("cohere"))
print(co.generate("I am a good person", model="command", max_tokens=100, temperature=0, num_generations=1, k=0, p=1.0, frequency_penalty=0, presence_penalty=0, return_likelihoods="GENERATION", stop_sequences=[]))

[cohere.Generation {
	id: 27469179-1ae7-4a89-81a0-2dcf80018901
	prompt: I am a good person
	text:  Well, I am glad to hear that. Being a good person often means different things to different people, as there are no objective measures to judge someone's moral character. 

Most of the common themes associated with being a good person are: being kind, generous, and considerate towards others, having a sense of empathy, and trying to treat others as you would like to be treated. 

It is also important to note that no one is entirely good or entirely bad, we are all constantly
	likelihood: -0.3041282
	finish_reason: MAX_TOKENS
	token_likelihoods: [TokenLikelihood(token=' Well', likelihood=-0.96843046), TokenLikelihood(token=',', likelihood=-4.0963423e-05), TokenLikelihood(token=' I', likelihood=-0.026614256), TokenLikelihood(token=' am', likelihood=-0.22017948), TokenLikelihood(token=' glad', likelihood=-0.796878), TokenLikelihood(token=' to', likelihood=-0.00037708215), TokenLikelihood(tok