# 1. Model Exploration

In [1]:
!pip install -q accelerate anthropic google-cloud-aiplatform langchain openai protobuf sentencepiece torch transformers xformers


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os

from abc import ABC, abstractmethod
from typing import List

from langchain.llms import HuggingFacePipeline, OpenAI, VertexAI
from langchain.chat_models import ChatAnthropic, ChatOpenAI, ChatVertexAI
from langchain.schema import HumanMessage, SystemMessage

from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    pipeline,
)

## 1.1 Models

In [3]:
# Every ID below is namespaced as "<provider>/<model-name>"; the wrapper
# classes decide how much of the ID to forward to the backing library.

# Anthropic chat models.
ANTHROPIC_MODEL_IDS = [
    f"anthropic/{name}"
    for name in (
        "claude-v1",
        "claude-v1-100k",
        "claude-instant-v1",
        "claude-instant-v1-100k",
    )
]

# Google models: one text-completion model and one chat model.
GOOGLE_MODEL_IDS = [f"google/{name}" for name in ("text-bison-001", "chat-bison-001")]

# Seq2seq (encoder-decoder) checkpoints: T5 and FLAN-T5 in five sizes each.
HUGGINGFACE_ENCODER_DECODER_MODEL_IDS = [
    f"google/{family}-{size}"
    for family, sizes in (
        ("t5", ("small", "base", "large", "3b", "11b")),
        ("flan-t5", ("small", "base", "large", "xl", "xxl")),
    )
    for size in sizes
]

# Causal (decoder-only) checkpoints: GPT-2, BLOOM/BLOOMZ, XGLM and Llama 2.
HUGGINGFACE_DECODER_MODEL_IDS = (
    ["openai/gpt2"]
    + [
        # The empty suffix yields the unsuffixed "bloom" / "bloomz" entries.
        f"bigscience/{family}{suffix}"
        for family in ("bloom", "bloomz")
        for suffix in ("-560m", "-1b1", "-1b7", "-3b", "-7b1", "")
    ]
    + [
        f"facebook/xglm-{size}"
        for size in ("564M", "1.7B", "2.9B", "4.5B", "7.5B")
    ]
    + [
        # Each size is listed as the base variant followed by the chat variant.
        f"meta-llama/Llama-2-{size}{variant}-hf"
        for size in ("7b", "13b", "70b")
        for variant in ("", "-chat")
    ]
)

# OpenAI models: the two gpt-* entries are treated as chat models by
# OpenAIModel; the text-* entries go through the completion wrapper.
OPENAI_MODEL_IDS = [
    f"openai/{name}"
    for name in (
        "gpt-4",
        "gpt-3.5-turbo",
        "text-davinci-003",
        "text-curie-001",
        "text-babbage-001",
        "text-ada-001",
    )
]

In [4]:
class BaseModel(ABC):
    """Abstract base for the text-generation wrappers in this notebook.

    Holds the generation settings shared by every wrapper and declares the
    single ``generate`` entry point that concrete subclasses must implement.
    """

    _model_id: str
    _max_tokens: int
    _stop_sequences: List[str]
    _temperature: float
    _top_p: float

    def __init__(
        self,
        model_id: str,
        max_tokens: int,
        stop_sequences: List[str],
        temperature: float,
        top_p: float,
    ):
        """Record the generation settings on the instance.

        Args:
            model_id: Identifier of the model this wrapper represents.
            max_tokens: Token budget requested for a generation.
            stop_sequences: Strings passed as ``stop`` when generating.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling probability mass.
        """
        (
            self._model_id,
            self._max_tokens,
            self._stop_sequences,
            self._temperature,
            self._top_p,
        ) = (model_id, max_tokens, stop_sequences, temperature, top_p)

    @abstractmethod
    def generate(self, prompt: str) -> str:
        """Return the model's completion for ``prompt`` (subclass-defined)."""

    def __repr__(self):
        # Render every stored setting as "name=value", in declaration order.
        settings = (
            ("model_id", self._model_id),
            ("max_tokens", self._max_tokens),
            ("stop_sequences", self._stop_sequences),
            ("temperature", self._temperature),
            ("top_p", self._top_p),
        )
        rendered = ", ".join(f"{label}={value}" for label, value in settings)
        return f"<{self.__class__.__name__}({rendered})>"


class AnthropicModel(BaseModel):
    """Wrapper that sends prompts to an Anthropic chat model via LangChain."""

    def __init__(
        self,
        model_id: str,
        max_tokens: int = 256,
        stop_sequences: List[str] = None,
        temperature: float = 0.7,
        top_p: float = 1.0,
    ):
        """Build the underlying ``ChatAnthropic`` client.

        Args:
            model_id: Namespaced ID; the part after the first "/" is the
                model name given to the client.
            max_tokens: Forwarded as ``max_tokens_to_sample``.
            stop_sequences: Strings passed as ``stop`` on every call.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling probability mass.
        """
        super().__init__(
            model_id=model_id,
            max_tokens=max_tokens,
            stop_sequences=stop_sequences,
            temperature=temperature,
            top_p=top_p,
        )

        # The API key is read from the environment, never hard-coded.
        client_options = dict(
            model=model_id.split("/")[1],
            max_tokens_to_sample=max_tokens,
            temperature=temperature,
            top_p=top_p,
            anthropic_api_key=os.getenv("ANTHROPIC_API_KEY"),
            verbose=True,
        )
        self._model = ChatAnthropic(**client_options)

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` as a single human message and return the reply text."""
        messages = [HumanMessage(content=prompt)]
        reply = self._model(messages, stop=self._stop_sequences)
        return reply.content


class GoogleModel(BaseModel):
    """Wrapper around LangChain's Vertex AI text and chat models.

    The chat model is driven with a list of messages; the text model is
    driven with a plain prompt string.
    """

    def __init__(
        self,
        model_id: str,
        max_tokens: int = 256,
        stop_sequences: List[str] = None,
        temperature: float = 0.7,
        top_p: float = 1.0,
    ):
        """Build the underlying ``ChatVertexAI`` or ``VertexAI`` client.

        Args:
            model_id: Namespaced ID such as ``"google/chat-bison-001"``; the
                part after the first "/" is the model name given to the client.
            max_tokens: Token budget forwarded to the client.
            stop_sequences: Strings passed as ``stop`` on every call.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling probability mass.
        """
        super().__init__(model_id, max_tokens, stop_sequences, temperature, top_p)

        # Compare against the full namespaced ID. The previous check used the
        # bare name "chat-bison-001", which can never equal a "google/..." ID,
        # so the chat model was silently built as a text-completion model.
        self._is_chat = model_id in ["google/chat-bison-001"]

        # NOTE(review): the `model=` / `max_tokens=` keyword names are passed
        # through unchanged; some LangChain releases spell these `model_name` /
        # `max_output_tokens` -- confirm against the installed version.
        client_cls = ChatVertexAI if self._is_chat else VertexAI
        self._model = client_cls(
            model=model_id.split("/")[1],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            credentials=None,
            verbose=True,
        )

    def generate(self, prompt: str) -> str:
        """Return the model's completion for ``prompt``.

        Chat models receive the prompt as a single human message and the
        reply's ``content`` is returned; text models receive the raw string
        and their output is returned as-is.
        """
        if self._is_chat:
            output = self._model(
                [HumanMessage(content=prompt)],
                stop=self._stop_sequences,
            )

            return output.content

        return self._model(prompt, stop=self._stop_sequences)


class HuggingFaceEncoderDecoderModel(BaseModel):
    """Wrapper that runs a seq2seq checkpoint through a
    ``text2text-generation`` pipeline wrapped in ``HuggingFacePipeline``."""

    def __init__(
        self,
        model_id: str,
        max_tokens: int = 256,
        stop_sequences: List[str] = None,
        temperature: float = 0.7,
        top_p: float = 1.0,
    ):
        """Load the model and tokenizer and assemble the pipeline.

        Args:
            model_id: Namespaced ID from the notebook's model lists.
            max_tokens: Forwarded to the pipeline as ``max_length``.
            stop_sequences: Strings passed as ``stop`` on every call.
            temperature: Sampling temperature forwarded to the pipeline.
            top_p: Nucleus-sampling probability mass forwarded to the pipeline.
        """
        super().__init__(model_id, max_tokens, stop_sequences, temperature, top_p)

        # IDs containing "openai/gpt2" or "google/t5" are reduced to the part
        # after the "/"; every other ID is used verbatim.
        # (presumably those checkpoints are published without a namespace --
        # verify against the Hub)
        if "openai/gpt2" in model_id or "google/t5" in model_id:
            self._hf_model_id = model_id.split("/")[1]
        else:
            self._hf_model_id = model_id

        # Same loading options for the model and the tokenizer.
        load_options = dict(
            device_map="auto",
            torch_dtype="auto",
            use_auth_token=os.getenv("HUGGINGFACE_API_KEY"),
        )
        seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(
            self._hf_model_id, **load_options
        )
        tokenizer = AutoTokenizer.from_pretrained(self._hf_model_id, **load_options)

        generation_pipeline = pipeline(
            "text2text-generation",
            model=seq2seq_model,
            tokenizer=tokenizer,
            device_map="auto",
            max_length=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        self._model = HuggingFacePipeline(pipeline=generation_pipeline, verbose=True)

    def generate(self, prompt: str) -> str:
        """Run ``prompt`` through the pipeline and return its output."""
        return self._model(prompt, stop=self._stop_sequences)


class HuggingFaceDecoderModel(BaseModel):
    """Wrapper that runs a causal-LM checkpoint through a
    ``text-generation`` pipeline wrapped in ``HuggingFacePipeline``."""

    def __init__(
        self,
        model_id: str,
        max_tokens: int = 256,
        stop_sequences: List[str] = None,
        temperature: float = 0.7,
        top_p: float = 1.0,
    ):
        """Load the model and tokenizer and assemble the pipeline.

        Args:
            model_id: Namespaced ID from the notebook's model lists.
            max_tokens: Forwarded to the pipeline as ``max_new_tokens``.
            stop_sequences: Strings passed as ``stop`` on every call.
            temperature: Sampling temperature forwarded to the pipeline.
            top_p: Nucleus-sampling probability mass forwarded to the pipeline.
        """
        super().__init__(model_id, max_tokens, stop_sequences, temperature, top_p)

        # IDs containing "openai/gpt2" or "google/t5" are reduced to the part
        # after the "/"; every other ID is used verbatim.
        # (presumably those checkpoints are published without a namespace --
        # verify against the Hub)
        strip_namespace = "openai/gpt2" in model_id or "google/t5" in model_id
        self._hf_model_id = model_id.split("/")[1] if strip_namespace else model_id

        # Same loading options for the model and the tokenizer.
        load_options = dict(
            device_map="auto",
            torch_dtype="auto",
            use_auth_token=os.getenv("HUGGINGFACE_API_KEY"),
        )
        causal_model = AutoModelForCausalLM.from_pretrained(
            self._hf_model_id, **load_options
        )
        tokenizer = AutoTokenizer.from_pretrained(self._hf_model_id, **load_options)

        generation_pipeline = pipeline(
            "text-generation",
            model=causal_model,
            tokenizer=tokenizer,
            device_map="auto",
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        self._model = HuggingFacePipeline(pipeline=generation_pipeline, verbose=True)

    def generate(self, prompt: str) -> str:
        """Run ``prompt`` through the pipeline and return its output."""
        return self._model(prompt, stop=self._stop_sequences)


class OpenAIModel(BaseModel):
    """Wrapper around LangChain's OpenAI chat and completion clients.

    ``openai/gpt-4`` and ``openai/gpt-3.5-turbo`` use ``ChatOpenAI``; every
    other ID uses the ``OpenAI`` completion client.
    """

    def __init__(
        self,
        model_id: str,
        max_tokens: int = 256,
        stop_sequences: List[str] = None,
        temperature: float = 0.7,
        top_p: float = 1.0,
    ):
        """Build the underlying client.

        Args:
            model_id: Namespaced ID; the part after the first "/" is the
                model name given to the client.
            max_tokens: Token budget forwarded to the client.
            stop_sequences: Strings passed as ``stop`` on every call.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling probability mass; the chat client gets
                it through ``model_kwargs``, the completion client directly.
        """
        super().__init__(model_id, max_tokens, stop_sequences, temperature, top_p)

        self._is_chat = model_id in ["openai/gpt-4", "openai/gpt-3.5-turbo"]

        # Options common to both client types; the key comes from the environment.
        common_options = dict(
            model=model_id.split("/")[1],
            max_tokens=max_tokens,
            temperature=temperature,
            openai_api_key=os.getenv("OPENAI_API_KEY"),
            verbose=True,
        )

        if self._is_chat:
            self._model = ChatOpenAI(model_kwargs={"top_p": top_p}, **common_options)
        else:
            self._model = OpenAI(top_p=top_p, **common_options)

    def generate(self, prompt: str) -> str:
        """Return the model's completion for ``prompt``.

        Chat models receive a single human message and the reply's
        ``content`` is returned; completion models receive the raw string.
        """
        if not self._is_chat:
            return self._model(prompt, stop=self._stop_sequences)

        reply = self._model(
            [HumanMessage(content=prompt)],
            stop=self._stop_sequences,
        )
        return reply.content


class ModelFactory:
    """Lists the known model IDs and builds the matching wrapper for one."""

    def list_models(self) -> List[str]:
        """Return every known model ID as a new list, grouped by provider."""
        known: List[str] = []
        for group in (
            ANTHROPIC_MODEL_IDS,
            GOOGLE_MODEL_IDS,
            HUGGINGFACE_ENCODER_DECODER_MODEL_IDS,
            HUGGINGFACE_DECODER_MODEL_IDS,
            OPENAI_MODEL_IDS,
        ):
            known.extend(group)
        return known

    def get_model(self, id: str) -> BaseModel:
        """Instantiate the wrapper for ``id`` with its default settings.

        Raises:
            ValueError: If ``id`` is not in any of the known ID lists.
        """
        # Checked in this order; the first list containing the ID wins.
        dispatch = (
            (ANTHROPIC_MODEL_IDS, AnthropicModel),
            (GOOGLE_MODEL_IDS, GoogleModel),
            (HUGGINGFACE_ENCODER_DECODER_MODEL_IDS, HuggingFaceEncoderDecoderModel),
            (HUGGINGFACE_DECODER_MODEL_IDS, HuggingFaceDecoderModel),
            (OPENAI_MODEL_IDS, OpenAIModel),
        )
        for known_ids, wrapper_cls in dispatch:
            if id in known_ids:
                return wrapper_cls(id)

        raise ValueError("invalid model")

    def __repr__(self):
        return f"<{self.__class__.__name__}()>"

## 1.2 Evaluate Models

In [5]:
factory = ModelFactory()

In [6]:
factory.list_models()

['anthropic/claude-v1',
 'anthropic/claude-v1-100k',
 'anthropic/claude-instant-v1',
 'anthropic/claude-instant-v1-100k',
 'google/text-bison-001',
 'google/chat-bison-001',
 'google/t5-small',
 'google/t5-base',
 'google/t5-large',
 'google/t5-3b',
 'google/t5-11b',
 'google/flan-t5-small',
 'google/flan-t5-base',
 'google/flan-t5-large',
 'google/flan-t5-xl',
 'google/flan-t5-xxl',
 'openai/gpt2',
 'bigscience/bloom-560m',
 'bigscience/bloom-1b1',
 'bigscience/bloom-1b7',
 'bigscience/bloom-3b',
 'bigscience/bloom-7b1',
 'bigscience/bloom',
 'bigscience/bloomz-560m',
 'bigscience/bloomz-1b1',
 'bigscience/bloomz-1b7',
 'bigscience/bloomz-3b',
 'bigscience/bloomz-7b1',
 'bigscience/bloomz',
 'facebook/xglm-564M',
 'facebook/xglm-1.7B',
 'facebook/xglm-2.9B',
 'facebook/xglm-4.5B',
 'facebook/xglm-7.5B',
 'meta-llama/Llama-2-7b-hf',
 'meta-llama/Llama-2-7b-chat-hf',
 'meta-llama/Llama-2-13b-hf',
 'meta-llama/Llama-2-13b-chat-hf',
 'meta-llama/Llama-2-70b-hf',
 'meta-llama/Llama-2-70b-c

### 1.2.1 Anthropic Models

In [7]:
model = factory.get_model("anthropic/claude-instant-v1")
model.generate("Question: What is the first letter of the alphabet?\nAnswer: ")

' The first letter of the alphabet is A.'

### 1.2.2 Google Models

### 1.2.3 HuggingFace Models

In [8]:
model = factory.get_model("openai/gpt2")
model.generate("Question: What is the first letter of the alphabet?\nAnswer: ")

Error caught was: No module named 'triton'
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'\xa0I am guessing that the first letter of the alphabet is the first syllable. This means that in English when you make an error, it\'s easier to fix the mistake.\nWhat is the first letters of the alphabet?\nThe first letter of the alphabet is the first syllable. If you use the first syllable of any word and it\'s a comma, then you will end up with a "mixed sentence" (which is what this is) that is, you have to make the mistake of not using the "mixed sentence" correctly.\nWhat is the second letter of the alphabet?\nThe second letter of the alphabet is the first syllable. When you make an error, it\'s easier to fix the mistake.\nWhat is the third letter of the alphabet?\nThe third letter of the alphabet is the first syllable. When you make an error, it\'s easier to fix the mistake.\nWhat is the fourth letter of the alphabet?\nThe fourth letter of the alphabet is the first syllable. When you make an error, it\'s easier to fix the error.\nWhat is the fifth letter of the alphabet?\nThe f

### 1.2.4 OpenAI Models

In [9]:
model = factory.get_model("openai/text-davinci-003")
model.generate("Question: What is the first letter of the alphabet?\nAnswer: ")

' A'