# The Training Pipeline for Alpaca 7B

![Alpaca 7B training pipeline](vis/image.png)

# The Data Distribution

![Data distribution](vis/image-1.png)

#### We use Together AI to load models for convenience

In [12]:
import os, yaml, together, logging
from langchain.llms.base import LLM
from pydantic import Extra, Field, root_validator
from typing import Any, Dict, List, Mapping, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.utils import enforce_stop_tokens
from langchain.utils import get_from_dict_or_env

In [8]:
# Read the Together AI key from a local YAML file, then expose it both as the
# TOGETHER_AI_API environment variable and on the `together` client module.
# NOTE(review): the file name 'cadentials.yaml' looks like a misspelling of
# 'credentials.yaml' — confirm it matches the file on disk before renaming.
# NOTE(review): yaml.load with FullLoader is kept unchanged; for a plain
# key/value credentials file yaml.safe_load is the more conservative loader.
with open('cadentials.yaml') as f:
    credentials = yaml.load(f, Loader=yaml.FullLoader)

# The environment variable is set first because the next line reads the key
# back from it.
os.environ['TOGETHER_AI_API'] = credentials['TOGETHER_AI_API']
together.api_key = os.environ['TOGETHER_AI_API']

In [10]:
# Fetch the list of models from Together and print each entry's 'name' field,
# one per line, under a banner.
models = together.Models.list()
print("######## Below are the models available ########")
for model in models:
    print(model['name'])

######## Below are the models available ########
Austism/chronos-hermes-13b
EleutherAI/pythia-12b-v0
EleutherAI/pythia-1b-v0
EleutherAI/pythia-2.8b-v0
EleutherAI/pythia-6.9b
Gryphe/MythoMax-L2-13b
HuggingFaceH4/starchat-alpha
NousResearch/Nous-Hermes-13b
NousResearch/Nous-Hermes-Llama2-13b
NumbersStation/nsql-llama-2-7B
OpenAssistant/llama2-70b-oasst-sft-v10
OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5
OpenAssistant/stablelm-7b-sft-v7-epoch-3
Phind/Phind-CodeLlama-34B-Python-v1
Phind/Phind-CodeLlama-34B-v2
SG161222/Realistic_Vision_V3.0_VAE
WizardLM/WizardCoder-15B-V1.0
WizardLM/WizardCoder-Python-34B-V1.0
WizardLM/WizardLM-70B-V1.0
bigcode/starcoder
databricks/dolly-v2-12b
databricks/dolly-v2-3b
databricks/dolly-v2-7b
defog/sqlcoder
garage-bAInd/Platypus2-70B-instruct
huggyllama/llama-13b
huggyllama/llama-30b
huggyllama/llama-65b
huggyllama/llama-7b
lmsys/fastchat-t5-3b-v1.0
lmsys/vicuna-13b-v1.3
lmsys/vicuna-13b-v1.5-16k
lmsys/vicuna-13b-v1.5
lmsys/vicuna-7b-v1.3
mistralai/Mistral-

In [11]:
# Identifier of the Together-hosted model used by the cells below.
model_card = 'togethercomputer/alpaca-7b'
# Ask Together to start that model. As the cell's last expression, the returned
# dict (a 'success' flag and a 'value' string) is displayed as the cell output.
together.Models.start(model_card)

{'success': True,
 'value': 'e196cc334bf5e3a6013fc38a42f7c1764a8fd41ace83bf0a42f407b49090d616'}

In [13]:
class TogetherLLM(LLM):
    """LangChain ``LLM`` wrapper around the Together completion endpoint.

    The wrapper sends a prompt to ``together.Complete.create`` using the
    configured model, sampling temperature and token budget, and returns the
    text of the first choice in the response.
    """

    model: str = model_card
    """model endpoint to use"""

    # Left empty by default: ``validate_environment`` fills it from the
    # TOGETHER_AI_API environment variable at instantiation time. Reading the
    # variable here instead would raise KeyError while the class is being
    # defined and would bake the secret into the class-level default.
    together_api_key: str = ""
    """Together API key"""

    temperature: float = 0.7
    """What sampling temperature to use."""

    max_tokens: int = 512
    """The maximum number of tokens to generate in the completion."""

    class Config:
        # Reject constructor arguments that are not declared fields.
        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key is set.

        Uses the ``together_api_key`` value passed to the constructor when it
        is non-empty, otherwise falls back to the ``TOGETHER_AI_API``
        environment variable. ``get_from_dict_or_env`` raises if neither
        source provides a key.
        """
        # The lookup/write key must match the declared field name; otherwise
        # an explicitly passed key is never checked and an undeclared
        # attribute is added to the instance.
        api_key = get_from_dict_or_env(
            values,
            "together_api_key",
            "TOGETHER_AI_API",
        )
        values["together_api_key"] = api_key
        return values

    @property
    def _llm_type(self) -> str:
        """Return type of LLM."""
        return "together"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call to Together endpoint.

        Args:
            prompt: Text sent to the model.
            stop: Optional stop sequences; when given, the returned text is
                cut at the first occurrence of any of them.
            run_manager: Optional LangChain callback manager. Accepted so the
                signature matches the base-class contract; not used here.
            **kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            The ``text`` of the first choice in the endpoint's response.
        """
        # The client reads its key from module state, so set it on every call
        # in case another instance (or cell) changed it.
        together.api_key = self.together_api_key
        output = together.Complete.create(
            prompt,
            model=self.model,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
        )
        text = output['output']['choices'][0]['text']
        if stop:
            # Honour caller-supplied stop sequences instead of silently
            # dropping them.
            text = enforce_stop_tokens(text, stop)
        return text


In [14]:
# Build a wrapper instance for the model chosen above. temperature=0.1
# overrides the class default of 0.7; max_tokens=512 restates the default.
test_llm = TogetherLLM(
                    model=model_card,
                    temperature=0.1,
                    max_tokens=512
                    )

In [15]:
# Sanity check: display the wrapper's class together with the model name and
# temperature it was constructed with.
type(test_llm), test_llm.model, test_llm.temperature

(__main__.TogetherLLM, 'togethercomputer/alpaca-7b', 0.1)

In [19]:
# Send one prompt through the wrapper and print the returned completion text.
# NOTE(review): the text is printed exactly as returned, so any marker tokens
# the model emits will show up in the output — confirm whether that is wanted.
res = test_llm("What are Alpacas and how are they different to Lamas?")
print(res)


Alpacas are a species of South American camelid, related to llamas and vicuñas. They are smaller than llamas, and have a finer fleece. Alpacas are shorn once a year, and the fleece is usually white, although other colours are available. Alpacas are also more docile than llamas, and make better pets.
Lamas are a species of South American camelid, related to alpacas and vicuñas. They are larger than alpacas, and have a coarser fleece. Lamas are shorn twice a year, and the fleece is usually brown or grey. Lamas are more hardy than alpacas, and are often used as pack animals.</s>
