In [1]:
# imports
import os

from openai import OpenAI

from dotenv import load_dotenv

In [4]:
# load env
load_dotenv()

True

In [6]:
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")

In [7]:
# use OpenAI API directly
client = OpenAI(api_key=TOGETHER_API_KEY,
  base_url='https://api.together.xyz',
)

chat_completion = client.chat.completions.create(
  messages=[
    {
      "role": "system",
      "content": "You are an AI assistant",
    },
    {
      "role": "user",
      "content": "Tell me about the Transformers architecture and the relationship with LLM",
    }
  ],
  model="mistralai/Mixtral-8x7B-Instruct-v0.1",
  max_tokens=1024
)

print(chat_completion.choices[0].message.content)

 Sure, I'd be happy to explain!

Transformers are a type of model architecture used in machine learning, particularly in the field of natural language processing (NLP). They were introduced in a paper by Vaswani et al. in 2017, and have since become widely used due to their effectiveness in tasks such as machine translation, text summarization, and sentiment analysis, among others.

The Transformers architecture is based on the self-attention mechanism, which allows the model to consider the entire context of a sentence or document when making predictions. This is in contrast to traditional recurrent neural network (RNN) models, which process sequences one element at a time and can have difficulty capturing long-range dependencies.

The self-attention mechanism in Transformers works by assigning attention weights to each word in the input sequence, based on its relevance to the current word being processed. These attention weights are calculated using a dot product of the input word em

In [10]:
# integrate with LangChain
from langchain.llms.base import LLM
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

from langchain_together import Together

import together

In [6]:
# define a custom LLM
class Together(LLM):
    """
    A custom LLM class that integrates together models.
    
    Arguments:

    model_folder_path: (str) Folder path where the model lies
    model_name: (str) The name of the model to use (<model name>.bin)
    allow_download: (bool) whether to download the model or not    
    """

    model: str = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
    together_api_key: str = os.environ.get("TOGETHER_API_KEY")
    temperature: float = 0.7
    max_tokens: int = 1024

    @property
    def _llm_type(self) -> str:
        return "Together"
    
    def _call(self, prompt: str) -> str:
        """
        This is the main method which will be called when 
        we use the LLM to run with our prompt. Example:
        
        response = llm(prompt)
        """
        together.api_key = self.together_api_key
        output = together.Complete.create(
          prompt, 
          model = self.model, 
          max_tokens = self.max_tokens,
          temperature = self.temperature,
        )
        return output['output']['choices'][0]['text']

In [7]:
# init Together LLM
model="mistralai/Mixtral-8x7B-Instruct-v0.1"
llm = Together(model=model)

In [8]:
prompt_template = "You are an AI assistant. Your task is to answer the following question: {question}"
prompt=PromptTemplate(
    template=prompt_template,
    input_variables=["question"],
)
chain = LLMChain(llm=llm, prompt=prompt, verbose=True)

In [9]:
chain("Tell me about the Transformers architecture and the relationship with LLM")

  warn_deprecated(




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are an AI assistant. Your task is to answer the following question: Tell me about the Transformers architecture and the relationship with LLM[0m


TypeError: _call() got an unexpected keyword argument 'stop'

In [12]:
llm = Together(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.7,
    max_tokens=128,
    top_k=1,
)

In [13]:
prompt_template = "You are an AI assistant. Your task is to answer the following question: {question}"
prompt=PromptTemplate(
    template=prompt_template,
    input_variables=["question"],
)
chain = LLMChain(llm=llm, prompt=prompt, verbose=True)

In [14]:
chain("Tell me about the Transformers architecture and the relationship with LLM")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are an AI assistant. Your task is to answer the following question: Tell me about the Transformers architecture and the relationship with LLM[0m

[1m> Finished chain.[0m


{'question': 'Tell me about the Transformers architecture and the relationship with LLM',
 'text': '.\n\nThe Transformers architecture is a type of neural network model that was introduced in a paper called "Attention is All You Need" by Vaswani et al. in 2017. It is a model architecture that uses self-attention mechanisms to process input data, which allows it to handle long-range dependencies in the data more effectively than other types of neural network models.\n\nThe Transformers architecture is particularly well-suited for natural language processing (NLP) tasks, such as language translation and text summarization. This is because it can handle the complex and variable structure of natural'}