In [1]:

import getpass
import os
from together import Together

os.environ["TOGETHER_API_KEY"] = getpass.getpass("Enter the api key:")

together_api_key = os.getenv('TOGETHER_API_KEY')

if not together_api_key:
    raise ValueError("API key is not set.")

client = Together(api_key = together_api_key)

In [2]:
import requests
from bs4 import BeautifulSoup, SoupStrainer

url = "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/"

#it will look like request is coming from a browser
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

response = requests.get(url, headers=headers)

if response.status_code==200:
    strainer = SoupStrainer(class_=("post-title", "post-header", "post-content"))
    
    soup = BeautifulSoup(response.content, "html.parser", parse_only=strainer)
    
    content = soup.get_text()
    content_length = len(content)
    
    print(f"content length: {content_length}")
    
    print(content[:500])
    
else:
    print(f"status_code:{response.status_code}")


content length: 29295


      Prompt Engineering
    
Date: March 15, 2023  |  Estimated Reading Time: 21 min  |  Author: Lilian Weng


Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.
This post only focuses on prompt engineering fo


In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter


text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
splits = text_splitter.split_text(content)




In [4]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(splits)

import faiss
import numpy as np 

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

  from tqdm.autonotebook import tqdm, trange
  return torch._C._cuda_getDeviceCount() > 0


In [14]:
from langchain import hub


def     retrieve_documents(query, k=5):
    query_embd = model.encode([query])
    distances, indices = index.search(np.array(query_embd), k)
    return [splits[i] for i in indices[0]]


prompt = hub.pull("rlm/rag-prompt")


In [15]:
def format_docs(docs):
    return "\n\n".join(docs)

In [16]:
from langchain_core.output_parsers import StrOutputParser


def rag_chain(question):
    retrieved_docs = retrieve_documents(question)
    context = format_docs(retrieved_docs)
    input_data = {"context": context, "question": question}
    response = (prompt | client | StrOutputParser())(input_data)
    return response

In [17]:
# Test the RAG chain
output = rag_chain("What is Task Decomposition?")
print(output)

TypeError: Expected a Runnable, callable or dict.Instead got an unsupported type: <class 'together.client.Together'>