In [None]:
# Install/upgrade the packages listed in requirements.txt into the kernel's
# environment (%pip targets the running kernel); --quiet suppresses pip output.
%pip install -r requirements.txt --upgrade --quiet

In [None]:
import os 
def load_env_file(path=".env"):
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    ``os.environ`` is updated in place instead of being rebound to a plain
    dict, so the real process environment is modified as well.

    Parsing rules:
      * surrounding whitespace, including the trailing newline, is stripped
        from keys and values;
      * only the first ``=`` splits a line, so values may contain ``=``;
      * blank lines, ``#`` comment lines and lines without ``=`` are skipped.

    Args:
        path: Location of the env file. Defaults to ``".env"``.

    Returns:
        The variable names that were set, in file order. Values are not
        returned so that secrets do not end up in cell output.
    """
    loaded_keys = []
    try:
        env_file = open(path, "r")
    except FileNotFoundError:
        # Variables may already be exported in the shell, so only warn here.
        print(f"warning: {path!r} not found, no environment variables loaded")
        return loaded_keys

    with env_file:
        for raw_line in env_file:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            key, separator, value = line.partition("=")
            key = key.strip()
            if not separator or not key:
                continue
            os.environ[key] = value.strip()
            loaded_keys.append(key)
    return loaded_keys


# Assigned so the returned names are not echoed as the cell's output.
_loaded_env_keys = load_env_file()

In [None]:
# Prompt template text with {question} and {context} placeholders.
# It is not referenced again in the visible part of this notebook.
payload = """You are an AI assistant for the Philipp Schmids blog. The blog is located at https://philschmid.de.
You are given the following extracted parts of a long document and a question. Provide a conversational answer to the question.
You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
If the question includes a request for code, provide a code block directly from the blog.
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
Question: {question}
=========
{context}
=========
Answer in Markdown:"""

# Sample question plus a markdown excerpt used as its context.
# NOTE(review): the excerpt starts with "n December" and looks like it lost a
# leading "O"; it is runtime text, so confirm before changing it.
# `context` is reassigned by a later cell.
question = "Is PyTorch 2.0 backward compatible?"
context = """n December 2, 2022, the PyTorch Team announced [PyTorch 2.0](https://pytorch.org/get-started/pytorch-2.0/) at the PyTorch Conference, focused on better performance, being faster, more pythonic, and staying as dynamic as before.

This blog post explains how to get started with PyTorch 2.0 and Hugging Face Transformers today. It will cover how to fine-tune a BERT model for Text Classification using the newest PyTorch 2.0 features.

You will learn how to:

1. [Setup environment & install Pytorch 2.0](#1-setup-environment--install-pytorch-20)
2. [Load and prepare the dataset](#2-load-and-prepare-the-dataset)
3. [Fine-tune & evaluate BERT model with the Hugging Face `Trainer`](#3-fine-tune--evaluate-bert-model-with-the-hugging-face-trainer)
4. [Run Inference & test model](#4-run-inference--test-model)

Before we can start, make sure you have a [Hugging Face Account](https://huggingface.co/join) to save artifacts and experiments.

## Quick intro: Pytorch 2.0

PyTorch 2.0 or, better, 1.14 is entirely backward compatible. Pytorch 2.0 will not require any modification to existing PyTorch code but can optimize your code by adding a single line of code with `model = torch.compile(model)`.
If you ask yourself, why is there a new major version and no breaking changes? The PyTorch team answered this question in their [FAQ](https://pytorch.org/get-started/pytorch-2.0/#faqs): _“We were releasing substantial new features that we believe change how you meaningfully use PyTorch, so we are calling it 2.0 instead.”_

Those new features include top-level support for TorchDynamo, AOTAutograd, PrimTorch, and TorchInductor.

This allows PyTorch 2.0 to achieve a 1.3x-2x training time speedups supporting [today's 46 model architectures](https://github.com/pytorch/torchdynamo/issues/681) from [HuggingFace Transformers](https://github.com/huggingface/transformers)

If you want to learn more about PyTorch 2.0, check out the official [“GET STARTED”](https://pytorch.org/get-started/pytorch-2.0/).

---

Now we know how PyTorch 2.0 works, let's get started. 🚀"""


In [None]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain import OpenAI, VectorDBQA


In [None]:
from datasets import load_dataset
from random import randint

# Load the blog dataset and keep its "train" split; later cells read the
# "content", "url", "title", "date" and "tags" fields from it.
dataset_id = "philschmid/philschmid-de-blog"
ds = load_dataset(dataset_id)["train"]

print(f"dataset length: {len(ds)}")
# randint is inclusive on both ends: the upper bound has to be len(ds) - 1,
# otherwise it can return len(ds) and the lookup raises IndexError.
print(ds[randint(0, len(ds) - 1)].keys())

In [None]:

from langchain.text_splitter import MarkdownTextSplitter
from transformers import AutoTokenizer
from random import randint


# Split every blog post into markdown-aware chunks; the tokenizer is handed to
# the splitter together with chunk_size=500 and chunk_overlap=12.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
text_splitter = MarkdownTextSplitter.from_huggingface_tokenizer(
    tokenizer=tokenizer, chunk_size=500, chunk_overlap=12
)

# One metadata dict per post, passed alongside the post contents.
texts = text_splitter.create_documents(
    ds["content"],
    metadatas=[
        {"source": s["url"], "title": s["title"], "date": s["date"], "tags": s["tags"]}
        for s in ds
    ],
)

print(f"dataset length: {len(texts)}")
# randint is inclusive on both ends: the upper bound has to be len(texts) - 1,
# otherwise it can return len(texts) and the lookup raises IndexError.
print(texts[randint(0, len(texts) - 1)])

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

import os  # also imported in the first cell; repeated so this cell stands alone

# Embedding model with the library's default settings.
embeddings = HuggingFaceEmbeddings()

# Reuse the FAISS index saved on disk when it exists; otherwise build it from
# `texts` and save it, so the notebook also runs on a fresh checkout where no
# "faiss_index" folder has been created yet.
FAISS_INDEX_DIR = "faiss_index"
if os.path.isdir(FAISS_INDEX_DIR):
    db = FAISS.load_local(FAISS_INDEX_DIR, embeddings)
else:
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(FAISS_INDEX_DIR)


In [None]:
from IPython.display import display, Markdown
from langchain.chains import VectorDBQA
from langchain.llms import HuggingFaceHub, OpenAI,Anthropic

# llm = HuggingFaceHub(repo_id="OpenAssistant/oasst-sft-1-pythia-12b", huggingfacehub_api_token=os.environ["HF_API_KEY"])
# llm = OpenAI()
import os 

# Anthropic model configured with temperature=1 and top_p=0.9.
llm = Anthropic(temperature=1, top_p=0.9)

# "stuff" QA chain over the FAISS store, built with k=2 and asked to return
# the source documents together with the answer.
qa = VectorDBQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    vectorstore=db,
    k=2,
    return_source_documents=True,
)

# def query_blog(query:str , model="t5"):
#   if model == "t5":
#     llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", huggingfacehub_api_token=os.environ["HF_API_KEY"])
#     print("using HuggingFaceHub model: google/flan-t5-xxl")
#   elif model == "ul2":
#     llm = HuggingFaceHub(repo_id="google/flan-ul2", huggingfacehub_api_token=os.environ["HF_API_KEY"])
#     print("using HuggingFaceHub model: google/flan-ul2")
#   elif model == "oa":
#     llm = HuggingFaceHub(repo_id="OpenAssistant/oasst-sft-1-pythia-12b", huggingfacehub_api_token=os.environ["HF_API_KEY"])
#     print("using HuggingFaceHub model: OpenAssistant/oasst-sft-1-pythia-12b")
#   else: 
#     llm = OpenAI()
#     print("using OpenAI model")
#   qa.combine_documents_chain.llm_chain.llm = llm
  
#   res = qa({"query": query})
#   answer = res["result"].strip()
#   ref = ",".join([ f"[[{idx+1}]]({url})" for idx, url in enumerate(set([r.metadata["url"] for r in res["source_documents"]]))])
#   display(Markdown(answer + " " + ref))

In [None]:
# Ask the QA chain one question; `query` and `res` are reused by later cells.
query = "How to use Hugging Face Transformers with PyTorch 2.0"
res = qa(dict(query=query))

# Disabled comparison runs through the commented-out query_blog helper:
# query_blog(query, model="t5")   # flan t5
# query_blog(query, model="ul2")  # flan ul2
# query_blog(query, model="oa")   # labelled "openai" in the original cell
print(res["result"])

In [None]:
# Hand-written context excerpt; the "\n" sequences are escapes inside a normal
# (non-raw) string literal, so they become real newlines at runtime.
# The second line renders the QA chain's prompt template with this context and
# the current `query`, so the final prompt text can be inspected.
context="""1. Setup environment & install Pytorch 2.0\n\nOur first step is to install PyTorch 2.0 and the Hugging Face Libraries, including `transformers` and `datasets`.\n\n```python\n# Install PyTorch 2.0 with cuda 11.7\n!pip install "torch>=2.0" --extra-index-url https://download.pytorch.org/whl/cu117 --upgrade --quiet\n```\n\nAdditionally, we are installing the latest version of `transformers` from the `main` git branch, which includes the native integration of PyTorch 2.0 into the `Trainer`.\n\n```python\n# Install transformers and dataset\n!pip install "transformers==4.27.1" "datasets==2.9.0" "accelerate==0.17.1" "evaluate==0.4.0" tensorboard scikit-learn\n# Install git-fls for pushing model and logs to the hugging face hub\n!sudo apt-get install git-lfs --yes\n```\n\nThis example will use the [Hugging Face Hub](https://huggingface.co/models) as a remote model versioning service. To push our model to the Hub, you must register on the [Hugging Face](https://huggingface.co/join). If you already have an account, you can skip this step. After you have an account, we will use the `login` util from the `huggingface_hub` package to log into our account and store our token (access key) on the disk.\n\n```python\nfrom huggingface_hub import login\n\nlogin(\n  token="", # ADD YOUR TOKEN HERE\n  add_to_git_credential=True\n)\n\n"""
qa.combine_documents_chain.llm_chain.prompt.format(context=context,question=query)

In [None]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Recreate the embedding model and reload the FAISS index saved on disk.
embeddings = HuggingFaceEmbeddings()
db = FAISS.load_local("faiss_index", embeddings)

# Hosted OpenAssistant model; the API token is read from HF_API_KEY.
llm = HuggingFaceHub(
    repo_id="OpenAssistant/oasst-sft-1-pythia-12b",
    huggingfacehub_api_token=os.environ["HF_API_KEY"],
    model_kwargs={"do_sample": True, "max_new_tokens": 512, "top_p": 0.8},
)


In [None]:
# Fetch the single best-matching chunk (k=1) for the question.
query = "How can i deploy BERT?"
docs = db.similarity_search(query, k=1)

In [None]:
from langchain import PromptTemplate

# OpenAssistant-formatted QA prompt. The "stuff" chain built by load_qa_chain
# injects the documents under the variable name "context", so the template
# must expose {context}; with {summaries} (the name used by the with-sources
# chain) the chain fails validation when it is constructed.
oa_prompt = PromptTemplate(
  input_variables=["context", "question"],
  template="""<|prompter|>Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:<|endoftext|><|assistant|>"""
)

chain = load_qa_chain(llm, chain_type="stuff", prompt=oa_prompt)

# Last expression of the cell: the chain output is displayed.
chain({"input_documents": docs, "question": query}, return_only_outputs=True)


In [None]:
# The QA chain has no `llm` attribute of its own; the model sits on the inner
# LLM chain, the same attribute path other cells of this notebook use.
qa.combine_documents_chain.llm_chain.llm

In [None]:
# Reference: https://python.langchain.com/en/latest/modules/chains/index_examples/vector_db_qa.html#return-source-documents

In [None]:
from langchain.chains import RetrievalQA
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model plus the FAISS index reloaded from the "faiss_index" folder.
embeddings = HuggingFaceEmbeddings()
db = FAISS.load_local("faiss_index", embeddings)

# Hosted OpenAssistant model; the API token is read from HF_API_KEY.
llm = HuggingFaceHub(
    repo_id="OpenAssistant/oasst-sft-1-pythia-12b",
    huggingfacehub_api_token=os.environ["HF_API_KEY"],
    model_kwargs={"do_sample": True, "max_new_tokens": 512, "top_p": 0.8},
)


# Prompt in the <|prompter|> ... <|endoftext|><|assistant|> format with
# {context} and {question} placeholders.
oa_prompt = PromptTemplate(
    template="""<|prompter|>Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:<|endoftext|><|assistant|>""",
    input_variables=["context", "question"],
)

# Retrieval QA over the FAISS store: "stuff" chain, k=2 retrieved chunks,
# source documents returned next to the answer.
# chain_type_kwargs = {"prompt": oa_prompt}
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    # chain_type_kwargs=chain_type_kwargs,
)
# The prompt is swapped in after construction instead of via chain_type_kwargs.
qa.combine_documents_chain.llm_chain.prompt = oa_prompt



# Run the retrieval QA chain and print only the generated answer text.
query = "How can deploy BERT with Amazon SageMaker?"
res = qa(dict(query=query))
print(res["result"])

In [None]:
from langchain import PromptTemplate

# Per-document ("map") prompt in OpenAssistant format. Written as adjacent
# literals so the space before the first line break stays visible.
question_prompt_template = (
    "<|prompter|>Use the following portion of a long document to see if any of the text is relevant to answer the question. \n"
    "Return any relevant text.\n"
    "{context}\n"
    "Question: {question}\n"
    "Relevant text, if any:<|endoftext|><|assistant|>"
)
QUESTION_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=question_prompt_template,
)

# Final ("combine") prompt in OpenAssistant format. Written as adjacent
# literals so the space before the first line break stays visible.
combine_prompt_template = (
    "<|prompter|>Given the following extracted parts of a long document and a question, create a final answer. \n"
    "If you don't know the answer, just say that you don't know. Don't try to make up an answer.\n"
    "\n"
    "QUESTION: {question}\n"
    "=========\n"
    "{summaries}\n"
    "=========\n"
    "Helpful Answer:<|endoftext|><|assistant|>"
)
COMBINE_PROMPT = PromptTemplate(
    input_variables=["summaries", "question"],
    template=combine_prompt_template,
)
# map_reduce QA chain using the two custom prompts; the per-document map
# steps are requested in the output as well.
chain = load_qa_chain(
    llm,
    chain_type="map_reduce",
    return_map_steps=True,
    question_prompt=QUESTION_PROMPT,
    combine_prompt=COMBINE_PROMPT,
)

query = "How can deploy BERT with Amazon SageMaker?"

# `docs` is whatever an earlier cell left in the kernel namespace.
res = chain(dict(input_documents=docs, question=query), return_only_outputs=True)
res

# Summarizer

In [None]:
from langchain.docstore.document import Document

# Two text passages used as input for the summarization chains below; they
# are wrapped into Document objects right after these assignments.
article="""Hugging Face, a top supplier of open-source machine learning tools, and AWS have joined together to increase the access to artificial intelligence (AI). Hugging Face's cutting-edge transformers and natural language processing (NLP) models will be made available to AWS customers as a result of the cooperation, making it simpler for them to develop and deploy AI applications.

Hugging Face has become well-known in the AI community for its free, open-source transformers library, which is used by thousands of programmers all around the world to build cutting-edge AI models for a range of tasks, including sentiment analysis, language translation, and text summarization. By partnering with AWS, Hugging Face will be able to provide its tools and expertise to a broader audience.

Generative AI has the potential to transform entire industries, but its cost and the required expertise puts the technology out of reach for all but a select few companies, said Adam Selipsky, CEO of AWS. Hugging Face and AWS are making it easier for customers to access popular machine learning models to create their own generative AI applications with the highest performance and lowest costs. This partnership demonstrates how generative AI companies and AWS can work together to put this innovative technology into the hands of more customers.

AWS has increased the scope of its own generative AI offerings. For instance, it improved the AWS QuickSight Q business projections to comprehend common phrases like "show me a forecast." The Microsoft-owned GitHub Copilot, which uses models built from OpenAI's Codex, has competition in the form of AWS's Amazon CodeWhisperer, an AI programming assistant that autocompletes software code by extrapolating from a user's initial hints.

As part of the collaboration, Hugging Face's models will be integrated with AWS services like Amazon SageMaker, a platform for creating, honing, and deploying machine learning models. This will make it simple for developers to create their own AI applications using pre-trained models from Hugging Face without needing to have considerable machine learning knowledge.

The future of AI is here, but it’s not evenly distributed, said Clement Delangue, CEO of Hugging Face."""
# Second passage; kept as a separate string so it becomes its own Document.
article2 = """Accessibility and transparency are the keys to sharing progress and creating tools to use these new capabilities wisely and responsibly. Amazon SageMaker and AWS-designed chips will enable our team and the larger machine learning community to convert the latest research into openly reproducible models that anyone can build on.

Hugging Face offers a library of over 10,000 Hugging Face Transformers models that you can run on Amazon SageMaker. With just a few lines of code, you can import, train, and fine-tune pre-trained NLP Transformers models such as BERT, GPT-2, RoBERTa, XLM, DistilBert, and deploy them on Amazon SageMaker. 

Hugging Face and AWS's partnership is anticipated to have a big impact on the AI market since it will make it possible for more companies and developers to use cutting-edge AI tools to generate unique consumer solutions."""

# Wrap each passage in a Document, keeping the order article, article2.
docs = [Document(page_content=passage) for passage in (article, article2)]

In [None]:
from langchain.chains.summarize import load_summarize_chain
from langchain.llms import HuggingFaceHub

# Hosted flan-ul2 model; the API token is read from HF_API_KEY.
llm = HuggingFaceHub(
    repo_id="google/flan-ul2",
    huggingfacehub_api_token=os.environ["HF_API_KEY"],
    model_kwargs={"do_sample": True, "max_new_tokens": 512, "top_p": 0.8},
)

# map_reduce summarization that also returns the intermediate steps.
chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True)

# Stored under its own name: binding the result to `sum` would shadow the
# builtin sum() for every cell that runs afterwards in this kernel.
summary_result = chain({"input_documents": docs}, return_only_outputs=True)
summary_result

In [None]:
from langchain.prompts import PromptTemplate

# Prompt for the first summary, before any refinement.
prompt_template = """Write a concise summary of the following:

{text}

CONCISE SUMMARY:"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

# Prompt for each refine step. Adjacent string literals are joined without a
# separator, so every fragment carries its own trailing space or newline;
# without them the text reads "summary(only if needed)" and "summaryIf the
# context".
refine_template = (
    "Your job is to produce a final summary\n"
    "We have provided an existing summary up to a certain point: {existing_answer}\n"
    "We have the opportunity to refine the existing summary "
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "Given the new context, refine the original summary.\n"
    "If the context isn't useful, return the original summary."
)
refine_prompt = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template=refine_template,
)

# "refine" summarization chain; the intermediate summaries are returned too.
chain = load_summarize_chain(
    llm,
    chain_type="refine",
    return_intermediate_steps=True,
    question_prompt=PROMPT,
    refine_prompt=refine_prompt,
)
chain({"input_documents": docs}, return_only_outputs=True)

In [None]:
# Show the value prompt_length reports for `docs` on the current `chain`.
# NOTE(review): presumably a prompt size; some chain types may return None - confirm.
chain.prompt_length(docs)

In [None]:
# Hosted OpenAssistant model; the API token is read from HF_API_KEY.
llm = HuggingFaceHub(
    repo_id="OpenAssistant/oasst-sft-1-pythia-12b",
    huggingfacehub_api_token=os.environ["HF_API_KEY"],
    model_kwargs={"do_sample": True, "max_new_tokens": 512, "top_p": 0.8},
)

# Summarization prompt in the <|prompter|> ... <|endoftext|><|assistant|>
# format used for this model elsewhere in the notebook.
oa_prompt = PromptTemplate(
  input_variables=["text"],
  template="""<|prompter|>Write a concise summary of the following:


{text}<|endoftext|><|assistant|>"""
)

# Pass the OpenAssistant prompt defined just above; the cell previously built
# the chain with the generic PROMPT and left oa_prompt unused.
chain = load_summarize_chain(llm, chain_type="stuff", prompt=oa_prompt)  # return_intermediate_steps=True)

# Stored under its own name so the builtin sum() is not shadowed.
summary_result = chain({"input_documents": docs})
summary_result["output_text"]

In [None]:
# Sample records: one dict per person with "firstname" and "lastname" keys.
user = [
    {"firstname": first, "lastname": last}
    for first, last in (
        ("John", "Doe"),
        ("Jane", "Doe"),
        ("John", "Smith"),
        ("Jane", "Smith"),
    )
]