## Install Requirements

In [2]:
!pip -q install langchain
!pip -q install bitsandbytes accelerate transformers
!pip -q install datasets loralib sentencepiece
!pip -q install pypdf
!pip -q install sentence_transformers
!pip -q install unstructured # to extract unstrucutured data
!pip install tokenizers
!pip install faiss-cpu # As a vector database
!pip install xformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0
Collecting xformers
  Downloading xformers-0.0.26.post1-cp310-cp310-manylinux2014_x86_64.whl (222.7 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m222.7/222.7 MB[0m [31m301.6 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:18[0m
[?25hCollecting torch==2.3.0
  Downloading torch-2.3.0-cp310-cp310-manylinux1_x86_64.whl (779.1 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.1/779.1 MB[0m [31m980.4 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:29[0m
Collecting triton==2.3.0
  Downloading triton-2.3.0-cp310-cp310-manylinux_2_17_x86_

### Import Libraries

In [3]:
from langchain.document_loaders import UnstructuredURLLoader # Load data from URLs
from langchain.text_splitter import CharacterTextSplitter # Split data into chunks
from langchain.embeddings import OpenAIEmbeddings # Create embeddings with the help of the OpenAI API
from langchain.chat_models import ChatOpenAI # Use OpenAI as a chat model
from langchain.vectorstores import FAISS # Knowledge base (vector store)
from langchain.vectorstores import Pinecone # Knowledge base (alternative vector store)
import pinecone
from langchain.chains import RetrievalQAWithSourcesChain # Provide answers with their references
from langchain.embeddings import HuggingFaceEmbeddings # Create embeddings with the help of Hugging Face
from transformers import AutoTokenizer, AutoModelForCausalLM # Load the tokenizer and the causal language model
from transformers import pipeline  # Build the text-generation pipeline
from langchain import HuggingFacePipeline # Wrap a transformers pipeline as a LangChain LLM
from huggingface_hub import notebook_login # Hugging Face Hub login helper (not called in the visible cells)
import textwrap
import sys
import os
import torch
import nltk
# Fetch the NLTK tokenizer and POS-tagger resources.
# Presumably required by the `unstructured` URL loader -- TODO confirm.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

  from tqdm.autonotebook import tqdm
[nltk_data] Downloading package punkt to /home/shihab/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/shihab/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

## URLS

In [4]:

# Blog posts that are loaded below and indexed as the knowledge base.
URLs = [
    "https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb",
    "https://www.mosaicml.com/blog/mpt-7b",
    "https://stability.ai/blog/stability-ai-launches-the-first-of-its-stablelm-suite-of-language-models",
    "https://lmsys.org/blog/2023-03-30-vicuna/",
]

In [5]:
# Build a loader over every address in URLs, then fetch and parse the pages
# into LangChain Document objects (network access required).
loaders = UnstructuredURLLoader(urls=URLs)
data = loaders.load()

In [6]:
# Show the loaded documents as the cell output.
# NOTE(review): this prints the full text of every page; consider previewing
# a slice (e.g. the first few hundred characters) to keep the output small.
data

[Document(page_content='Open in app\n\nSign up\n\nSign in\n\nWrite\n\nSign up\n\nSign in\n\nPaper Review\n\nPaper Review: Llama 2: Open Foundation and Fine-Tuned Chat Models\n\nLlama 2: one of the best open source models\n\nAndrew Lukyanenko·Follow\n\nPublished inGoPenAI·15 min read·Jul 20, 2023\n\n--\n\nListen\n\nShare\n\nProject link\n\nModel link\n\nPaper link\n\nThe authors of the work present Llama 2, an assortment of pretrained and fine-tuned large language models (LLMs) with sizes varying from 7 billion to 70 billion parameters. The fine-tuned versions, named Llama 2-Chat, are specifically designed for dialogue applications. These models surpass the performance of existing open-source chat models on most benchmarks, and according to human evaluations for usefulness and safety, they could potentially replace closed-source models. The authors also detail their approach to fine-tuning and safety enhancements for Llama 2-Chat to support the community in further developing and respon

### Split the Text into Chunks

In [7]:
# Newline-based splitter: pieces are merged up to chunk_size=1000, and
# consecutive chunks share chunk_overlap=200 so context is not cut off
# abruptly at a chunk boundary.
text_splitter = CharacterTextSplitter(
    separator='\n',
    chunk_size=1000,
    chunk_overlap=200,
)

In [8]:
# Apply the splitter to every loaded document; the result is the list of
# chunks that gets embedded and indexed later on.
text_chunks = text_splitter.split_documents(data)

### Load the Embedding Model (Hugging Face; OpenAI Embeddings Are an Alternative)

In [10]:
# Pin the embedding model explicitly instead of relying on the library
# default, so results stay reproducible if that default ever changes.
# "sentence-transformers/all-mpnet-base-v2" is the documented default of
# HuggingFaceEmbeddings and produces 768-dimensional vectors.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

In [11]:
# Sanity check: embed a short string and display the length of the returned
# vector, i.e. the dimensionality of the embedding space.
query_result = embeddings.embed_query("Hello world")
len(query_result)

768

### Convert the Text Chunks into Embeddings and Create a Knowledge Base

In [12]:
# Embed every chunk with the embedding model and index the vectors in an
# in-memory FAISS store that acts as the knowledge base.
vectorstore = FAISS.from_documents(
    documents=text_chunks,
    embedding=embeddings,
)

### Create a Large Language Model (LLM) Wrapper

In [None]:
# Alternative: use an OpenAI chat model instead of the local Llama 2 model.
# llm=ChatOpenAI()

In [None]:
# llm

In [13]:
# Checkpoint shared by the tokenizer and the model; defined once so the two
# can never drift apart.
MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"

# `token=True` reuses the Hugging Face credential stored on this machine; it
# replaces the deprecated `use_auth_token` argument.
# NOTE(review): no login cell is visible in this notebook (execution counts
# skip In [9]) -- presumably `notebook_login()` or `huggingface-cli login`
# must have been run beforehand; confirm and restore that step if so.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=True)

# Load the causal LM quantized to 8-bit, letting `device_map='auto'` decide
# where the weights are placed across the available devices.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map='auto',
    torch_dtype=torch.float16,
    token=True,
    load_in_8bit=True,
    # load_in_4bit=True  # alternative: 4-bit quantization instead of 8-bit
)



In [None]:

# Text-generation pipeline around the already-loaded model and tokenizer.
# `torch_dtype` and `device_map` are deliberately not passed: they only take
# effect when the pipeline loads a checkpoint by name. `model` is already
# instantiated (8-bit, float16, device_map='auto'), so passing a different
# dtype such as bfloat16 here would change nothing and only mislead readers.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,        # upper bound on generated tokens per call
    do_sample=True,            # sample instead of greedy decoding
    top_k=30,                  # sample only from the 30 most likely tokens
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

In [None]:
# Expose the transformers pipeline as a LangChain LLM.
# `model_kwargs={'temperature': 0}` is intentionally not passed: the wrapper
# does not forward `model_kwargs` to a pipeline that is handed in ready-made,
# so the setting was silently ignored (and a temperature of 0 is not valid
# together with do_sample=True anyway). Tune sampling on the `pipeline(...)`
# call that builds `pipe` instead.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:

# Smoke-test the LLM wrapper with a single prompt.
# `invoke` is the current LangChain entry point; `predict` is deprecated.
# For an LLM both return the generated text as a string.
llm.invoke("Please provide a concise summary of the Book Alchemist")