https://ineuron.ai/sitemap.xml

In [None]:
!pip -q install langchain
!pip -q install bitsandbytes accelerate transformers
!pip -q install sentence_transformers

In [None]:
!pip -q install unstructured

In [None]:
!pip install pinecone-client==2.2.4

In [1]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
import pinecone
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
from langchain import HuggingFacePipeline
from huggingface_hub import notebook_login
import torch

  from tqdm.autonotebook import tqdm
  _torch_pytree._register_pytree_node(


## Extract data from url

In [None]:
!pip install numpy==1.24.4

In [None]:
!pip install libmagic

In [2]:
URLs = [
    "https://ineuron.ai/"
]

In [3]:
loader = UnstructuredURLLoader(urls = URLs)
data = loader.load()

libmagic is unavailable but assists in filetype detection on file-like objects. Please consider installing libmagic for better results.
Error fetching or processing https://ineuron.ai/, exception: Invalid file. The FileType.UNK file type is not supported in partition.


In [4]:
data

[]

## Chunkins

In [None]:
text_splitter=CharacterTextSplitter(separator='\n',
                                    chunk_size=1000,
                                    chunk_overlap=200)

In [None]:
text_chunks=text_splitter.split_documents(data)

In [None]:
len(text_chunks)

In [None]:
text_chunks[0]

## Embedding model

In [None]:
embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [None]:
query_result = embeddings.embed_query("Hello world")
len(query_result)

In [None]:
query_result

In [None]:
import os

PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '6ee7dc8a-43fb-4480-8774-f1d02577386d')
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'gcp-starter')

In [None]:
import pinecone
# initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_API_ENV  # next to api key in console
)
index_name = "website-bot" # put in the name of your pinecone index here


In [None]:
docsearch = Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

## Create LLM wrapper

In [None]:
notebook_login()

In [None]:
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                          use_auth_token=True,)


model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True,
                                              load_in_8bit=True,
                                              #load_in_4bit=True
                                             )

In [None]:
pipe = transformers.pipeline("text-generation",
                model=model,
                tokenizer= tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                max_new_tokens = 512,
                do_sample=True,
                top_k=30,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id
                )

In [None]:
llm=HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature':0})

In [None]:
llm.predict("Please provide a concise summary of the Book Harry Potter")

## Initialize the Retrieval QA

In [None]:
from langchain.chains import RetrievalQA

In [None]:
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

In [None]:
query = "Tell me the course price of Full Stack Data Science with Generative AI provide by ineuron"


In [None]:
print(qa.run(query))