In [1]:
!pip install numpy==1.24.2
# !pip uninstall ipywidgets -y -q



In [2]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer,AutoModelForCausalLM
import transformers
from langchain import HuggingFacePipeline
from huggingface_hub import interpreter_login
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
URLs = ['https://ineuron.ai/']

### Load data from the URLs

In [4]:
loader = UnstructuredURLLoader(urls=URLs)
data = loader.load()

In [5]:
data

[Document(page_content='Learning with iNeuron made \xa0<>\n\nTake your career to the next level with industry ready programs,\n\nAn entire learning ecosystem at your fingertips to make learning fun.\n\nChoose from a range of tech programs and make your next big career switch.\n\nExplore Courses\n\n55%\n\nAverage Salary Hike\n\n400+\n\nDifferent Courses\n\n10000+\n\nCareer Transitions\n\n400+\n\nHiring Partners\n\nLIVE NOW\n\nSupport System\n\nOur support system is live again, this time it is bigger, better and faster.\n\nExperience a tech community like never seen before\n\nTake me there\n\nOur Courses\n\nView all\n\nView all\n\nSuccess Stories\n\nView all\n\nFresher\n\nAbhisekh Bhuyan\n\nMLOps engineer\n\nI got job as an MLOps engineer at synapsica at 13 LPA PPO because of "End to End projects MLOps" from iNeuron.\n\nFrom\n\nFresher\n\nTo\n\n79% Increment\n\nSubham Kanungo\n\nAssociate Data Scientist\n\nI just joined EY as data analyst. It would not be possible with the support of Kri

In [6]:
for i in data:
    print(i)

page_content='Learning with iNeuron made \xa0<>\n\nTake your career to the next level with industry ready programs,\n\nAn entire learning ecosystem at your fingertips to make learning fun.\n\nChoose from a range of tech programs and make your next big career switch.\n\nExplore Courses\n\n55%\n\nAverage Salary Hike\n\n400+\n\nDifferent Courses\n\n10000+\n\nCareer Transitions\n\n400+\n\nHiring Partners\n\nLIVE NOW\n\nSupport System\n\nOur support system is live again, this time it is bigger, better and faster.\n\nExperience a tech community like never seen before\n\nTake me there\n\nOur Courses\n\nView all\n\nView all\n\nSuccess Stories\n\nView all\n\nFresher\n\nAbhisekh Bhuyan\n\nMLOps engineer\n\nI got job as an MLOps engineer at synapsica at 13 LPA PPO because of "End to End projects MLOps" from iNeuron.\n\nFrom\n\nFresher\n\nTo\n\n79% Increment\n\nSubham Kanungo\n\nAssociate Data Scientist\n\nI just joined EY as data analyst. It would not be possible with the support of Krish sir and

### Chunks the data

In [7]:
text_splitter = CharacterTextSplitter(separator='\n',chunk_size=1000,chunk_overlap=200)

In [8]:
text_chunks = text_splitter.split_documents(data)

In [9]:
for text_chunk in text_chunks:
    print(text_chunk)

page_content='Learning with iNeuron made \xa0<>\nTake your career to the next level with industry ready programs,\nAn entire learning ecosystem at your fingertips to make learning fun.\nChoose from a range of tech programs and make your next big career switch.\nExplore Courses\n55%\nAverage Salary Hike\n400+\nDifferent Courses\n10000+\nCareer Transitions\n400+\nHiring Partners\nLIVE NOW\nSupport System\nOur support system is live again, this time it is bigger, better and faster.\nExperience a tech community like never seen before\nTake me there\nOur Courses\nView all\nView all\nSuccess Stories\nView all\nFresher\nAbhisekh Bhuyan\nMLOps engineer\nI got job as an MLOps engineer at synapsica at 13 LPA PPO because of "End to End projects MLOps" from iNeuron.\nFrom\nFresher\nTo\n79% Increment\nSubham Kanungo\nAssociate Data Scientist\nI just joined EY as data analyst. It would not be possible with the support of Krish sir and Sudhanshu sir.\nFrom\nTo\n100% Increment\nSayan Saha\nSoftware En

### Embeddings Model

In [10]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

### Test Embeddings

In [11]:
embeddings.embed_query('How are you')

[-0.03136904910206795,
 0.03783058002591133,
 0.07630818337202072,
 0.045699670910835266,
 -0.0012047188356518745,
 -0.0747690424323082,
 0.08157843351364136,
 0.01020936481654644,
 -0.11220557242631912,
 0.040734339505434036,
 -0.04470576345920563,
 -0.009028704836964607,
 -0.02297619730234146,
 -0.009182506240904331,
 0.007133569568395615,
 -0.035297244787216187,
 0.07896503806114197,
 -0.09915591031312943,
 -0.1210382729768753,
 0.032573699951171875,
 -0.0991877093911171,
 0.03196950629353523,
 0.0017395204631611705,
 0.08815879374742508,
 -0.026670053601264954,
 0.0170916598290205,
 -0.041350770741701126,
 -0.039181821048259735,
 0.03500913828611374,
 -0.07608217000961304,
 -0.06472425162792206,
 0.02267630398273468,
 -0.04956904798746109,
 -0.027594158425927162,
 -0.0572696328163147,
 -0.04053007811307907,
 0.01654030568897724,
 -0.10059665143489838,
 -0.048732444643974304,
 -0.022784540429711342,
 0.02085503190755844,
 -0.06302399188280106,
 -0.019444245845079422,
 -0.02730960771

### Initiate Pinecone

In [12]:
from dotenv import dotenv_values
import os
scrects = dotenv_values('.env')

In [13]:
scrects['PINECONE_API_KEY']

'4adcbd2c-90a8-484f-b9a9-dee18a2e2056'

In [14]:
PINECONE_API_KEY = scrects['PINECONE_API_KEY']
PINECONE_API_ENV = scrects['PINECONE_API_ENV']

In [15]:
PINECONE_API_KEY,PINECONE_API_ENV

('4adcbd2c-90a8-484f-b9a9-dee18a2e2056', 'gcp-starter')

In [16]:
import pinecone

pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)
index_name = 'custom-bot'

### Convert Text into Vector

In [17]:
docesearch = Pinecone.from_texts([doc.page_content for doc in text_chunks],embeddings,index_name=index_name)

In [18]:
docesearch

<langchain_community.vectorstores.pinecone.Pinecone at 0x7992c9930fa0>

### Create LLM Wrapper

In [19]:
import os
from huggingface_hub import login

login(token=scrects['HUGGINGFACE_TOKEN'])

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/shihab/.cache/huggingface/token
Login successful


In [24]:
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf",use_auth_token=True)



In [28]:
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",use_auth_token=True,device_map='auto',torch_dtype=torch.float16)

Downloading shards:   0%|          | 0/2 [00:20<?, ?it/s]


KeyboardInterrupt: 

### Create HuggingFace Pipeline

In [None]:
pipe = transformers.pipeline('text-generation',model=model,tokenizer=tokenizer,torch_dtype=torch.bfloat16,device_map='auto',
                             max_new_tokens=512,do_sample=True,top_k=30,num_return_sequences=1,eos_token_id=tokenizer.eos_token_id)

### Initialize LLM

llm = HuggingFacePipeline(pipeline=pipe,model_kwargs={'temperatur':0})

# Test LLM
llm.predict("Provide a summery of Spider-man movie")

### Initialize Retrieval QA

from langchain.chains import RetrievalQA

In [None]:
qa = RetrievalQA.from_chain_type(llm=model,chain_type="stuff", retriever=docesearch.as_retriever(),return_source_documents=True)