# Install and Import library

In [None]:
! pip install pinecone chromadb faiss-cpu grandalf langchain langchain-community langchain-groq langchain-pinecone pinecone-notebooks langchain-anthropic langchain-google-genai langchain-openai langchain-huggingface  pypdf

In [None]:
from langchain_groq import ChatGroq
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_openai import ChatOpenAI,OpenAIEmbeddings
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline, HuggingFaceEmbeddings

import os

In [None]:
from google.colab import userdata
os.environ["HF_TOKEN"] = userdata.get('HF_TOKEN')
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')
os.environ["ANTHROPIC_API_KEY"] = userdata.get('ANTHROPIC_API_KEY')
os.environ["PINECONE_API_KEY"] = userdata.get('PINECONE_API_KEY')

# Model loading

## Chat Model

In [None]:
llm = HuggingFaceEndpoint(
    repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    # repo_id = "perplexity-ai/r1-1776",
    task="text-generation"
  )
# llm=HuggingFaceEndpoint(repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",task="text-generation")
hf_model = ChatHuggingFace(llm=llm)

try:
  hf_model.invoke("Hi I am Lokesh")
except Exception as e:
  print(e)

In [None]:
gemini_model = ChatGoogleGenerativeAI(model='gemini-1.5-pro')
gemini_model.invoke("Hi I am Lokesh")

AIMessage(content="Hi Lokesh!  It's nice to meet you. How can I help you today?", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-1.5-pro-002', 'safety_ratings': []}, id='run--63f22ef0-bd1b-47ee-b7a5-d7c8b2c05761-0', usage_metadata={'input_tokens': 5, 'output_tokens': 21, 'total_tokens': 26, 'input_token_details': {'cache_read': 0}})

In [None]:
groq_model = ChatGroq(model="deepseek-r1-distill-llama-70b")
groq_model.invoke("Hi I am Lokesh")

AIMessage(content='<think>\n\n</think>\n\nHi Lokesh! How can I assist you today? 😊', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 8, 'total_tokens': 26, 'completion_time': 0.065454545, 'prompt_time': 0.003477605, 'queue_time': 0.212858083, 'total_time': 0.06893215}, 'model_name': 'deepseek-r1-distill-llama-70b', 'system_fingerprint': 'fp_454c494f52', 'finish_reason': 'stop', 'logprobs': None}, id='run--9a547b0f-504e-4fdb-acc6-b4e540252087-0', usage_metadata={'input_tokens': 8, 'output_tokens': 18, 'total_tokens': 26})

In [None]:
open_ai_model = ChatOpenAI(model="gpt-4o")
open_ai_model.invoke("Hi I am Sunny")

AIMessage(content='Hello, Sunny! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 11, 'total_tokens': 23, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_90122d973c', 'id': 'chatcmpl-BTlGrZNgDqt5XfegLfW3KRkiV7WUs', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--5e95ffb7-f813-45d2-9c71-c0aa29d5a8d4-0', usage_metadata={'input_tokens': 11, 'output_tokens': 12, 'total_tokens': 23, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [None]:
claude_model = ChatAnthropic(model="claude-2")
claude_model.invoke("Hi I am Sunny")

AIMessage(content='Hello Sunny, nice to meet you!', additional_kwargs={}, response_metadata={'id': 'msg_01DbyMdpQvkYDvzRuQaKCEWc', 'model': 'claude-2.1', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 14, 'output_tokens': 13}, 'model_name': 'claude-2.1'}, id='run--85822104-c9b1-4dc7-8e15-619ad3b0556b-0', usage_metadata={'input_tokens': 14, 'output_tokens': 13, 'total_tokens': 27, 'input_token_details': {'cache_read': 0, 'cache_creation': 0}})

In [None]:
llm = HuggingFacePipeline.from_model_id(
    model_id='TinyLlama/TinyLlama-1.1B-Chat-v1.0',
    task='text-generation',
    pipeline_kwargs={
        "temperature": 0.5,
        "max_new_tokens":100
        }
)

model = ChatHuggingFace(llm=llm)
model.invoke("Hi I am Lokesh")

Device set to use cpu


AIMessage(content='<|user|>\nHi I am Lokesh</s>\n<|assistant|>\nSure, here\'s a revised version of the text with the added sentence:\n\n"The newest addition to the family is a baby girl, and we couldn\'t be happier. She\'s already making us laugh and lighting up our lives with her infectious smile and infectious energy."', additional_kwargs={}, response_metadata={}, id='run--f56e4289-29b0-4d50-9c12-83868ac42e79-0')

## Embeding Model

In [None]:
# @markdown # OpenAI

embedding_model = "text-embedding-3-large" # @param ["text-embedding-3-large","text-embedding-3-small","text-embedding-ada-002"]
dimensions = 64 #@param {type:"integer"}
set_dimension = True # @param {type:"boolean"}
query = "India is a growing country" # @param {"type":"string","placeholder":"India is a growing country"}

if set_dimension:
  openai_embedding = OpenAIEmbeddings(
      model=embedding_model,
      dimensions=dimensions,
  )
else:
  openai_embedding = OpenAIEmbeddings(
      model=embedding_model,
  )

result = openai_embedding.embed_query(query)
print(len(result),result)

64 [-0.22190459072589874, 0.2781512439250946, -0.019638171419501305, 0.047116201370954514, 0.007093434687703848, 0.0018865261226892471, -0.15810702741146088, 0.09076514095067978, 0.03236108645796776, -0.12227867543697357, 0.08452407270669937, -0.04599897190928459, 0.11403430253267288, -0.10378662496805191, -0.10417187958955765, 0.014938108623027802, 0.08899299055337906, -0.20634044706821442, -0.0308393444865942, -0.1088719367980957, -0.19863542914390564, -0.11588350683450699, 0.045228470116853714, -0.17860238254070282, -0.21543237566947937, 0.09045694023370743, -0.036541059613227844, 0.003484114073216915, -0.15533322095870972, 0.13183289766311646, 0.09477175027132034, 0.014052031561732292, 0.06156311556696892, -0.05451301857829094, -0.03172542154788971, 0.27722662687301636, -0.024501964449882507, -0.054859746247529984, -0.07520099729299545, -0.08383062481880188, -0.007314953953027725, 0.015670085325837135, -0.11287855356931686, 0.4059004783630371, 0.1887730062007904, 0.0106040341779589

In [None]:
# @markdown # Google GenAi

embedding_model = "models/gemini-embedding-exp-03-07" # @param ["models/gemini-embedding-exp-03-07","models/text-embedding-004","models/embedding-001"]
task_type = "retrieval_query" # @param ["None","task_type_unspecified","retrieval_query","retrieval_document","semantic_similarity","classification","clustering"]
transport = "None" # @param ["None","rest","grpc","grpc_asyncio"]
query = "India is a growing country" # @param {"type":"string","placeholder":"India is a growing country"}

func = lambda x : None if x=="None" else x
task_type = func(task_type)
transport = func(transport)

google_embedding = GoogleGenerativeAIEmbeddings(
    model=embedding_model,
    task_type=task_type,
    transport=transport
)

result = google_embedding.embed_query(query)
print(len(result),result)


3072 [-0.01328858733177185, -0.008076989091932774, -0.02803654596209526, -0.0385795421898365, -0.0012177267344668508, -0.009753282181918621, 0.018366066738963127, 0.0390302836894989, 0.005043548997491598, -0.023734668269753456, -0.011821064166724682, 0.006893169600516558, 0.019229836761951447, 0.03314683958888054, 0.11351391673088074, 0.020717905834317207, 0.0073205651715397835, -0.019075468182563782, 0.006459957454353571, -0.0205469261854887, -0.003892261302098632, 0.01249883696436882, 0.01834138110280037, 0.004717701114714146, -0.005373682361096144, -0.0032776377629488707, 0.006898732855916023, 0.02193414606153965, 0.021265285089612007, 0.014482101425528526, 0.005145769566297531, -0.027150483801960945, 0.023643454536795616, 0.012725071050226688, 0.017535502091050148, 0.015545469708740711, 0.022201204672455788, 0.0026000298094004393, 0.02127012610435486, 0.005257639102637768, -0.02616986259818077, 0.009095565415918827, -0.022166412323713303, -0.007171641103923321, -0.02820508368313312

In [None]:
# @markdown # Hugging Face
model_name = "all-MiniLM-L6-v2" # @param ["BAAI/bge-en-icl","all-MiniLM-L6-v2"]
query = "India is a growing country" # @param {"type":"string","placeholder":"India is a growing country"}
huggingface_embeddings=HuggingFaceEmbeddings(model_name=model_name)

result = huggingface_embeddings.embed_query(query)
print(len(result),result)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

384 [0.04652419313788414, -0.03841586783528328, -0.020850857719779015, 0.023816175758838654, 0.0687897652387619, -0.021041328087449074, 0.013878699392080307, -0.03014145791530609, -0.023620005697011948, 0.053420186042785645, 0.07592421770095825, -0.03870828449726105, 0.004604072775691748, 0.02037832699716091, 0.004490302409976721, 0.013204904273152351, -0.03557905927300453, -0.08155126869678497, -0.010968387126922607, -0.07982286810874939, -0.02969965711236, 0.03725045919418335, 0.009932871907949448, -0.026426194235682487, 0.03741077706217766, -0.001230493769980967, 0.0654568001627922, -0.08167450875043869, 0.015372431837022305, 0.053513478487730026, 0.010447172448039055, 0.14234355092048645, -0.025024374946951866, -0.001049715792760253, -0.04509156197309494, -0.0068294936791062355, 0.044003672897815704, 0.0837005227804184, 0.09523004293441772, -0.0424606055021286, 0.05784742161631584, -0.04957456886768341, 0.033043891191482544, -0.029475882649421692, 0.026376768946647644, -0.001838688

# Generate Structure output

In [None]:
from langchain.output_parsers import StructuredOutputParser , ResponseSchema
from langchain_core.prompts import PromptTemplate

In [None]:
schema=[
    ResponseSchema(name="first_fact",description="This is the first fact"),
    ResponseSchema(name="second_fact",description="This is the second fact"),
    ResponseSchema(name="third_fact",description="This is the third fact"),
]

structured_output_parser = StructuredOutputParser.from_response_schemas(schema)
format_instructions = structured_output_parser.get_format_instructions()

format_instructions

'The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"first_fact": string  // This is the first fact\n\t"second_fact": string  // This is the second fact\n\t"third_fact": string  // This is the third fact\n}\n```'

In [None]:
template = PromptTemplate(
    template="Give 3 fact about {topic}\n {format_instruction}",
    input_variables=["topic"],
    partial_variables={"format_instruction": format_instructions}
)

chain = template | gemini_model | structured_output_parser

chain.invoke({"topic":"Men's love"})

{'first_fact': 'Men often express love through actions rather than words, such as fixing things, offering help, or providing for loved ones.',
 'second_fact': 'Men can struggle to articulate their feelings, sometimes due to societal expectations or emotional conditioning.',
 'third_fact': 'Men value respect and appreciation as much as they value love and affection.'}

In [None]:
chain.get_graph().print_ascii()

        +-------------+          
        | PromptInput |          
        +-------------+          
                *                
                *                
                *                
       +----------------+        
       | PromptTemplate |        
       +----------------+        
                *                
                *                
                *                
   +------------------------+    
   | ChatGoogleGenerativeAI |    
   +------------------------+    
                *                
                *                
                *                
   +------------------------+    
   | StructuredOutputParser |    
   +------------------------+    
                *                
                *                
                *                
+------------------------------+ 
| StructuredOutputParserOutput | 
+------------------------------+ 


# Pydantic Output parser

In [None]:
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser

In [None]:
class Person(BaseModel):
  name:str=Field(description="Name of the person")
  age:int=Field(description="Age of the person")
  city:str=Field(description="name of the city where the person is located")

parser = PydanticOutputParser(pydantic_object=Person)

format_instructions = parser.get_format_instructions()
format_instructions

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"name": {"description": "Name of the person", "title": "Name", "type": "string"}, "age": {"description": "Age of the person", "title": "Age", "type": "integer"}, "city": {"description": "name of the city where the person is located", "title": "City", "type": "string"}}, "required": ["name", "age", "city"]}\n```'

In [None]:
template = PromptTemplate(
    template='Generate the nammme, age and city of a fictional {place} person \n {format_instruction}',
    input_variables=['place'],
    partial_variables={'format_instruction':parser.get_format_instructions()}
)

chain = template | gemini_model | parser

result = chain.invoke({'place':'Malvan'})
result, result.name, result.age, result.city

(Person(name='Devendra Parab', age=42, city='Malvan'),
 'Devendra Parab',
 42,
 'Malvan')

In [None]:
type(result)

# Text Splitter

In [None]:
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

text = """LangChain is a powerful framework for building LLM applications. It helps in connecting language models with data sources, APIs, and other components. One of its key features is text processing, where it allows splitting large documents into smaller chunks. This is useful for vector databases, retrieval-augmented generation (RAG), and long-context handling. Another feature is its integration with various AI tools and libraries. This makes LangChain a great choice for developers working on AI-powered applications.

LangChain provides seamless support for working with embeddings and vector databases, allowing efficient document retrieval and query resolution. It also simplifies prompt engineering, making it easier to design structured prompts for LLMs. Developers can use LangChain to build sophisticated AI chatbots, content generators, and even research assistants.

The framework supports multiple LLM providers, including OpenAI, Hugging Face, and local models like Llama and Mistral. It offers various chain types, such as sequential and parallel chains, enabling flexible workflow automation. LangChain's integration with tools like Pinecone, ChromaDB, and FAISS ensures fast and scalable search capabilities.

With built-in memory components, LangChain allows chatbots and virtual assistants to maintain context over extended interactions. This helps in creating human-like conversations that feel more natural. Furthermore, LangChain supports function calling and API interaction, making it ideal for building AI-powered applications that require external data fetching.

The ecosystem also includes template-based prompt handling, making it easier to experiment with different LLM configurations. Developers can fine-tune their models, optimize performance, and integrate LangChain with various cloud-based AI solutions. Whether for research, automation, or enterprise applications, LangChain stands out as a robust and flexible framework for AI development.
."""

In [None]:
splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=200,
    chunk_overlap=50
)

chunks = splitter.split_text(text)

for i, chunk in enumerate(chunks,start=1):
  print(f"Chunk{i}:\n{chunk}\n{'-'*50}")



Chunk1:
LangChain is a powerful framework for building LLM applications. It helps in connecting language models with data sources, APIs, and other components. One of its key features is text processing, where it allows splitting large documents into smaller chunks. This is useful for vector databases, retrieval-augmented generation (RAG), and long-context handling. Another feature is its integration with various AI tools and libraries. This makes LangChain a great choice for developers working on AI-powered applications.
--------------------------------------------------
Chunk2:
LangChain provides seamless support for working with embeddings and vector databases, allowing efficient document retrieval and query resolution. It also simplifies prompt engineering, making it easier to design structured prompts for LLMs. Developers can use LangChain to build sophisticated AI chatbots, content generators, and even research assistants.
--------------------------------------------------
Chunk3:

In [None]:
splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n","\n"," ",""],
    chunk_size=500,
    chunk_overlap=100,
    length_function=len
)

chunks = splitter.split_text(text)

for i, chunk in enumerate(chunks,start=1):
  print(f"Chunk{i}:\n{chunk}\n{'-'*50}")

Chunk1:
LangChain is a powerful framework for building LLM applications. It helps in connecting language models with data sources, APIs, and other components. One of its key features is text processing, where it allows splitting large documents into smaller chunks. This is useful for vector databases, retrieval-augmented generation (RAG), and long-context handling. Another feature is its integration with various AI tools and libraries. This makes LangChain a great choice for developers working on
--------------------------------------------------
Chunk2:
with various AI tools and libraries. This makes LangChain a great choice for developers working on AI-powered applications.
--------------------------------------------------
Chunk3:
LangChain provides seamless support for working with embeddings and vector databases, allowing efficient document retrieval and query resolution. It also simplifies prompt engineering, making it easier to design structured prompts for LLMs. Developers can 

# Vector Dataset

1. SQL Based Database :- MySQl, ProgresSQL,

2. NoSQL Database :- MongoDB

3. Vector Database :- FAISS, Pinecone, AstraDB, WAVITE, ChromaDB, Weaviate

In [None]:
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader

loader=PyPDFLoader("https://raw.githubusercontent.com/sunnysavita10/genai_bootcamp/refs/heads/main/data/llama2.pdf")

docs = loader.load()
print(len(docs))
docs[0]

77


Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'https://raw.githubusercontent.com/sunnysavita10/genai_bootcamp/refs/heads/main/data/llama2.pdf', 'total_pages': 77, 'page': 0, 'page_label': '1'}, page_content='Llama 2: Open Foundation and Fine-Tuned Chat Models\nHugo Touvron∗ Louis Martin† Kevin Stone†\nPeter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra\nPrajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen\nGuillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller\nCynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou\nHakan Inan Marcin Kardas Viktor Kerkez Madian Kh

## FAISS

In [None]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

In [None]:
vector_store = FAISS(
    embedding_function=huggingface_embeddings,
    index=faiss.IndexFlatIP(384),
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

vector_store.add_documents(docs)

['d6fc9445-4205-442a-b290-28a52e6be260',
 '7c105c6b-af62-4d2d-b7f1-f0ef85729b0d',
 '03120f83-cb6d-4248-aa5d-9b2afec7f191',
 '6250c2a9-0961-4f10-afae-5b2919a66ec1',
 '3ebcfec2-eebf-4fa6-a82f-81f9e4a1532a',
 '6a731af5-9692-42a4-8141-bf701513bd06',
 'cc216989-56af-4e79-a959-8da31fd25dba',
 'bdd42fe3-ac63-40ef-97d1-1466e26b5d74',
 '39f65404-21dc-4f9d-bb50-93dc13347c86',
 '8a1d6f6c-b4ef-474c-93da-7568b0047d47',
 'a2d49b6a-8545-4799-85fb-bedd4cba6f17',
 '172dc274-afeb-4735-80a7-693d1c4f99c9',
 '32c1bd0c-afd3-42e9-b9e8-d91165dfda54',
 '66cbaaa9-6e55-493d-8a9e-c4e907b24f30',
 '2cc3cf9d-a9e6-4677-8306-e18668f010f1',
 'b2f6294f-e528-4f04-ba40-b62f5cbf987b',
 '4f5b3589-7420-4639-b6d0-53523f66bed4',
 '80cdcb62-2c29-45a0-8356-9944c6832409',
 '5528aee7-4c36-4d8a-8c3a-4404b62de306',
 '2c414abf-a81e-4f59-9e2c-aa591b86bf15',
 'f2c2d894-539e-4b62-aedf-42cc1b386934',
 '5a2e7519-660e-4f66-ad63-52cda95c3828',
 '2e2a53a6-a22d-4bef-8ec1-9d6e0e9f24e0',
 '82f1aba0-d07c-4ee6-881a-e15299923179',
 'fe782533-4359-

In [None]:
vector_store.similarity_search(
    query="What is llama2 and what is a difference between llama2 and mistral?",
    k=2
)

[Document(id='2bc50633-1f1e-45f1-adda-b14eacd72860', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'https://raw.githubusercontent.com/sunnysavita10/genai_bootcamp/refs/heads/main/data/llama2.pdf', 'total_pages': 77, 'page': 76, 'page_label': '77'}, page_content='A.7 Model Card\nTable 52 presents a model card (Mitchell et al., 2018; Anil et al., 2023) that summarizes details of the models.\nModel Details\nModel DevelopersMeta AI\nVariations Llama 2comes in a range of parameter sizes—7B, 13B, and 70B—as well as\npretrained and fine-tuned variations.\nInput Models input text only.\nOutput Models generate text only.\nModel ArchitectureLlama 2isanauto-regressivelanguagemodelthatusesano

## Chroma DB

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False
)

further_split_doc=text_splitter.split_documents(docs)

In [None]:
from langchain_community.vectorstores import Chroma
persist_directory = "vdb_latest"

chroma_vdb = Chroma.from_documents(
    documents=further_split_doc,
    embedding=huggingface_embeddings,
    persist_directory=persist_directory
)

chroma_vdb.persist()

  chroma_vdb.persist()


In [None]:
vdb = Chroma(
    persist_directory=persist_directory,
    embedding_function=huggingface_embeddings
)

retriever=vdb.as_retriever()

print(retriever.search_type, retriever.search_kwargs)

retriever.get_relevant_documents("what is transformer and how it is working for llama2 model?",k=1)

similarity {}


  vdb = Chroma(
  retriever.get_relevant_documents("what is transformer and how it is working for llama2 model?",k=1)


[Document(metadata={'page_label': '7', 'trapped': '/False', 'source': 'https://raw.githubusercontent.com/sunnysavita10/genai_bootcamp/refs/heads/main/data/llama2.pdf', 'producer': 'pdfTeX-1.40.25', 'page': 6, 'keywords': '', 'author': '', 'moddate': '2023-07-20T00:30:36+00:00', 'creationdate': '2023-07-20T00:30:36+00:00', 'subject': '', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'total_pages': 77, 'title': '', 'creator': 'LaTeX with hyperref'}, page_content='sustainability program.∗∗ Our open release strategy also means that these pretraining costs will not need to\nbe incurred by other companies, saving more global resources.\n2.3 Llama 2Pretrained Model Evaluation\nIn this section, we report the results for theLlama 1and Llama 2base models, MosaicML Pretrained\nTransformer(MPT)†† models,andFalcon(Almazroueietal.,2023)modelsonstandardacademicbenchmarks.\nFor all the evaluations, we use our internal evaluations library. 

## Pinecone

In [None]:
from pinecone import Pinecone, ServerlessSpec

import time

pc = Pinecone()


index_name = "test-index" # @param {"type":"string"}

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [None]:
from langchain_pinecone import PineconeVectorStore
from uuid import uuid4

pc_vector_store = PineconeVectorStore(
    index=index,
    embedding=huggingface_embeddings,

)

uuids = [
    str(uuid4()) for  _ in range(len(further_split_doc))
]


print(uuids[0])

pc_vector_store.add_documents(documents=further_split_doc, ids=uuids)

9dbcae6c-aa0a-4a3e-8697-02226e9b95df


['9dbcae6c-aa0a-4a3e-8697-02226e9b95df',
 'd8d9c5b2-57a7-4b9a-8e84-a57fa5624f54',
 '2161aa4a-fecb-4769-a972-58dbd43ae85c',
 'bbd15c16-e8f0-45b8-8d00-3e8687376526',
 '28b56bd7-904b-4e0b-b29f-72fbbbe9251f',
 '87c9fef8-1aa9-41db-a08b-195226fa70b4',
 '3de874cb-d2fa-408f-8b66-470c3f374d1f',
 '36fcc732-81fe-4423-a45b-45de0b2dfb37',
 '28cde285-8769-4631-9016-c4c5ee7dbc1c',
 'c3219444-0f2a-4d2f-bd2d-88f318b3ae0c',
 'af41c227-d570-4194-bf51-216ba39e5e2a',
 '40ffe6a5-5e27-4374-b9fc-2f173c2e812f',
 '82ba117d-0840-4c24-8a7f-08bf71026b5f',
 'a8cfa077-7a58-4259-9c8c-70ef6410661a',
 '496e638e-18a0-48d2-945e-40612c8b8074',
 'e55f4492-1106-4480-950c-9e9807ea21ac',
 '43a7ded1-2ae9-436f-891d-e769d8c5d334',
 '6efb84e7-7819-46a1-a658-c8a9f91bdd97',
 '213f8f09-4c2b-4552-8e25-915908951e63',
 '1cf3f8ad-2a3d-4ca3-8569-c020f3cc6f8d',
 '381fd5e7-6bcb-4cfa-86a1-4459c0faee83',
 'cd7ca8c1-09e3-4421-ba4c-1bdb7c599eec',
 '81e73146-0771-4dd0-974b-fa30573700b4',
 'ca2c6e3b-5d46-4fc8-8389-53f5fbf19157',
 '1f8566b9-e37f-

In [None]:
results = vector_store.similarity_search(
    "what is llama2 and how it is different from mistral?",
    k=2,
    filter={
        "source":"https://raw.githubusercontent.com/sunnysavita10/genai_bootcamp/refs/heads/main/data/llama2.pdf"
    }
)

results

[Document(id='2bc50633-1f1e-45f1-adda-b14eacd72860', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'https://raw.githubusercontent.com/sunnysavita10/genai_bootcamp/refs/heads/main/data/llama2.pdf', 'total_pages': 77, 'page': 76, 'page_label': '77'}, page_content='A.7 Model Card\nTable 52 presents a model card (Mitchell et al., 2018; Anil et al., 2023) that summarizes details of the models.\nModel Details\nModel DevelopersMeta AI\nVariations Llama 2comes in a range of parameter sizes—7B, 13B, and 70B—as well as\npretrained and fine-tuned variations.\nInput Models input text only.\nOutput Models generate text only.\nModel ArchitectureLlama 2isanauto-regressivelanguagemodelthatusesano