In [None]:
# Chat Models: GPT-3.5 Turbo and GPT-4

In [None]:
import os
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv, find_dotenv

# Load variables from the .env file located by find_dotenv(); override=True lets
# values from the file replace variables that are already set in the environment.
load_dotenv(find_dotenv(), override=True)

# Chat model built with the library defaults (no model name or temperature given).
llm = ChatOpenAI()
output = llm.invoke('Explain quantum mechanics in one sentence.')
# The reply text lives in the `.content` attribute of the returned object.
print(output.content)
# help(ChatOpenAI)  # uncomment to list the constructor options

In [None]:
from langchain.schema import (
    SystemMessage,
    AIMessage,
    HumanMessage
)

# Chat input as a list of messages: the system message sets the persona and the
# reply language, the human message carries the actual question.
message = [
    SystemMessage(content='You are a physicist and respond only in German.'),
    HumanMessage(content='Explain quantum mechanics in one sentence.')
]

# Reuses the `llm` object created in the previous cell.
output = llm.invoke(message)
print(output.content)


### In-Memory Cache - Caching LLM Responses

In [None]:
from langchain.globals import set_llm_cache
from langchain_openai import OpenAI
# Rebinds `llm` (a ChatOpenAI instance until now) to an `OpenAI` instance;
# the caching cells below call this object.
llm = OpenAI(model_name='gpt-3.5-turbo-instruct')


In [None]:
%%time
# Install an in-memory cache as the global LLM cache, then make the first call
# for this prompt; %%time reports how long the whole cell took.
from langchain.cache import InMemoryCache
set_llm_cache(InMemoryCache())
prompt = 'Tell me a joke that a toddler can understand.'
llm.invoke(prompt)

In [None]:
%%time
# Same prompt again: compare this timing with the previous cell to see the cache effect.
llm.invoke(prompt)


### SQLite Caching

In [None]:
from langchain.cache import SQLiteCache
# Replace the global LLM cache with one backed by the SQLite database file named below.
set_llm_cache(SQLiteCache(database_path=".langchain.sqlite.db"))

# A prompt that has not been sent yet in this notebook, used for the two timing cells below.
prompt2 = "Tell me a joke"


In [None]:
%%time
# first request (not in cache, takes longer)

llm.invoke(prompt2)



In [None]:
%%time
# second request (cached, faster)
llm.invoke(prompt2)

## LLM Streaming


In [None]:
from langchain_openai import ChatOpenAI

# Baseline without streaming: invoke() returns once the complete reply is
# available, and the text is printed in one go.
llm = ChatOpenAI()
prompt = 'Write a rock song about the Moon and a Raven.'
print(llm.invoke(prompt).content)

In [None]:
# stream() yields the reply piece by piece; end='' keeps the pieces on one line
# and flush=True pushes each piece to the output immediately.
for chunk in llm.stream(prompt):
    print(chunk.content, end='', flush=True)

# 20240301 - PromptTemplates

In [None]:
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv(), override=True)
# Template text with two placeholders, {virus} and {language}.
template = '''You are an experienced virologist.
Write a few sentences about the following virus "{virus}" in "{language}".'''
prompt_template = PromptTemplate.from_template(template=template)

# format() substitutes the placeholders and returns the final prompt string.
prompt = prompt_template.format(virus='hiv', language='english')

llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)
output = llm.invoke(prompt)
print(output.content)


# 20240301 - ChatPromptTemplate

In [None]:
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.messages import SystemMessage

# Two-message chat prompt: a fixed system instruction plus a human message
# template with the placeholders {n} and {area}.
chat_template = ChatPromptTemplate.from_messages(
    [
        SystemMessage(content='You respond only in the JSON format.'),
        HumanMessagePromptTemplate.from_template('Top {n} countries in {area} by population. Show the population in millions.')
    ]
)
# Fill in the placeholders to obtain the concrete list of messages.
messages = chat_template.format_messages(n='20', area='Africa')
print(messages)

from langchain_openai import ChatOpenAI
llm = ChatOpenAI()
output = llm.invoke(messages)
print(output.content)



# 20240301 - Simple Chains

In [None]:
from langchain_openai import ChatOpenAI
# Import PromptTemplate from langchain.prompts, as the PromptTemplates section of
# this notebook does, instead of the deprecated package-root import.
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

llm = ChatOpenAI()
# Template text with two placeholders, {virus} and {language}.
template = '''You are an experienced virologist.
Write a few sentences about the following virus "{virus}" in "{language}".'''

prompt_template = PromptTemplate.from_template(template=template)

# The chain formats the prompt with the given inputs and sends it to the model;
# verbose=True asks the chain to log what it does while running.
chain = LLMChain(
    llm=llm,
    prompt=prompt_template,
    verbose=True
)

# The input dict supplies one value per placeholder of the template.
output = chain.invoke({'virus': 'HSV', 'language': 'English'})
print(output)

In [None]:
# Single-placeholder prompt ({country}).
# NOTE(review): the '?.' after {country} in the prompt text looks like a typo; it is
# part of the runtime prompt and therefore left unchanged here.
template = 'What is the capital of {country}?. List the top 3 places to visit in that city. Use bullet points.'
prompt_template = PromptTemplate.from_template(template=template)
chain = LLMChain(
    llm=llm,
    prompt=prompt_template,
    verbose=True
)

# Blocks until the user types a value, so this cell cannot run unattended.
country = input('Enter Country: ')
# A bare string is passed instead of a dict; presumably accepted because the
# prompt has exactly one input variable — confirm against the LLMChain docs.
output = chain.invoke(country)
print(output)


# 20240301 - Sequential Chains
With Sequential chains, you can make a series of calls to one or more LLMs. You can take the output from one chain and use it as the input to another chain.

There are two types of sequential chains:
1. SimpleSequentialChain
2. General form of sequential chains

SimpleSequentialChain represents a series of chains, where each individual chain has a single input and a single output, and the output of one step is used as input to the next.



In [None]:
from langchain_openai import ChatOpenAI
# Import PromptTemplate from langchain.prompts, as the PromptTemplates section of
# this notebook does, instead of the deprecated package-root import.
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SimpleSequentialChain

# Step 1: turn a concept name into a Python function.
llm1 = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.5)
prompt_template1 = PromptTemplate.from_template(
     template='You are an experienced scientist and Python programmer. Write a function that implements the concept of {concept}.'
)

chain1 = LLMChain(llm=llm1, prompt=prompt_template1)

# Step 2: describe the function produced by step 1 (higher temperature than step 1).
llm2 = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=1.2)
prompt_template2 = PromptTemplate.from_template(
    template='Given the Python function {function}, describe it as detailed as possible.'
)

chain2 = LLMChain(llm=llm2, prompt=prompt_template2)

# Each chain has a single input and a single output, so the output of chain1
# is fed to chain2 as its only input.
overall_chain = SimpleSequentialChain(chains=[chain1, chain2], verbose=True)

output = overall_chain.invoke('linear regression')
# The final step's text is stored under the 'output' key of the result.
print(output['output'])


# 20240301 - Agents - LangChain Agents in Action: Python REPL


In [None]:
pip install -q langchain_experimental


In [None]:
from langchain_experimental.utilities import PythonREPL
python_repl = PythonREPL()

# run() receives Python source as a string; the snippet below prints the
# multiples of 13 between 1 and 99.
python_repl.run('print([n for n in range(1, 100) if n % 13 == 0])')

In [None]:
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)
# Agent equipped with a Python REPL tool.
# NOTE(review): the agent presumably executes model-generated Python in this
# process — run only in an environment where that is acceptable.
agent_executor = create_python_agent(
    llm=llm,
    tool=PythonREPLTool(),
    verbose=True
)

agent_executor.invoke('Calculate the square root of the factorial of 12 and display it with 4 decimal points.')


In [None]:
import math

# Independent check of the agent's answer, computed with the standard library only.
fact12 = math.factorial(12)
sqrtFact12, roundSqrtFact12 = math.sqrt(fact12), round(math.sqrt(fact12), 4)

print(f'{fact12} {sqrtFact12} {roundSqrtFact12}')

In [None]:
response = agent_executor.invoke('What is the answer to 5.1 ** 7.3?')

In [None]:
response


# 20240301 - Langchain tools- DuckDuckGo and Wikipedia, Google Search
LangChain tools are like specialized apps for your LLM. They are small code modules that allow it to access information and services.

These tools connect your LLM to search engines, databases, APIs, and more, expanding its knowledge and capabilities.


In [None]:
pip install -q duckduckgo-search

In [None]:
# Import from langchain_community, the package the Wikipedia cells of this
# notebook already use for tools, instead of the deprecated langchain.tools path.
from langchain_community.tools import DuckDuckGoSearchRun

# Search tool created with its default settings.
search = DuckDuckGoSearchRun()

output = search.invoke('Where was Adolf Hitler born?')
print(output)

      

In [None]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain.tools import DuckDuckGoSearchResults

# The wrapper carries the search settings: region, result limit and safe-search level.
wrapper = DuckDuckGoSearchAPIWrapper(region='en-GB', max_results=3, safesearch='moderate')
# NOTE(review): `source='news'` is passed to the tool, not to the wrapper. Check
# the DuckDuckGoSearchResults docs that `source` is a recognized tool argument
# (the tool-level name may be `backend`); otherwise it may be silently ignored.
search = DuckDuckGoSearchResults(api_wrapper=wrapper, source='news')
# `output` is consumed by the regex-parsing cell further down.
output = search.run('Newport')
print(output)

In [None]:
search2 = DuckDuckGoSearchResults()
output2 = search2.run('Freddie Mercury and Queen')
print(output2)

In [None]:
import warnings
# Suppresses every warning raised from here on in this kernel, including
# deprecation notices from the libraries used in this notebook.
warnings.filterwarnings('ignore')

In [None]:
import re

# `output` is the result string produced by the DuckDuckGoSearchResults cell above.
# NOTE(review): the pattern assumes records shaped like
# "[snippet: ..., title: ..., link: ...], [...]" — confirm against the tool's output.
# A record is closed by "]" followed by "," — except the last one, whose "]" ends
# the string. Accept both terminators so the final record is not dropped.
pattern = r'snippet: (.*?), title: (.*?), link: (.*?)\](?:,|$)'
matches = re.findall(pattern, output, re.DOTALL)

for snippet, title, link in matches:
    print(f'Snippet: {snippet}\nTitle: {title}\nLink: {link}\n')
    print('-' * 50)
    

# Wikipedia tool

In [None]:
pip install -q wikipedia

In [None]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Wrapper settings: keep only the top result and cap the returned text
# (parameter names suggest a limit of 5000 characters).
api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=5000)
wiki = WikipediaQueryRun(api_wrapper=api_wrapper)
# The tool input is given as a dict with a 'query' key here; the next cell
# passes a bare string instead.
wiki.invoke({'query': 'llamaindex'})


In [None]:
wiki.invoke('Great Wall of China')

# 20240301 - Reasoning and Acting (ReAct)
ReAct is a new approach that combines reasoning (chain of thoughts prompting) and acting capabilities of LLMs.

With ReAct LLMs generate reasoning traces and task-specific actions in an interleaved manner.


# Creating a ReAct Agent

In [None]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

from langchain.prompts import PromptTemplate
from langchain import hub
from langchain.agents import Tool, AgentExecutor, initialize_agent, create_react_agent
from langchain.tools import DuckDuckGoSearchRun, WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

# Wrapper prompt applied to the user's question before it is handed to the agent.
# NOTE(review): 'Anser' in the template below is a typo for 'Answer'; it is part
# of the runtime prompt, so it is left untouched here.
template = '''
Anser the following questions as best as you can. Translate to FRENCH.
Questions: {q}
'''
prompt_template = PromptTemplate.from_template(template)
# Prompt pulled from the LangChain hub; this is the prompt given to the agent itself.
prompt = hub.pull('hwchase17/react')
# print(type(prompt))
# print(prompt.input_variables)
# print(prompt.template)
# 1. Python REPL tool (runs Python code supplied by the agent)
python_repl = PythonREPLTool()
python_repl_tool = Tool(
    name='Python REPL',
    func=python_repl.run,
    description='Useful when you need to use Python to answer a question. You should input Python code.'
)

# 2. Wikipedia tool (for searching Wikipedia)
wiki_api_wrapper = WikipediaAPIWrapper()
wikipedia = WikipediaQueryRun(api_wrapper=wiki_api_wrapper)
wikipedia_tool = Tool(
    name='Wikipedia',
    func=wikipedia.run,
    description='Useful for when you need to look up a topic, country, or person on Wikipedia.'
)

# 3. DuckDuckGo search tool (for general web searches)
ddg_search = DuckDuckGoSearchRun()
duckduckgo_tool = Tool(
    name='DuckDuckGo Search',
    func=ddg_search.run,
    description='Useful for when you need to perform an internet search to find information that another tool can\'t provide.'
)

# The tool descriptions above are what the agent is given to choose between tools.
tools = [python_repl_tool, wikipedia_tool, duckduckgo_tool]

agent = create_react_agent(llm, tools, prompt)
# The executor runs the agent with the same tool list; max_iterations=10 bounds
# the number of steps, and handle_parsing_errors=True asks the executor to cope
# with replies it cannot parse instead of raising.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
    max_iterations=10
)


In [None]:
# Alternative questions to try (uncomment one):
# question = 'Generate the first 20 numbers in the Fibonacci series.'
# question = 'Who is the current Prime Minister of U.K.?'
question = 'Tell me about Makaveli early life'

# The question is first wrapped by `prompt_template` (adds the translate-to-French
# instruction) and then passed to the agent under the 'input' key.
output = agent_executor.invoke({
    'input': prompt_template.format(q=question)
})
# The agent's final answer is stored under the 'output' key.
print(output['output'])

# 20240302 - Embeddings
Embeddings are at the core of building LLM applications. Text embeddings are numeric representations of text and are used in NLP and ML tasks.

Embeddings Applications
----------------------
Text classification: assigning a label to a piece of text.
Text Clustering: grouping together pieces of text that are similar in meaning.
Question-Answering: answering a question posed in natural language.



# Vector Databases
One of the biggest challenges of AI applications is efficient data processing.
Many of the latest AI applications rely on vector embeddings. Chatbots, question-answering systems, and machine translation rely on vector embeddings.

In [None]:
# Project = Books Recommendation System
import openai
import pandas as pd
import numpy as np

# Authentication
def openai_authenticate(keyfile):
    """Read an OpenAI API key from a text file and set it on the `openai` module.

    Args:
        keyfile: Path to a text file whose content is the API key.

    Raises:
        OSError: If the file cannot be opened (propagated from `open`).
        AssertionError: If the text read from the file does not start with 'sk-'.
    """
    import openai
    with open(keyfile, 'r') as f:
        # strip() removes the trailing newline as well as stray spaces or '\r'.
        api_key = f.read().strip()
    # The method is str.startswith (all lowercase); `startsWith` does not exist
    # on str and raised AttributeError on every call.
    assert api_key.startswith('sk-'), 'Error loading the API key. The API key starts with "sk-"'
    openai.api_key = api_key

openai_authenticate('../key.txt')

# Read the books dataset, drop rows with missing values and keep the
# 2000 rows with the highest average rating.
df = (
    pd.read_csv('./books_dataset.csv')
    .dropna()
    .sort_values('average_rating', ascending=False)
    .head(2000)
)
df

# Vector Databases
Vector databases are a new type of database, designed to store and query unstructured data.

Unstructured data is data that does not have a fixed schema, such as text, images, and audio.

SQL vs Vector databases
Pipeline for vector databases

Vector databases use a combination of different optimized algorithms that all participate in Approximate Nearest Neighbor (ANN) search.


In [None]:
import os
# from langchain_openai import ChatOpenAI
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv(), override=True)



In [None]:
pip install -q pinecone-client


In [None]:
pip install --upgrade -q pinecone-client


In [None]:
pip show pinecone-client

In [None]:
from pinecone import Pinecone

# No API key is passed explicitly; presumably the client reads it from the
# environment loaded via load_dotenv earlier — confirm the variable name in the
# Pinecone client docs.
pc = Pinecone()
pc.list_indexes()

# Pinecone Indexes
An index is the highest-level organizational unit of vector data in Pinecone.

It accepts and stores vectors, serves queries over the vectors it contains, and does other vector operations over its contents.

- Serverless indexes: you don't configure or manage any compute or storage resources (they scale automatically).
- Pod-based indexes: you choose one or more preconfigured units of hardware (pods).

In [None]:
# pc.list_indexes()[0]
# pc.describe_index('indexname')

pc.list_indexes().names()

In [None]:
from pinecone import PodSpec
index_name = 'langchain'

# Create the index only when it is not listed yet, so re-running the cell does
# not attempt to create it twice.
if index_name not in pc.list_indexes().names():
    print(f'Creating index {index_name}')
    pc.create_index(
        name=index_name,
        dimension=1536,  # same length as the vectors built in the cells below
        metric='cosine',
        spec=PodSpec(
            environment='gcp-starter'
        )
    )
    print('Index created! Yay!')
else:
    # Message typo fixed ('exists1' -> 'exists!'), matching the wording of the delete cell.
    print(f'Index {index_name} already exists!')


In [None]:
# Delete the index when it is listed; otherwise just report that it is missing.
# NOTE(review): the cells that follow call pc.Index(index_name) and upsert into
# the same index name, so running this cell in sequence removes the index they
# need — confirm the intended run order.
if index_name in pc.list_indexes().names():
    print(f'Deleting index {index_name}')
    pc.delete_index(index_name)
    print('Done')
else:
    print(f'Index {index_name} does not exist!')

In [None]:
# working with index details + stats
index = pc.Index(index_name)
index.describe_index_stats()

In [None]:
## inserting vectors
import random

# Five random vectors of length 1536; the ids are the letters 'a'..'e'.
vectors = [
    [random.random() for _ in range(1536)]
    for _ in range(5)
]
ids = list('abcde')

index_name = 'langchain'
index = pc.Index(index_name)

# Pair each id with its vector and send the pairs to the index.
id_vector_pairs = zip(ids, vectors)
index.upsert(vectors=id_vector_pairs)


In [None]:
## updating vectors
index.upsert(vectors=[('c', [0.5] * 1536)])

In [None]:
## fetching vectors
# index = pc.Index(index_name)
index.fetch(ids=['a', 'b', 'e'])

In [None]:
## deleting vectors

index.delete(ids=['b', 'c'])

In [None]:
index.describe_index_stats()

In [None]:
## query index
# Random query vector with the same length (1536) as the stored vectors.
query_vector = [random.random() for _ in range(1536)]

# Ask for the 3 best matches; include_values=False leaves the vector values
# out of the response.
index.query(
    vector=query_vector,
    top_k=3,
    include_values=False
)

# Pinecone Namespaces
- Pinecone allows you to partition the vectors in an index into namespaces.
- Queries and other operations are scoped to a specific namespace, allowing different requests to search different subsets of your index.
- Key information about namespaces:
  - every index consists of one or more namespaces.
  - each vector exists in exactly one namespace.
  - namespaces are uniquely identified by a namespace name.
  - the default namespace is represented by the empty string and is used if no specific namespace is specified.

## Namespaces


In [None]:
# index.describe_index_stats()
index = pc.Index(index_name)

import random
# Five random vectors of length 1536 with the ids 'a'..'e'.
vectors = [[random.random() for _ in range(1536)] for v in range(5)]
# print(vectors)
ids = list('abcde')

# No namespace argument is given; per the notes above, the default namespace is
# used in that case.
index.upsert(vectors=zip(ids, vectors))

 

In [None]:
# Three random vectors with the ids 'x', 'y', 'z'.
vectors = [[random.random() for _ in range(1536)] for v in range(3)]
# print(vectors)
ids = list('xyz')

# These vectors are upserted into the namespace 'first-namespace'.
index.upsert(vectors=zip(ids, vectors), namespace='first-namespace')

In [None]:
# Two random vectors with the ids 'm' and 'n'.
vectors = [[random.random() for _ in range(1536)] for v in range(2)]
# print(vectors)
ids = list('mn')

# These vectors are upserted into the namespace 'second-namespace'.
index.upsert(vectors=zip(ids, vectors), namespace='second-namespace')

In [None]:
index.describe_index_stats()


In [None]:
# No namespace is given, so per the notes above the default namespace is used.
# 'x' was upserted into 'first-namespace', so presumably nothing is found here — confirm.
index.fetch(ids=['x'])

In [None]:
index.fetch(ids=['x'], namespace='first-namespace')

In [None]:
## deleting namespace vectors either single or all
# delete single or multiple
index.delete(ids=['x', 'z'], namespace='first-namespace')


In [None]:
# delete all vectors in namespace
index.delete(delete_all=True, namespace='first-namespace')

# Splitting and Embedding Text Using Langchain


In [None]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

from langchain.text_splitter import RecursiveCharacterTextSplitter
# Read the whole speech file into one string.
with open('./churchill_speech.txt') as f:
    churchill_speech = f.read()

# chunk_size and chunk_overlap are measured with length_function, which is
# len here, i.e. in characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len
)

# The result is a list of document objects; later cells read each one's `.page_content`.
chunks = text_splitter.create_documents([churchill_speech])
# print(len(chunks))

# Embedding Cost

In [None]:
def print_embedding_cost(texts, model='text-embedding-ada-002', price_per_1k_tokens=0.0004):
    """Print the token count of `texts` and the estimated cost of embedding them.

    Args:
        texts: Iterable of objects exposing a `page_content` string.
        model: Model name handed to `tiktoken.encoding_for_model` to choose the
            tokenizer. Defaults to the model the notebook originally hard-coded.
        price_per_1k_tokens: Price in USD per 1000 tokens. The default keeps the
            originally hard-coded rate. NOTE(review): check it against the
            provider's current price list before relying on the estimate.

    Returns:
        None. The two result lines are printed.
    """
    import tiktoken
    enc = tiktoken.encoding_for_model(model)
    # Generator expression: no intermediate list is needed just to sum the lengths.
    total_tokens = sum(len(enc.encode(page.page_content)) for page in texts)
    print(f'Total tokens: {total_tokens}')
    print(f'Embedding cost is USD: {total_tokens / 1000 * price_per_1k_tokens:.6f}')

print_embedding_cost(chunks)

In [None]:
# Import from langchain_openai, the package this notebook already uses for
# ChatOpenAI and OpenAI, instead of the deprecated langchain.embeddings path.
from langchain_openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings()

# Embed the text of the first chunk only, as a quick check that embedding works.
vector = embedding.embed_query(chunks[0].page_content)
# print(vector)

## Inserting the Embeddings into a PineCone Index


In [None]:
import pinecone
# From here on the name `Pinecone` refers to the LangChain vector-store class;
# the Pinecone client class is reached as pinecone.Pinecone below.
from langchain_community.vectorstores import Pinecone
pc = pinecone.Pinecone()

# WARNING: deletes every index returned by list_indexes(), whatever its name.
# The progress message is printed once per deleted index.
for i in pc.list_indexes().names():
    print('Deleting all indexes....', end='')
    pc.delete_index(i)
    print('Done')

In [None]:
index_name = 'churchill-speech'

# Create the index only when it is not listed yet; nothing happens otherwise.
if index_name not in pc.list_indexes().names():
    print(f'Creating index {index_name} ...')
    pc.create_index(
        name=index_name,
        dimension=1536,  # NOTE(review): must match the embedding model's vector length — confirm
        metric='cosine',
        spec=pinecone.PodSpec(
            environment='gcp-starter'
        )
    )
    print('Done')
    

In [None]:
pc.list_indexes()

In [None]:
vector_store = Pinecone.from_documents(chunks, embedding, index_name=index_name)

In [None]:
# loading the vector store from an existing index
# vector_store = Pinecone.from_existing_index(index_name=index_name, embedding=embedding)



# Asking Questions (Similarity Search)

In [None]:
# Look up the stored chunks most similar to the query text; the result is a
# list whose items expose `.page_content` (printed in the next cell).
query = 'Where should we fight?'
result = vector_store.similarity_search(query)
# print(result)

In [None]:
for r in result:
    print(r.page_content)
    print('-' * 50)
    

In [None]:
from langchain.chains import RetrievalQA
# from langchain.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=1)

# Retriever over the vector store: similarity search with k=3.
retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3})

# Question-answering chain combining the retriever with the model.
# NOTE(review): 'stuff' presumably means the retrieved chunks are placed
# directly into a single prompt — confirm in the RetrievalQA docs.
chain = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever)

In [None]:
# Alternative questions to try (uncomment one):
# query = 'Where should we fight?'
# query = 'Who were the Kings of England and Belgium at the time?' 
query = 'What about the French Armies??'
answer = chain.invoke(query)
# The answer text is stored under the 'result' key.
print(answer['result'])

# 20240303 - Gemini

- Gemini is a family of highly capable multimodal models developed by Google DeepMind.
- A multimodal model is an AI model that can process and understand information from multiple sources, such as text, audio and video.
- Gemini was trained on a massive dataset of images, audio, video, text and code.
- Gemini Model Family:
  1. Gemini Ultra
     The largest and most capable model.
  2. Gemini Pro
     The best model for scaling.
  3. Gemini Nano
     1.8-3.2B parameters - the most efficient model for on-device (mobile) deployments.

Langchain and Google's Gemini

In [None]:
# install langchain's google gemini libr
pip install --upgrade -q langchain-google-genai

In [None]:
pip show langchain-google-genai


In [None]:
# install google's generative ai lib

pip install -q google-generativeai

In [None]:
pip show google-generativeai

In [None]:
import os
import getpass
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

# If the key was not provided through the environment or the .env file, ask for
# it interactively; getpass does not echo the typed value.
if 'GOOGLE_API_KEY' not in os.environ:
    os.environ['GOOGLE_API_KEY'] = getpass.getpass('Provide your Google API Key: ')

In [None]:
import google.generativeai as genai
for model in genai.list_models():
    print(model.name)
    

# Integrating Gemini with LangChain


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Gemini chat model wrapped for LangChain; the temperature is set to 0.9 here.
llm = ChatGoogleGenerativeAI(model='gemini-pro', temperature=0.9)

response = llm.invoke('Write a paragraph about life on Mars in year 2100.')
print(response.content)

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

llm = ChatGoogleGenerativeAI(model='gemini-pro')

# Single-placeholder prompt ({topic}).
# Note: this rebinds the notebook-wide name `prompt` to a PromptTemplate object.
prompt = PromptTemplate.from_template('You are a content creator. Write me a tweet about {topic}')
chain = LLMChain(
    llm=llm,
    prompt=prompt,
    verbose=True
)

topic = 'Why will AI change the world'
response = chain.invoke(input=topic)
print(response) # or response['text']


# System Prompt and Streaming 

In [None]:
from langchain_core.messages import HumanMessage, SystemMessage
# NOTE(review): convert_system_message_to_human=True presumably folds the system
# message into the human message for this model — confirm in the
# ChatGoogleGenerativeAI docs.
llm = ChatGoogleGenerativeAI(model='gemini-pro', convert_system_message_to_human=True)
output = llm.invoke(
    [
        SystemMessage(content='Answer only YES or NO in French.'),
        HumanMessage(content='Is fish a mamal?')
    ]
)

# Last expression of the cell, so the reply text is shown as the cell output.
output.content

# Streaming 

In [None]:
llm = ChatGoogleGenerativeAI(model='gemini-pro', temperature=0)
prompt = 'Write a scientific paper outlining the mathematical foundation of our universe.'
# response = llm.invoke(prompt)
# print(response.content)
import time
# Print each streamed chunk followed by a separator line so the chunk
# boundaries are visible; the short sleep only slows the display down.
for chunk in llm.stream(prompt):
    print(chunk.content)
    print('-'*100)
    time.sleep(0.1)

# Multimodal AI with Gemini Pro Vision

In [None]:
pip install -q pillow

In [None]:
from PIL import Image
img = Image.open('./random_image.jpeg')
img

In [None]:
from dotenv import load_dotenv, find_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
from PIL import Image
# load env vars
load_dotenv(find_dotenv(), override=True)

img = Image.open('./random_image.jpeg')
# A bare expression in the middle of a cell is not displayed (only a cell's last
# expression is); the image is shown by the previous cell instead.
img

llm = ChatGoogleGenerativeAI(model='gemini-pro-vision')
prompt = 'What is in this image?'
# One human message with two parts: the text question and the image.
# NOTE(review): a PIL image object is passed under 'image_url' — confirm that the
# installed langchain-google-genai version accepts image objects there.
message = HumanMessage(
    content=[
        {'type': 'text', 'text': prompt},
        {'type': 'image_url', 'image_url': img}
    ]
)

response = llm.invoke([message])
print(response.content)

In [None]:
# pip install pillow
# pip show pillow


In [None]:
def ask_gemini(text, image, model='gemini-pro-vision'):
    """Send a text question together with an image to a Gemini model.

    Args:
        text: The question or instruction to send.
        image: The image to send alongside the text; it is placed under the
            'image_url' key of the message content.
        model: Model name passed to ChatGoogleGenerativeAI.

    Returns:
        The object returned by `llm.invoke`; the caller reads its `.content`.
    """
    llm = ChatGoogleGenerativeAI(model=model)
    # Use the function's own arguments. The previous version read the notebook
    # globals `prompt` and `img`, so every call sent the same question and image
    # regardless of what was passed in.
    message = HumanMessage(
        content=[
            {'type': 'text', 'text': text},
            {'type': 'image_url', 'image_url': image}
        ]
    )

    response = llm.invoke([message])
    return response

response = ask_gemini('What is this image content? How can I identify the sport in this picture?', img)
print(response.content)


In [None]:
import requests
# Note: this rebinds the name `Image` (PIL's Image module in earlier cells) to
# IPython's display class.
from IPython.display import Image, display

image_url = 'https://picsum.photos/id/40/4106/2806'
# A timeout keeps the cell from hanging forever, and raise_for_status() stops
# here on an HTTP error instead of handing an error page on as image bytes.
http_response = requests.get(image_url, timeout=30)
http_response.raise_for_status()
content = http_response.content
image_data = Image(content)
# Only a cell's last expression is displayed automatically, so show the image explicitly.
display(image_data)
response2 = ask_gemini('Describe this image as detailed as possible', image_data)
print(response2.content)

# Jupyter AI
# JupyterLab
