In [1]:
import os
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import CohereEmbeddings
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from pinecone import Pinecone as PineconeClient
import requests

In [2]:
import os
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

True

In [2]:
# Keys
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
PINECONE_ENVIRONMENT = os.environ["PINECONE_ENVIRONMENT"]
PINECONE_INDEX_NAME = os.environ["PINECONE_INDEX_NAME"]

KeyError: 'PINECONE_API_KEY'

In [4]:
# Init
pinecone = PineconeClient(api_key=PINECONE_API_KEY,
                         environment=PINECONE_ENVIRONMENT)

embeddings = CohereEmbeddings(model="multilingual-22-12")
vectorstore = Pinecone.from_existing_index(index_name=PINECONE_INDEX_NAME, embedding=embeddings)
retriever = vectorstore.as_retriever()

  warn_deprecated(


In [5]:

# RAG prompt
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# RAG
model = ChatOpenAI(temperature=0, 
                   model="gpt-4-1106-preview")

chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | prompt
    | model
    | StrOutputParser()
)

  warn_deprecated(


In [6]:
chain.invoke("what is film noir?")

'Film noir is a cinematic term used primarily to describe stylish Hollywood crime dramas, particularly those that emphasize cynical attitudes and sexual motivations. The term is French for "black film" and was first applied by French critics to American films that emerged in the post-World War II era, which were marked by a dark, gloomy atmosphere and a sense of pessimism, fatalism, and menace. Classic film noir often presents a world where characters—especially the central protagonist, who is often a hard-boiled detective or a weary private eye—are trapped in a web of crime, corruption, and moral ambiguity. Visually, film noir is known for its high-contrast lighting, use of shadows, and often a black-and-white aesthetic that enhances the sense of danger and moral ambiguity.'

In [7]:
def fetch_wikipedia_page(id):
    url = f"https://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=json&pageids={id}"
    response = requests.get(url)
    data = response.json()
    page_content = list(data['query']['pages'].values())[0]['extract']
    return page_content

def fetch_url(x):
    urls = [doc.metadata['url'] for doc in x['context']]
    ids = [url.split('=')[-1] for url in urls]
    # First 32k tokens
    contents = [fetch_wikipedia_page(id)[:32000] for id in ids]    
    return {"context": contents, "question": x["question"]}

# RAG
model = ChatOpenAI(temperature=0, 
                   model="gpt-4-1106-preview")

chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | RunnableLambda(fetch_url)  
    | prompt
    | model
    | StrOutputParser()
)

In [8]:
chain.invoke("what is film noir?")

'Film noir is a cinematic term used primarily to describe stylish Hollywood crime dramas, particularly those that emphasize cynical attitudes and sexual motivations. The term is French for "black film," indicative of the dark, somber tone of these movies. Film noir is associated with a low-key, black-and-white visual style that has roots in German Expressionist cinematography. Common themes in film noir include crime, moral ambiguity, and the dark side of human nature. Classic film noir was most prevalent in the 1940s and 1950s, and many of its elements have influenced other genres and the broader aesthetic of film and television storytelling.'

In [10]:
from pinecone import ServerlessSpec
from pinecone import Pinecone as PineconeClient


pc = PineconeClient(api_key=PINECONE_API_KEY,environment=PINECONE_ENVIRONMENT)


index_name = "docs-quickstart-index"

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=2,
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws', 
            region='us-east-1'
        ) 
    ) 

In [11]:
index = pc.Index(index_name)

index.upsert(
    vectors=[
        {"id": "vec1", "values": [1.0, 1.5]},
        {"id": "vec2", "values": [2.0, 1.0]},
        {"id": "vec3", "values": [0.1, 3.0]},
    ],
    namespace="ns1"
)

index.upsert(
    vectors=[
        {"id": "vec1", "values": [1.0, -2.5]},
        {"id": "vec2", "values": [3.0, -2.0]},
        {"id": "vec3", "values": [0.5, -1.5]},
    ],
    namespace="ns2"
)

{'upserted_count': 3}

In [12]:
print(index.describe_index_stats())

{'dimension': 2,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 3}, 'ns2': {'vector_count': 3}},
 'total_vector_count': 6}


In [13]:
query_results1 = index.query(
    namespace="ns1",
    vector=[1.0, 1.5],
    top_k=3,
    include_values=True
)

print(query_results1)

query_results2 = index.query(
    namespace="ns2",
    vector=[1.0,-2.5],
    top_k=3,
    include_values=True
)

print(query_results2)


{'matches': [{'id': 'vec1', 'score': 1.0, 'values': [1.0, 1.5]},
             {'id': 'vec2', 'score': 0.868243158, 'values': [2.0, 1.0]},
             {'id': 'vec3', 'score': 0.850068152, 'values': [0.1, 3.0]}],
 'namespace': 'ns1',
 'usage': {'read_units': 6}}
{'matches': [{'id': 'vec1', 'score': 1.0, 'values': [1.0, -2.5]},
             {'id': 'vec3', 'score': 0.998274386, 'values': [0.5, -1.5]},
             {'id': 'vec2', 'score': 0.824041963, 'values': [3.0, -2.0]}],
 'namespace': 'ns2',
 'usage': {'read_units': 6}}
