In [None]:
# Install required packages
!pip install llama-index.core pinecone-client openai llama_index.readers.web llama_index.vector_stores.pinecone llama_index.llms.openai llama-index-embeddings-openai



In [None]:
# Import required libraries
import os
from pinecone import Pinecone, ServerlessSpec
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding

# Set environment variables
def setup_environment():
    """Write the API keys and Pinecone environment name into ``os.environ``.

    Edit the placeholder strings below, or export the variables before
    starting the kernel. A value that is still a placeholder (it starts with
    ``#``) is only written when the variable has no value yet, so a real key
    that is already present in the environment is never replaced by
    placeholder text. Any non-placeholder value is always written.
    """
    settings = {
        "OPENAI_API_KEY": "# Replace with your actual OpenAI API key",
        "PINECONE_API_KEY": "# Replace with you actual Pinecone API key",
        "PINECONE_ENVIRONMENT": "us-east-1",
    }
    for name, value in settings.items():
        # Skip only when we would overwrite an existing value with a placeholder.
        if value.startswith("#") and os.environ.get(name):
            continue
        os.environ[name] = value

# Initialize Pinecone
def initialize_pinecone(index_name="llama-index-nvidia", dimension=1536,
                        metric="euclidean", cloud="gcp", region="us-west1"):
    """Connect to Pinecone and make sure the target index exists.

    Args:
        index_name: Name of the index to open; it is created first if missing.
        dimension: Vector size used on creation (1536 is the size of
            text-embedding-ada-002 vectors, the model this notebook embeds with).
        metric: Distance metric used on creation.
        cloud: Cloud provider handed to ``ServerlessSpec`` on creation.
        region: Cloud region handed to ``ServerlessSpec`` on creation.

    Returns:
        A ``(index_handle, index_name)`` tuple.
    """
    # NOTE(review): the default region differs from the "us-east-1" value that
    # setup_environment stores in PINECONE_ENVIRONMENT — confirm which is intended.
    pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    # Creation settings only matter when the index is missing; an existing
    # index is reused unchanged.
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )

    return pc.Index(index_name), index_name

# Load documents from URLs
def load_documents(urls=None, reader=None):
    """Fetch web pages and return the documents produced by the reader.

    Args:
        urls: List of page URLs. Defaults to the NVIDIA NIM blog posts below.
        reader: Object exposing ``load_data(urls)``. Defaults to
            ``SimpleWebPageReader(html_to_text=True)``.

    Returns:
        Whatever ``reader.load_data(urls)`` returns.
    """
    if urls is None:
        urls = [
            "https://developer.nvidia.com/blog/managing-ai-inference-pipelines-on-kubernetes-with-nvidia-nim-operator/",
            "https://developer.nvidia.com/blog/securing-generative-ai-deployments-with-nvidia-nim-and-nvidia-nemo-guardrails/",
            "https://blogs.nvidia.com/blog/hugging-face-inference-nim-microservices-dgx-cloud/",
            "https://developer.nvidia.com/blog/a-simple-guide-to-deploying-generative-ai-with-nvidia-nim/",
            "https://developer.nvidia.com/blog/nvidia-nim-offers-optimized-inference-microservices-for-deploying-ai-models-at-scale/",
            "https://developer.nvidia.com/blog/access-to-nvidia-nim-now-available-free-to-developer-program-members/?ncid=ref-dev-240225",
        ]
    if reader is None:
        reader = SimpleWebPageReader(html_to_text=True)
    return reader.load_data(urls)

# Set up vector store and index
def setup_vector_store(index_name):
    """Create a Pinecone-backed storage context and register the OpenAI models.

    Args:
        index_name: Name of the Pinecone index the vector store talks to.

    Returns:
        A ``StorageContext`` wrapping a ``PineconeVectorStore`` for that index.

    Side effects:
        Assigns ``Settings.llm`` and ``Settings.embed_model``. Reads
        ``PINECONE_ENVIRONMENT`` from ``os.environ`` (``KeyError`` if unset).
    """
    # Build every component first; the global settings are touched last.
    embedder = OpenAIEmbedding(model="text-embedding-ada-002")
    chat_model = OpenAI(temperature=0)
    pinecone_store = PineconeVectorStore(
        index_name=index_name,
        environment=os.environ["PINECONE_ENVIRONMENT"],
    )
    context = StorageContext.from_defaults(vector_store=pinecone_store)

    Settings.llm, Settings.embed_model = chat_model, embedder
    return context

def main():
    """Run the ingestion pipeline end to end and return the resulting index.

    Steps, in order: set environment variables, open (or create) the Pinecone
    index, load the documents, build the storage context, then build a
    ``VectorStoreIndex`` from the documents using that storage context.
    """
    setup_environment()

    # Only the name is needed here; the index handle itself is not used.
    _, index_name = initialize_pinecone()

    docs = load_documents()
    context = setup_vector_store(index_name)

    return VectorStoreIndex.from_documents(docs, storage_context=context)

# Run the pipeline when this code executes as the top-level program and keep
# the returned index in the global name `index`.
if __name__ == "__main__":
    index = main()

Upserted vectors:   0%|          | 0/37 [00:00<?, ?it/s]

In [None]:
# Import required libraries
import os
from pinecone import Pinecone, ServerlessSpec
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding

# Set environment variables
def setup_environment():
    """Write the API keys and Pinecone environment name into ``os.environ``.

    Edit the placeholder strings below, or export the variables before
    starting the kernel. A value that is still a placeholder (it starts with
    ``#``) is only written when the variable has no value yet, so a real key
    that is already present in the environment is never replaced by
    placeholder text. Any non-placeholder value is always written.
    """
    settings = {
        "OPENAI_API_KEY": "# Replace with your actual OpenAI API key",
        "PINECONE_API_KEY": "# Replace with you actual Pinecone API key",
        "PINECONE_ENVIRONMENT": "us-east-1",
    }
    for name, value in settings.items():
        # Skip only when we would overwrite an existing value with a placeholder.
        if value.startswith("#") and os.environ.get(name):
            continue
        os.environ[name] = value

# Initialize Pinecone
def initialize_pinecone(index_name="llama-index-nvidia", dimension=1536,
                        metric="euclidean", cloud="gcp", region="us-west1"):
    """Connect to Pinecone and make sure the target index exists.

    Args:
        index_name: Name of the index to open; it is created first if missing.
        dimension: Vector size used on creation (1536 is the size of
            text-embedding-ada-002 vectors, the model this notebook embeds with).
        metric: Distance metric used on creation.
        cloud: Cloud provider handed to ``ServerlessSpec`` on creation.
        region: Cloud region handed to ``ServerlessSpec`` on creation.

    Returns:
        A ``(index_handle, index_name)`` tuple.
    """
    # NOTE(review): the default region differs from the "us-east-1" value that
    # setup_environment stores in PINECONE_ENVIRONMENT — confirm which is intended.
    pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    # Creation settings only matter when the index is missing; an existing
    # index is reused unchanged.
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )

    return pc.Index(index_name), index_name

# Load documents from URLs
def load_documents(urls=None, reader=None):
    """Fetch web pages and return the documents produced by the reader.

    Args:
        urls: List of page URLs. Defaults to the AlphaFold2 NIM API
            reference page below.
        reader: Object exposing ``load_data(urls)``. Defaults to
            ``SimpleWebPageReader(html_to_text=True)``.

    Returns:
        Whatever ``reader.load_data(urls)`` returns.
    """
    if urls is None:
        urls = [
            "https://docs.api.nvidia.com/nim/reference/deepmind-alphafold2-infer",
        ]
    if reader is None:
        reader = SimpleWebPageReader(html_to_text=True)
    return reader.load_data(urls)

# Set up vector store and index
def setup_vector_store(index_name):
    """Create a Pinecone-backed storage context and register the OpenAI models.

    Args:
        index_name: Name of the Pinecone index the vector store talks to.

    Returns:
        A ``StorageContext`` wrapping a ``PineconeVectorStore`` for that index.

    Side effects:
        Assigns ``Settings.llm`` and ``Settings.embed_model``. Reads
        ``PINECONE_ENVIRONMENT`` from ``os.environ`` (``KeyError`` if unset).
    """
    # Build every component first; the global settings are touched last.
    embedder = OpenAIEmbedding(model="text-embedding-ada-002")
    chat_model = OpenAI(temperature=0)
    pinecone_store = PineconeVectorStore(
        index_name=index_name,
        environment=os.environ["PINECONE_ENVIRONMENT"],
    )
    context = StorageContext.from_defaults(vector_store=pinecone_store)

    Settings.llm, Settings.embed_model = chat_model, embedder
    return context

def main():
    """Run the ingestion pipeline end to end and return the resulting index.

    Steps, in order: set environment variables, open (or create) the Pinecone
    index, load the documents, build the storage context, then build a
    ``VectorStoreIndex`` from the documents using that storage context.
    """
    setup_environment()

    # Only the name is needed here; the index handle itself is not used.
    _, index_name = initialize_pinecone()

    docs = load_documents()
    context = setup_vector_store(index_name)

    return VectorStoreIndex.from_documents(docs, storage_context=context)

# Run the pipeline when this code executes as the top-level program and keep
# the returned index in the global name `index`.
if __name__ == "__main__":
    index = main()

Upserted vectors:   0%|          | 0/23 [00:00<?, ?it/s]

In [None]:
# Import required libraries
import os
from pinecone import Pinecone, ServerlessSpec
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding

# Set environment variables
def setup_environment():
    """Write the API keys and Pinecone environment name into ``os.environ``.

    Edit the placeholder strings below, or export the variables before
    starting the kernel. A value that is still a placeholder (it starts with
    ``#``) is only written when the variable has no value yet, so a real key
    that is already present in the environment is never replaced by
    placeholder text. Any non-placeholder value is always written.
    """
    settings = {
        "OPENAI_API_KEY": "# Replace with your actual OpenAI API key",
        "PINECONE_API_KEY": "# Replace with you actual Pinecone API key",
        "PINECONE_ENVIRONMENT": "us-east-1",
    }
    for name, value in settings.items():
        # Skip only when we would overwrite an existing value with a placeholder.
        if value.startswith("#") and os.environ.get(name):
            continue
        os.environ[name] = value

# Initialize Pinecone
def initialize_pinecone(index_name="llama-index-nvidia", dimension=1536,
                        metric="euclidean", cloud="gcp", region="us-west1"):
    """Connect to Pinecone and make sure the target index exists.

    Args:
        index_name: Name of the index to open; it is created first if missing.
        dimension: Vector size used on creation (1536 is the size of
            text-embedding-ada-002 vectors, the model this notebook embeds with).
        metric: Distance metric used on creation.
        cloud: Cloud provider handed to ``ServerlessSpec`` on creation.
        region: Cloud region handed to ``ServerlessSpec`` on creation.

    Returns:
        A ``(index_handle, index_name)`` tuple.
    """
    # NOTE(review): the default region differs from the "us-east-1" value that
    # setup_environment stores in PINECONE_ENVIRONMENT — confirm which is intended.
    pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    # Creation settings only matter when the index is missing; an existing
    # index is reused unchanged.
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )

    return pc.Index(index_name), index_name

# Load documents from URLs
def load_documents(urls=None, reader=None):
    """Fetch web pages and return the documents produced by the reader.

    Args:
        urls: List of page URLs. Defaults to the NIM API reference pages
            listed below.
        reader: Object exposing ``load_data(urls)``. Defaults to
            ``SimpleWebPageReader(html_to_text=True)``.

    Returns:
        Whatever ``reader.load_data(urls)`` returns.
    """
    if urls is None:
        # One URL per line, each followed by a comma, so that adjacent string
        # literals can never be silently concatenated into a single bad URL.
        urls = [
            "https://docs.api.nvidia.com/nim/reference/ipd-proteinmpnn-infer",
            "https://docs.api.nvidia.com/nim/reference/ipd-rfdiffusion",
            "https://docs.api.nvidia.com/nim/reference/ipd-rfdiffusion-infer",
            "https://docs.api.nvidia.com/nim/reference/mit-diffdock",
            "https://docs.api.nvidia.com/nim/reference/mit-diffdock-infer",
            "https://docs.api.nvidia.com/nim/reference/nvidia-molmim",
            # NOTE(review): slug corrected from "vidia-molmim-infer" to match
            # the "nvidia-molmim" page above — confirm the page exists.
            "https://docs.api.nvidia.com/nim/reference/nvidia-molmim-infer",
        ]
    if reader is None:
        reader = SimpleWebPageReader(html_to_text=True)
    return reader.load_data(urls)

# Set up vector store and index
def setup_vector_store(index_name):
    """Create a Pinecone-backed storage context and register the OpenAI models.

    Args:
        index_name: Name of the Pinecone index the vector store talks to.

    Returns:
        A ``StorageContext`` wrapping a ``PineconeVectorStore`` for that index.

    Side effects:
        Assigns ``Settings.llm`` and ``Settings.embed_model``. Reads
        ``PINECONE_ENVIRONMENT`` from ``os.environ`` (``KeyError`` if unset).
    """
    # Build every component first; the global settings are touched last.
    embedder = OpenAIEmbedding(model="text-embedding-ada-002")
    chat_model = OpenAI(temperature=0)
    pinecone_store = PineconeVectorStore(
        index_name=index_name,
        environment=os.environ["PINECONE_ENVIRONMENT"],
    )
    context = StorageContext.from_defaults(vector_store=pinecone_store)

    Settings.llm, Settings.embed_model = chat_model, embedder
    return context

def main():
    """Run the ingestion pipeline end to end and return the resulting index.

    Steps, in order: set environment variables, open (or create) the Pinecone
    index, load the documents, build the storage context, then build a
    ``VectorStoreIndex`` from the documents using that storage context.
    """
    setup_environment()

    # Only the name is needed here; the index handle itself is not used.
    _, index_name = initialize_pinecone()

    docs = load_documents()
    context = setup_vector_store(index_name)

    return VectorStoreIndex.from_documents(docs, storage_context=context)

# Run the pipeline when this code executes as the top-level program and keep
# the returned index in the global name `index`.
if __name__ == "__main__":
    index = main()

Upserted vectors:   0%|          | 0/118 [00:00<?, ?it/s]

In [None]:
# Import required libraries
import os
from pinecone import Pinecone, ServerlessSpec
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding

# Set environment variables
def setup_environment():
    """Write the API keys and Pinecone environment name into ``os.environ``.

    Edit the placeholder strings below, or export the variables before
    starting the kernel. A value that is still a placeholder (it starts with
    ``#``) is only written when the variable has no value yet, so a real key
    that is already present in the environment is never replaced by
    placeholder text. Any non-placeholder value is always written.
    """
    settings = {
        "OPENAI_API_KEY": "# Replace with your actual OpenAI API key",
        "PINECONE_API_KEY": "# Replace with you actual Pinecone API key",
        "PINECONE_ENVIRONMENT": "us-east-1",
    }
    for name, value in settings.items():
        # Skip only when we would overwrite an existing value with a placeholder.
        if value.startswith("#") and os.environ.get(name):
            continue
        os.environ[name] = value

# Initialize Pinecone
def initialize_pinecone(index_name="llama-index-nvidia", dimension=1536,
                        metric="euclidean", cloud="gcp", region="us-west1"):
    """Connect to Pinecone and make sure the target index exists.

    Args:
        index_name: Name of the index to open; it is created first if missing.
        dimension: Vector size used on creation (1536 is the size of
            text-embedding-ada-002 vectors, the model this notebook embeds with).
        metric: Distance metric used on creation.
        cloud: Cloud provider handed to ``ServerlessSpec`` on creation.
        region: Cloud region handed to ``ServerlessSpec`` on creation.

    Returns:
        A ``(index_handle, index_name)`` tuple.
    """
    # NOTE(review): the default region differs from the "us-east-1" value that
    # setup_environment stores in PINECONE_ENVIRONMENT — confirm which is intended.
    pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    # Creation settings only matter when the index is missing; an existing
    # index is reused unchanged.
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )

    return pc.Index(index_name), index_name

# Load documents from URLs
def load_documents(urls=None, reader=None):
    """Fetch web pages and return the documents produced by the reader.

    Args:
        urls: List of page URLs. Defaults to the NIM index page and the
            AlphaFold2 NIM documentation pages below.
        reader: Object exposing ``load_data(urls)``. Defaults to
            ``SimpleWebPageReader(html_to_text=True)``.

    Returns:
        Whatever ``reader.load_data(urls)`` returns.
    """
    if urls is None:
        urls = [
            "https://docs.nvidia.com/nim/index.html",
            "https://docs.nvidia.com/nim/bionemo/alphafold2/latest/overview.html",
            "https://docs.nvidia.com/nim/bionemo/alphafold2/latest/quickstart-guide.html",
            "https://docs.nvidia.com/nim/bionemo/alphafold2/latest/deployment-guide.html",
            "https://docs.nvidia.com/nim/bionemo/alphafold2/latest/endpoints.html",
            "https://docs.nvidia.com/nim/bionemo/alphafold2/latest/performance.html",
        ]
    if reader is None:
        reader = SimpleWebPageReader(html_to_text=True)
    return reader.load_data(urls)

# Set up vector store and index
def setup_vector_store(index_name):
    """Create a Pinecone-backed storage context and register the OpenAI models.

    Args:
        index_name: Name of the Pinecone index the vector store talks to.

    Returns:
        A ``StorageContext`` wrapping a ``PineconeVectorStore`` for that index.

    Side effects:
        Assigns ``Settings.llm`` and ``Settings.embed_model``. Reads
        ``PINECONE_ENVIRONMENT`` from ``os.environ`` (``KeyError`` if unset).
    """
    # Build every component first; the global settings are touched last.
    embedder = OpenAIEmbedding(model="text-embedding-ada-002")
    chat_model = OpenAI(temperature=0)
    pinecone_store = PineconeVectorStore(
        index_name=index_name,
        environment=os.environ["PINECONE_ENVIRONMENT"],
    )
    context = StorageContext.from_defaults(vector_store=pinecone_store)

    Settings.llm, Settings.embed_model = chat_model, embedder
    return context

def main():
    """Run the ingestion pipeline end to end and return the resulting index.

    Steps, in order: set environment variables, open (or create) the Pinecone
    index, load the documents, build the storage context, then build a
    ``VectorStoreIndex`` from the documents using that storage context.
    """
    setup_environment()

    # Only the name is needed here; the index handle itself is not used.
    _, index_name = initialize_pinecone()

    docs = load_documents()
    context = setup_vector_store(index_name)

    return VectorStoreIndex.from_documents(docs, storage_context=context)

# Run the pipeline when this code executes as the top-level program and keep
# the returned index in the global name `index`.
if __name__ == "__main__":
    index = main()

Upserted vectors:   0%|          | 0/30 [00:00<?, ?it/s]

In [None]:
# Import required libraries
import os
from pinecone import Pinecone, ServerlessSpec
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding

# Set environment variables
def setup_environment():
    """Write the API keys and Pinecone environment name into ``os.environ``.

    Edit the placeholder strings below, or export the variables before
    starting the kernel. A value that is still a placeholder (it starts with
    ``#``) is only written when the variable has no value yet, so a real key
    that is already present in the environment is never replaced by
    placeholder text. Any non-placeholder value is always written.
    """
    settings = {
        "OPENAI_API_KEY": "# Replace with your actual OpenAI API key",
        "PINECONE_API_KEY": "# Replace with you actual Pinecone API key",
        "PINECONE_ENVIRONMENT": "us-east-1",
    }
    for name, value in settings.items():
        # Skip only when we would overwrite an existing value with a placeholder.
        if value.startswith("#") and os.environ.get(name):
            continue
        os.environ[name] = value

# Initialize Pinecone
def initialize_pinecone(index_name="llama-index-nvidia", dimension=1536,
                        metric="euclidean", cloud="gcp", region="us-west1"):
    """Connect to Pinecone and make sure the target index exists.

    Args:
        index_name: Name of the index to open; it is created first if missing.
        dimension: Vector size used on creation (1536 is the size of
            text-embedding-ada-002 vectors, the model this notebook embeds with).
        metric: Distance metric used on creation.
        cloud: Cloud provider handed to ``ServerlessSpec`` on creation.
        region: Cloud region handed to ``ServerlessSpec`` on creation.

    Returns:
        A ``(index_handle, index_name)`` tuple.
    """
    # NOTE(review): the default region differs from the "us-east-1" value that
    # setup_environment stores in PINECONE_ENVIRONMENT — confirm which is intended.
    pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    # Creation settings only matter when the index is missing; an existing
    # index is reused unchanged.
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )

    return pc.Index(index_name), index_name

# Load documents from URLs
def load_documents(urls=None, reader=None):
    """Fetch web pages and return the documents produced by the reader.

    Args:
        urls: List of page URLs. Defaults to the DiffDock NIM documentation
            pages below.
        reader: Object exposing ``load_data(urls)``. Defaults to
            ``SimpleWebPageReader(html_to_text=True)``.

    Returns:
        Whatever ``reader.load_data(urls)`` returns.
    """
    if urls is None:
        urls = [
            "https://docs.nvidia.com/nim/bionemo/diffdock/latest/overview.html",
            "https://docs.nvidia.com/nim/bionemo/diffdock/latest/getting-started.html",
            "https://docs.nvidia.com/nim/bionemo/diffdock/latest/configure-nim.html",
            "https://docs.nvidia.com/nim/bionemo/diffdock/latest/advanced-usage.html",
            "https://docs.nvidia.com/nim/bionemo/diffdock/latest/performance.html",
        ]
    if reader is None:
        reader = SimpleWebPageReader(html_to_text=True)
    return reader.load_data(urls)

# Set up vector store and index
def setup_vector_store(index_name):
    """Create a Pinecone-backed storage context and register the OpenAI models.

    Args:
        index_name: Name of the Pinecone index the vector store talks to.

    Returns:
        A ``StorageContext`` wrapping a ``PineconeVectorStore`` for that index.

    Side effects:
        Assigns ``Settings.llm`` and ``Settings.embed_model``. Reads
        ``PINECONE_ENVIRONMENT`` from ``os.environ`` (``KeyError`` if unset).
    """
    # Build every component first; the global settings are touched last.
    embedder = OpenAIEmbedding(model="text-embedding-ada-002")
    chat_model = OpenAI(temperature=0)
    pinecone_store = PineconeVectorStore(
        index_name=index_name,
        environment=os.environ["PINECONE_ENVIRONMENT"],
    )
    context = StorageContext.from_defaults(vector_store=pinecone_store)

    Settings.llm, Settings.embed_model = chat_model, embedder
    return context

def main():
    """Run the ingestion pipeline end to end and return the resulting index.

    Steps, in order: set environment variables, open (or create) the Pinecone
    index, load the documents, build the storage context, then build a
    ``VectorStoreIndex`` from the documents using that storage context.
    """
    setup_environment()

    # Only the name is needed here; the index handle itself is not used.
    _, index_name = initialize_pinecone()

    docs = load_documents()
    context = setup_vector_store(index_name)

    return VectorStoreIndex.from_documents(docs, storage_context=context)

# Run the pipeline when this code executes as the top-level program and keep
# the returned index in the global name `index`.
if __name__ == "__main__":
    index = main()

Upserted vectors:   0%|          | 0/21 [00:00<?, ?it/s]

In [None]:
# Import required libraries
import os
from pinecone import Pinecone, ServerlessSpec
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding

# Set environment variables
def setup_environment():
    """Write the API keys and Pinecone environment name into ``os.environ``.

    Edit the placeholder strings below, or export the variables before
    starting the kernel. A value that is still a placeholder (it starts with
    ``#``) is only written when the variable has no value yet, so a real key
    that is already present in the environment is never replaced by
    placeholder text. Any non-placeholder value is always written.
    """
    settings = {
        "OPENAI_API_KEY": "# Replace with your actual OpenAI API key",
        "PINECONE_API_KEY": "# Replace with you actual Pinecone API key",
        "PINECONE_ENVIRONMENT": "us-east-1",
    }
    for name, value in settings.items():
        # Skip only when we would overwrite an existing value with a placeholder.
        if value.startswith("#") and os.environ.get(name):
            continue
        os.environ[name] = value

# Initialize Pinecone
def initialize_pinecone(index_name="llama-index-nvidia", dimension=1536,
                        metric="euclidean", cloud="gcp", region="us-west1"):
    """Connect to Pinecone and make sure the target index exists.

    Args:
        index_name: Name of the index to open; it is created first if missing.
        dimension: Vector size used on creation (1536 is the size of
            text-embedding-ada-002 vectors, the model this notebook embeds with).
        metric: Distance metric used on creation.
        cloud: Cloud provider handed to ``ServerlessSpec`` on creation.
        region: Cloud region handed to ``ServerlessSpec`` on creation.

    Returns:
        A ``(index_handle, index_name)`` tuple.
    """
    # NOTE(review): the default region differs from the "us-east-1" value that
    # setup_environment stores in PINECONE_ENVIRONMENT — confirm which is intended.
    pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    # Creation settings only matter when the index is missing; an existing
    # index is reused unchanged.
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )

    return pc.Index(index_name), index_name

# Load documents from URLs
def load_documents(urls=None, reader=None):
    """Fetch web pages and return the documents produced by the reader.

    Args:
        urls: List of page URLs. Defaults to the RFdiffusion NIM
            documentation pages below.
        reader: Object exposing ``load_data(urls)``. Defaults to
            ``SimpleWebPageReader(html_to_text=True)``.

    Returns:
        Whatever ``reader.load_data(urls)`` returns.
    """
    if urls is None:
        urls = [
            "https://docs.nvidia.com/nim/bionemo/rfdiffusion/latest/overview.html",
            "https://docs.nvidia.com/nim/bionemo/rfdiffusion/latest/prerequisites.html",
            "https://docs.nvidia.com/nim/bionemo/rfdiffusion/latest/quickstart-guide.html",
            "https://docs.nvidia.com/nim/bionemo/rfdiffusion/latest/endpoints.html",
            "https://docs.nvidia.com/nim/bionemo/rfdiffusion/latest/benchmarking.html",
            "https://docs.nvidia.com/nim/bionemo/rfdiffusion/latest/advanced-usage.html",
        ]
    if reader is None:
        reader = SimpleWebPageReader(html_to_text=True)
    return reader.load_data(urls)

# Set up vector store and index
def setup_vector_store(index_name):
    """Create a Pinecone-backed storage context and register the OpenAI models.

    Args:
        index_name: Name of the Pinecone index the vector store talks to.

    Returns:
        A ``StorageContext`` wrapping a ``PineconeVectorStore`` for that index.

    Side effects:
        Assigns ``Settings.llm`` and ``Settings.embed_model``. Reads
        ``PINECONE_ENVIRONMENT`` from ``os.environ`` (``KeyError`` if unset).
    """
    # Build every component first; the global settings are touched last.
    embedder = OpenAIEmbedding(model="text-embedding-ada-002")
    chat_model = OpenAI(temperature=0)
    pinecone_store = PineconeVectorStore(
        index_name=index_name,
        environment=os.environ["PINECONE_ENVIRONMENT"],
    )
    context = StorageContext.from_defaults(vector_store=pinecone_store)

    Settings.llm, Settings.embed_model = chat_model, embedder
    return context

def main():
    """Run the ingestion pipeline end to end and return the resulting index.

    Steps, in order: set environment variables, open (or create) the Pinecone
    index, load the documents, build the storage context, then build a
    ``VectorStoreIndex`` from the documents using that storage context.
    """
    setup_environment()

    # Only the name is needed here; the index handle itself is not used.
    _, index_name = initialize_pinecone()

    docs = load_documents()
    context = setup_vector_store(index_name)

    return VectorStoreIndex.from_documents(docs, storage_context=context)

# Run the pipeline when this code executes as the top-level program and keep
# the returned index in the global name `index`.
if __name__ == "__main__":
    index = main()

Upserted vectors:   0%|          | 0/18 [00:00<?, ?it/s]

In [None]:
# Import required libraries
import os
from pinecone import Pinecone, ServerlessSpec
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding

# Set environment variables
def setup_environment():
    """Write the API keys and Pinecone environment name into ``os.environ``.

    Edit the placeholder strings below, or export the variables before
    starting the kernel. A value that is still a placeholder (it starts with
    ``#``) is only written when the variable has no value yet, so a real key
    that is already present in the environment is never replaced by
    placeholder text. Any non-placeholder value is always written.
    """
    settings = {
        "OPENAI_API_KEY": "# Replace with your actual OpenAI API key",
        "PINECONE_API_KEY": "# Replace with you actual Pinecone API key",
        "PINECONE_ENVIRONMENT": "us-east-1",
    }
    for name, value in settings.items():
        # Skip only when we would overwrite an existing value with a placeholder.
        if value.startswith("#") and os.environ.get(name):
            continue
        os.environ[name] = value

# Initialize Pinecone
def initialize_pinecone(index_name="llama-index-nvidia", dimension=1536,
                        metric="euclidean", cloud="gcp", region="us-west1"):
    """Connect to Pinecone and make sure the target index exists.

    Args:
        index_name: Name of the index to open; it is created first if missing.
        dimension: Vector size used on creation (1536 is the size of
            text-embedding-ada-002 vectors, the model this notebook embeds with).
        metric: Distance metric used on creation.
        cloud: Cloud provider handed to ``ServerlessSpec`` on creation.
        region: Cloud region handed to ``ServerlessSpec`` on creation.

    Returns:
        A ``(index_handle, index_name)`` tuple.
    """
    # NOTE(review): the default region differs from the "us-east-1" value that
    # setup_environment stores in PINECONE_ENVIRONMENT — confirm which is intended.
    pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    # Creation settings only matter when the index is missing; an existing
    # index is reused unchanged.
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )

    return pc.Index(index_name), index_name

# Load documents from URLs
def load_documents(urls=None, reader=None):
    """Fetch web pages and return the documents produced by the reader.

    Args:
        urls: List of page URLs. Defaults to the ProteinMPNN NIM
            documentation pages below.
        reader: Object exposing ``load_data(urls)``. Defaults to
            ``SimpleWebPageReader(html_to_text=True)``.

    Returns:
        Whatever ``reader.load_data(urls)`` returns.
    """
    if urls is None:
        urls = [
            "https://docs.nvidia.com/nim/bionemo/proteinmpnn/latest/overview.html",
            "https://docs.nvidia.com/nim/bionemo/proteinmpnn/latest/prerequisites.html",
            "https://docs.nvidia.com/nim/bionemo/proteinmpnn/latest/quickstart-guide.html",
            "https://docs.nvidia.com/nim/bionemo/proteinmpnn/latest/endpoints.html",
            "https://docs.nvidia.com/nim/bionemo/proteinmpnn/latest/benchmarking.html",
            "https://docs.nvidia.com/nim/bionemo/proteinmpnn/latest/advanced-usage.html",
        ]
    if reader is None:
        reader = SimpleWebPageReader(html_to_text=True)
    return reader.load_data(urls)

# Set up vector store and index
def setup_vector_store(index_name):
    """Create a Pinecone-backed storage context and register the OpenAI models.

    Args:
        index_name: Name of the Pinecone index the vector store talks to.

    Returns:
        A ``StorageContext`` wrapping a ``PineconeVectorStore`` for that index.

    Side effects:
        Assigns ``Settings.llm`` and ``Settings.embed_model``. Reads
        ``PINECONE_ENVIRONMENT`` from ``os.environ`` (``KeyError`` if unset).
    """
    # Build every component first; the global settings are touched last.
    embedder = OpenAIEmbedding(model="text-embedding-ada-002")
    chat_model = OpenAI(temperature=0)
    pinecone_store = PineconeVectorStore(
        index_name=index_name,
        environment=os.environ["PINECONE_ENVIRONMENT"],
    )
    context = StorageContext.from_defaults(vector_store=pinecone_store)

    Settings.llm, Settings.embed_model = chat_model, embedder
    return context

def main():
    """Run the ingestion pipeline end to end and return the resulting index.

    Steps, in order: set environment variables, open (or create) the Pinecone
    index, load the documents, build the storage context, then build a
    ``VectorStoreIndex`` from the documents using that storage context.
    """
    setup_environment()

    # Only the name is needed here; the index handle itself is not used.
    _, index_name = initialize_pinecone()

    docs = load_documents()
    context = setup_vector_store(index_name)

    return VectorStoreIndex.from_documents(docs, storage_context=context)

# Run the pipeline when this code executes as the top-level program and keep
# the returned index in the global name `index`.
if __name__ == "__main__":
    index = main()

Upserted vectors:   0%|          | 0/19 [00:00<?, ?it/s]

In [None]:
# Import required libraries
import os
from pinecone import Pinecone, ServerlessSpec
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding

# Set environment variables
def setup_environment():
    """Write the API keys and Pinecone environment name into ``os.environ``.

    Edit the placeholder strings below, or export the variables before
    starting the kernel. A value that is still a placeholder (it starts with
    ``#``) is only written when the variable has no value yet, so a real key
    that is already present in the environment is never replaced by
    placeholder text. Any non-placeholder value is always written.
    """
    settings = {
        "OPENAI_API_KEY": "# Replace with your actual OpenAI API key",
        "PINECONE_API_KEY": "# Replace with you actual Pinecone API key",
        "PINECONE_ENVIRONMENT": "us-east-1",
    }
    for name, value in settings.items():
        # Skip only when we would overwrite an existing value with a placeholder.
        if value.startswith("#") and os.environ.get(name):
            continue
        os.environ[name] = value

# Initialize Pinecone
def initialize_pinecone(index_name="llama-index-nvidia", dimension=1536,
                        metric="euclidean", cloud="gcp", region="us-west1"):
    """Connect to Pinecone and make sure the target index exists.

    Args:
        index_name: Name of the index to open; it is created first if missing.
        dimension: Vector size used on creation (1536 is the size of
            text-embedding-ada-002 vectors, the model this notebook embeds with).
        metric: Distance metric used on creation.
        cloud: Cloud provider handed to ``ServerlessSpec`` on creation.
        region: Cloud region handed to ``ServerlessSpec`` on creation.

    Returns:
        A ``(index_handle, index_name)`` tuple.
    """
    # NOTE(review): the default region differs from the "us-east-1" value that
    # setup_environment stores in PINECONE_ENVIRONMENT — confirm which is intended.
    pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    # Creation settings only matter when the index is missing; an existing
    # index is reused unchanged.
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )

    return pc.Index(index_name), index_name

# Load documents from URLs
def load_documents(urls=None, reader=None):
    """Fetch web pages and return the documents produced by the reader.

    Args:
        urls: List of page URLs. Defaults to the MolMIM NIM documentation
            pages below.
        reader: Object exposing ``load_data(urls)``. Defaults to
            ``SimpleWebPageReader(html_to_text=True)``.

    Returns:
        Whatever ``reader.load_data(urls)`` returns.
    """
    if urls is None:
        urls = [
            "https://docs.nvidia.com/nim/bionemo/molmim/latest/overview.html",
            "https://docs.nvidia.com/nim/bionemo/molmim/latest/prerequisites.html",
            "https://docs.nvidia.com/nim/bionemo/molmim/latest/deployment-guide.html",
            "https://docs.nvidia.com/nim/bionemo/molmim/latest/endpoints.html",
            "https://docs.nvidia.com/nim/bionemo/molmim/latest/advanced-usage.html",
        ]
    if reader is None:
        reader = SimpleWebPageReader(html_to_text=True)
    return reader.load_data(urls)

# Set up vector store and index
def setup_vector_store(index_name):
    """Create a Pinecone-backed storage context and register the OpenAI models.

    Args:
        index_name: Name of the Pinecone index the vector store talks to.

    Returns:
        A ``StorageContext`` wrapping a ``PineconeVectorStore`` for that index.

    Side effects:
        Assigns ``Settings.llm`` and ``Settings.embed_model``. Reads
        ``PINECONE_ENVIRONMENT`` from ``os.environ`` (``KeyError`` if unset).
    """
    # Build every component first; the global settings are touched last.
    embedder = OpenAIEmbedding(model="text-embedding-ada-002")
    chat_model = OpenAI(temperature=0)
    pinecone_store = PineconeVectorStore(
        index_name=index_name,
        environment=os.environ["PINECONE_ENVIRONMENT"],
    )
    context = StorageContext.from_defaults(vector_store=pinecone_store)

    Settings.llm, Settings.embed_model = chat_model, embedder
    return context

def main():
    """Run the ingestion pipeline end to end and return the resulting index.

    Steps, in order: set environment variables, open (or create) the Pinecone
    index, load the documents, build the storage context, then build a
    ``VectorStoreIndex`` from the documents using that storage context.
    """
    setup_environment()

    # Only the name is needed here; the index handle itself is not used.
    _, index_name = initialize_pinecone()

    docs = load_documents()
    context = setup_vector_store(index_name)

    return VectorStoreIndex.from_documents(docs, storage_context=context)

# Run the pipeline when this code executes as the top-level program and keep
# the returned index in the global name `index`.
if __name__ == "__main__":
    index = main()

Upserted vectors:   0%|          | 0/24 [00:00<?, ?it/s]