In [1]:
# sentence transformers
from llama_index.embeddings import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")

In [2]:
from llama_index.llms import LlamaCPP

# model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin"
model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf"

llm = LlamaCPP(
    # You can pass in the URL to a GGML model to download it automatically
    model_url=model_url,
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": 1},
    verbose=False,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from /Users/richy/Library/Caches/llama_index/models/llama-2-13b-chat.Q4_0.gguf (version GGUF V2)
llama_model_loader: - tensor    0:                token_embd.weight q4_0     [  5120, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q4_0     [ 13824,  5120,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_0     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_0     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_0     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q4

In [39]:
from llama_index import ServiceContext

service_context = ServiceContext.from_defaults(
    llm=llm, embed_model=embed_model
)

In [113]:
from llama_index import download_loader
import nest_asyncio
nest_asyncio.apply()

Reader = download_loader("SitemapReader")

In [114]:
loader = Reader()
doc_url = 'https://insurify.com/sitemap-car-insurance.xml'
documents = loader.load_data(sitemap_url=doc_url)

In [79]:
documents[0].metadata['Source']

'https://insurify.com/car-insurance/does-car-insurance-cover-car-or-driver/'

In [90]:
doc_url_2 = documents[0].metadata['Source']

Reader2 = download_loader('AsyncWebPageReader')
loader = Reader2()
documents2 = loader.load_data(urls=[doc_url_2])

In [91]:
documents2[0]

Document(id_='fe637a4e-1210-4f66-a6a9-782394614041', embedding=None, metadata={'Source': 'https://insurify.com/car-insurance/does-car-insurance-cover-car-or-driver/'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='5c20f04c427f9fcb6e22356f0b4bbc0bac0d6d98a05c52ec6f70823454acc5ef', text='<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="x-ua-compatible" content="ie=edge"/><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"/><link rel="preload" fetchPriority="high" href="https://insurifycdn.com/fonts/dm-sans/v11/rP2Cp2ywxg089UriASitCBimC3YU-Ck.woff2" as="font" type="font/woff2" crossorigin="" data-gatsby-head="true"/><link rel="preload" fetchPriority="high" href="https://insurifycdn.com/fonts/dm-sans/v11/rP2Hp2ywxg089UriCZOIHTWEBlw.woff2" as="font" type="font/woff2" crossorigin="" data-gatsby-head="true"/><meta name="generator" content="Gatsby 5.12.9"/><meta name="title" content="Does C

In [68]:
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [83]:
VectorStoreIndex.from_documents(
    [documents[0]], storage_context=storage_context, service_context=service_context, show_progress=True
)

Parsing documents into nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/125 [00:00<?, ?it/s]

ValueError: A string literal cannot contain NUL (0x00) characters.

## Postgres

In [87]:
import psycopg2

db_name = "vectors"
host = "localhost"
password = "123"
port = "5432"
user = "richyc"
# conn = psycopg2.connect(connection_string)
conn = psycopg2.connect(
    dbname="postgres",
    host=host,
    password=password,
    port=port,
    user=user,
)
conn.autocommit = True

with conn.cursor() as c:
    c.execute(f"DROP DATABASE IF EXISTS {db_name}")
    c.execute(f"CREATE DATABASE {db_name}")

vector_store = PGVectorStore.from_params(
    database=db_name,
    host=host,
    password=password,
    port="5432",
    user=user,
    table_name=doc_url,
    embed_dim=384,
)

In [85]:
from sqlalchemy import make_url
from llama_index.vector_stores import PGVectorStore

url = make_url(PG_URL)
db_name = "sitemap"
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=url.host,
    password=url.password,
    port="5432",
    user=url.username,
    table_name=doc_url,
    embed_dim=384,  # openai embedding dimension
)

In [23]:
index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context)

In [92]:
from llama_index import SimpleDirectoryReader, StorageContext, ServiceContext
from llama_index.indices.vector_store import VectorStoreIndex

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    [documents2[0]], storage_context=storage_context, service_context=service_context, show_progress=True
)

Parsing documents into nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/125 [00:00<?, ?it/s]

ValueError: A string literal cannot contain NUL (0x00) characters.

In [24]:
query_engine = index.as_query_engine()
query_engine

<llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x69e6c7df0>

In [27]:
response = query_engine.query("What is SR22?")

In [28]:
print(str(response))

Empty Response


In [None]:
response.metadata

In [None]:
sources = []
for v in response.metadata.values():
    sources.append(v['Source'])
set(sources)

In [None]:
response.response

In [None]:
{v for v in response.metadata.values()}

In [None]:
[v for v in response.metadata.values()]

## Pinecone

In [94]:
import pinecone
PINECONE_API="f9c7ca4d-bb85-4e4c-b456-dee770927767"
PINECONE_ENV="gcp-starter"

pinecone.init(api_key=PINECONE_API, environment=PINECONE_ENV)

In [95]:
pinecone.Index("sitemap")

<pinecone.index.Index at 0x689d1c7f0>

In [115]:
from llama_index.vector_stores import PineconeVectorStore

vector_store = PineconeVectorStore(pinecone.Index("sitemap"))
storage_context = StorageContext.from_defaults(vector_store=vector_store)
vector_store

PineconeVectorStore(stores_text=True, is_embedding_query=True, flat_metadata=False, api_key=None, index_name=None, environment=None, namespace=None, insert_kwargs={}, add_sparse_vector=False, text_key='text', batch_size=100, remove_text_from_metadata=False)

In [116]:
service_context = ServiceContext.from_defaults(
    llm=llm, embed_model=embed_model
)

index = VectorStoreIndex.from_documents(documents=documents, service_context=service_context, storage_context=storage_context)

Upserted vectors:   0%|          | 0/31719 [00:00<?, ?it/s]

In [117]:
query_engine = index.as_query_engine()
response = query_engine.query("What is SR22?")

In [118]:
print(str(response))

 An SR-22 is a form that confirms your insurance policy meets the state's minimum coverage requirements. If you opt for a liability-only policy, it will offer coverage to pay for damage and injuries you cause to other people and property. If you want coverage to pay for damages to your own vehicle, consider collision coverage and comprehensive coverage.


In [119]:
list({v['Source'] for v in response.metadata.values()})

['https://insurify.com/car-insurance/driver/driving-record/sr22-auto-insurance/']