In [1]:
# One-time PostgreSQL / pgvector setup. The lines below are terminal and psql
# commands, not Python: run them in a shell before executing the later cells.
#   1. `sudo -i -u postgres` opens a login shell as the `postgres` OS user.
#   2. `psql` starts the interactive PostgreSQL client.
#   3. `ALTER USER ...` sets the password that the notebook's connection
#      string uses further down.
#   4. `create extension vector;` enables the pgvector extension in the
#      database psql is connected to; `\dx` lists the installed extensions so
#      the result can be checked.
# NOTE(review): presumably the pgvector package must already be installed on
# the server for step 4 to succeed -- confirm for your platform.
# conda install psycopg2

sudo -i -u postgres
psql
ALTER USER postgres WITH PASSWORD 'test123';
create extension vector;
\dx

In [2]:
from llama_index import SimpleDirectoryReader
# Read every file found under ./data/ into a list of llama_index documents.
data_reader = SimpleDirectoryReader(input_dir="./data/")
documents = data_reader.load_data()

In [3]:
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from llama_index import ServiceContext
from llama_index.embeddings import LangchainEmbedding
from llama_index import set_global_service_context

from llama_index.llms import HuggingFaceLLM
import torch

# Hugging Face checkpoint used for both the tokenizer and the model weights.
# Other checkpoints kept here as drop-in options:
#   'mistralai/Mistral-7B-Instruct-v0.2'
#   "microsoft/phi-2"
model_name = "mistralai/Mistral-7B-v0.1"

# Local LLM placed on the GPU. The weights are requested in bfloat16 through
# model_kwargs to reduce memory usage.
llm_model_kwargs = {"torch_dtype": torch.bfloat16}
llm = HuggingFaceLLM(
    model_name=model_name,
    tokenizer_name=model_name,
    context_window=2048,
    max_new_tokens=256,
    device_map="cuda",
    model_kwargs=llm_model_kwargs,
)

# BGE embeddings from LangChain, wrapped so llama_index can use them.
bge_embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en")
embed_model = LangchainEmbedding(bge_embeddings)

# Bundle the LLM and the embedder, then register the bundle as the global
# default service context.
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
set_global_service_context(service_context)

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [4]:
import psycopg2
from psycopg2 import sql

# NOTE(review): credentials are hard-coded for a local demo database. Do not
# reuse this pattern with a real password -- read it from the environment.
connection_string = "postgresql://postgres:test123@localhost:5432"
db_name = "vector_db"

# Administrative connection used only to (re)create the target database.
conn = psycopg2.connect(connection_string)
try:
    # DROP/CREATE DATABASE cannot run inside a transaction block, so every
    # statement has to be committed on its own.
    conn.autocommit = True
    with conn.cursor() as c:
        # Compose the statements with a quoted identifier instead of raw
        # string interpolation; this also makes the created database name
        # exactly equal to db_name, which is what later code connects to.
        quoted_db = sql.Identifier(db_name)
        # WARNING: this wipes any existing database of the same name.
        c.execute(sql.SQL("DROP DATABASE IF EXISTS {}").format(quoted_db))
        c.execute(sql.SQL("CREATE DATABASE {}").format(quoted_db))
finally:
    # Release the server connection; nothing below needs it once the database
    # exists (conn stays defined but is closed).
    conn.close()

In [5]:
from sqlalchemy import make_url
from llama_index import StorageContext
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.vector_stores import PGVectorStore



# Width of the vectors stored in Postgres. It has to match the embedding
# model configured for this notebook (BAAI/bge-base-en, 768 dimensions), not
# OpenAI's text-embedding-ada-002.
d = 768

# Split the connection string into the individual fields PGVectorStore wants.
url = make_url(connection_string)
pg_params = dict(
    database=db_name,
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    table_name="memento",
    embed_dim=d,
)
vector_store = PGVectorStore.from_params(**pg_params)

# Build the index from the loaded documents, storing vectors in vector_store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    show_progress=True,
)


Parsing nodes:   0%|          | 0/156 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/156 [00:00<?, ?it/s]

In [None]:
# index.storage_context.persist()