# Tests of Source Code

In [None]:
import sys
import os
# Show the notebook's working directory.
print(os.getcwd())

# Make the `pgstore` module importable. Notebook cells are routinely re-run,
# so only append the directory when it is not already on sys.path; otherwise
# every re-run would add another duplicate entry.
pgstore_dir = r"C:\LlamaIndex\Resume-Creator\src\postgresDB"
if pgstore_dir not in sys.path:
    sys.path.append(pgstore_dir)
from pgstore import CreateDB

# NOTE(review): the connection string embeds a username/password and the path
# above is machine-specific -- consider moving both into configuration.
db = CreateDB(
    connection_string="postgresql://postgres:123456@localhost:5432",
    db_name="resume_db",
)
db.create_database()

## Read data from a sample PDF

In [None]:
from llama_index.core import SimpleDirectoryReader

# Load the contents of the data directory into `documents`.
# NOTE: nothing here checks that the directory exists before reading it.
directory_reader = SimpleDirectoryReader(
    input_dir=r"C:\LlamaIndex\Resume-Creator\data",
)
documents = directory_reader.load_data(show_progress=True)

## Store the data in PostgreSQL

In [None]:
import torch
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core import Settings
from pgstore import Vector_Store

In [None]:
# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

In [None]:
# Keyword arguments forwarded for model loading: request float16 weights.
half_precision_kwargs = {"torch_dtype": "float16"}

# Embedding model used when the documents are indexed; it runs on the
# device selected earlier in the notebook ("cuda" or "cpu").
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-base-en-v1.5",
    device=device,
    model_kwargs=half_precision_kwargs,
    normalize=True,
    embed_batch_size=16,
    parallel_process=True,
    show_progress_bar=True,
)

In [None]:
# Point the vector store at the same server and database name that were
# passed to CreateDB in the first cell.
# NOTE(review): the connection details are hard-coded here as well -- keep
# the two in sync, or move them into shared configuration.
vs = Vector_Store(
    db_name="resume_db",
    connection_string="postgresql://postgres:123456@localhost:5432",
)
vector_store = vs.create_index()

In [None]:
# Wrap the vector store in a StorageContext; this context is what gets
# handed to VectorStoreIndex.from_documents when the index is built.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [None]:
# Print, for each loaded document, whether its text contains a NUL character
# (PostgreSQL text values cannot store NUL, so these must be removed first).
for doc in documents:
    contains_null = '\x00' in doc.get_content()
    print(contains_null)

In [None]:
from llama_index.core.schema import Document

# Rebuild each document with NUL characters stripped from its text.
# Everything else is carried over from the source document: the id, the
# metadata, and the lists of metadata keys excluded from embedding / LLM
# text. Without this the cleaned copies would lose the metadata that the
# loaded documents carried.
cleaned_documents = []
for doc in documents:
    cleaned_text = doc.get_content().replace('\x00', '')
    cleaned_doc = Document(
        text=cleaned_text,
        doc_id=doc.doc_id,
        # Copy the containers so the cleaned document does not share mutable
        # state with the original.
        metadata=dict(doc.metadata),
        excluded_embed_metadata_keys=list(doc.excluded_embed_metadata_keys),
        excluded_llm_metadata_keys=list(doc.excluded_llm_metadata_keys),
    )
    cleaned_documents.append(cleaned_doc)

In [None]:
# Re-run the NUL-character check on the cleaned documents; each line printed
# is the membership result for one document.
for doc in cleaned_documents:
    still_has_null = '\x00' in doc.get_content()
    print(still_has_null)

In [None]:
# Build the index from the cleaned documents, using the embedding model
# configured above and the storage context that wraps the vector store.
index = VectorStoreIndex.from_documents(
    documents=cleaned_documents,
    embed_model=embed_model,
    storage_context=storage_context,
    show_progress=True,
)