## Set up the Docker container for the DB and install the library

In [None]:
!pip install -qU langchain_postgres
# Start the Postgres pg_vector Docker container with the command: docker compose up

## Set up the Postgres pg_vector vector store and define the function for importing documents

In [1]:
import os
from app.db.VectorStore import VectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from tqdm import tqdm
from app.db.parser.FileContentProvider import FileContentProvider
from app.db.DocumentMetadataCreator import DocumentMetadataCreator
from app.db.CodeGraph import CodeGraph

def import_source_documents(folder:str, verbose:bool=True, only_php:bool = False, graph: CodeGraph = None):
    """Chunk every file under ``data/<folder>`` and store the chunks in the vector DB.

    The directory tree is walked recursively. For each file the content is
    obtained from ``FileContentProvider.get_content``, metadata is built by
    ``DocumentMetadataCreator.create_metadata``, the text is split with a
    ``RecursiveCharacterTextSplitter`` (chunk size 10000, overlap 200) and the
    resulting ``Document`` objects are added to the store returned by
    ``VectorStore.get_vector_store(folder)``.

    Args:
        folder: Name of the sub-folder of ``data/`` to import; the same value
            is passed to ``VectorStore.get_vector_store``.
        verbose: When true, wrap each directory's file list in a ``tqdm``
            progress bar labelled with the current file name.
        only_php: Forwarded unchanged to ``FileContentProvider.get_content``.
        graph: Optional code graph; when given, ``graph.add_file(metadata)``
            is called once per file with that file's metadata.

    Returns:
        None. Failures of ``add_documents`` are printed and the import
        continues with the next file (best effort).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=200,
    )

    vectordb = VectorStore.get_vector_store(folder)
    # Named ``source_dir`` rather than ``dir`` so the builtin is not shadowed.
    source_dir = os.path.join("data/", folder)
    document_metadata_creator = DocumentMetadataCreator()
    file_content_provider = FileContentProvider()

    for root, _d_names, f_names in os.walk(source_dir):
        progress_bar = tqdm(f_names) if verbose else f_names
        for file in progress_bar:
            if verbose:
                progress_bar.set_description(file)

            # ``file`` is a bare name, so it has to be resolved against ``root``;
            # checking it on its own would test a path relative to the current
            # working directory instead of the entry being walked.
            if os.path.isdir(os.path.join(root, file)):
                continue

            text = file_content_provider.get_content(root, file, only_php=only_php)
            metadata = document_metadata_creator.create_metadata(root, file, text)

            # Compare against None explicitly: a graph object that evaluates as
            # false (for example an empty container) must still receive files.
            if graph is not None:
                graph.add_file(metadata)

            # Give every chunk its own metadata dict. Reusing one dict and
            # overwriting 'chunk_idx' in it would leave all chunks pointing at
            # the same object (ending up with the last index) and would also
            # modify the dict already handed to the graph.
            # Assumes ``metadata`` is a mapping — TODO confirm against
            # DocumentMetadataCreator.
            splits = [
                Document(
                    page_content=chunk,
                    metadata={**metadata, 'chunk_idx': idx},
                )
                for idx, chunk in enumerate(text_splitter.split_text(str(text)))
            ]

            try:
                vectordb.add_documents(splits)
            except Exception as e:
                # Deliberate best effort: report the failure and keep importing.
                print(e)


## Load Documentation

In [None]:
# Fetch the vector store for 'documentation' - the same argument that
# import_source_documents('documentation') passes to get_vector_store.
vectordb = VectorStore.get_vector_store('documentation')

In [None]:
# Drop the tables of the 'documentation' vector store obtained above.
# NOTE(review): presumably destructive and meant to be run only when the
# store should be rebuilt from scratch - confirm in VectorStore.drop_tables.
vectordb.drop_tables()

In [None]:
# Import every file under data/documentation with the defaults
# (tqdm progress bar on, only_php=False, no code graph).
import_source_documents('documentation')

## Load Codebase

In [None]:
# Create a new CodeGraph and pass it to the import of data/code, which calls
# graph.add_file(metadata) for each file; verbose=False disables the progress bar.
graph = CodeGraph()
import_source_documents('code', verbose=False, graph=graph)

### Export graph

In [5]:
import pickle
# Serialize the `graph` object from the "Load Codebase" cell to disk using the
# highest available pickle protocol. open() does not create missing
# directories, so data/graph must already exist.
with open('data/graph/graph.pickle', 'wb') as handle:
    pickle.dump(graph, handle, protocol=pickle.HIGHEST_PROTOCOL)

### Load graph and test

In [2]:
# CodeGraph is imported again here, so this cell does not depend on the import cell above.
from app.db.CodeGraph import CodeGraph

# Load the graph from the file written by the export cell.
# NOTE(review): CodeGraph.load presumably unpickles the file - if so, only
# load files from a trusted source; confirm in CodeGraph.
code_graph = CodeGraph.load('data/graph/graph.pickle')
# Smoke test: look up the relations recorded for one known PHP file and print them.
rels = code_graph.get_relations("common/components/regex/shareholderList/columnHeader/ColumnHeaderSharesPercentageOnlySharesSignRegexParser.php")
print(rels)

{'parent': 'RegexParser', 'dependencies': ['regex/RegexParser'], 'all_related': ['regex/RegexParser', 'RegexParser']}
