# Vault functionalities

In this example we will use the book *Ecce Homo* by Friedrich Nietzsche, in Walter Kaufmann's translation.
It is a scanned work, so there is some uncertainty in the OCR output, which we overcome with some rules of thumb and basic statistics.


In [None]:
%load_ext autoreload
%autoreload 2

import fitz
from obsidianizer.obsidian.vault import load_vault, save_vault
from obsidianizer.obsidian.transformations import join_vaults, create_common_words_vault
from obsidianizer.obsidian.journal_tools import get_vault_df_from_journal, get_journal_entries_from_vault
from obsidianizer.latex_tools.utils import load_drafts_entries
from obsidianizer.latex_tools.journal_processing import get_sentences
from obsidianizer.pdf_tools.ecce_homo import is_ecce_hommo_chapter, is_ecce_hommo_subsection
from obsidianizer.pdf_tools.documents import get_book_filtered_blocks, extract_book_annotations
from obsidianizer.obsidian.pdf_tools import get_vault_df_from_pdf
from obsidianizer.latex_tools.utils import load_drafts_entries, save_cleaned_sentences_to_latex, print_differences_in_journals
from obsidianizer import EXAMPLE_VAULT_NIETZSCHE_PATH, EXAMPLE_VAULT_SAVE_TO__PATH, EXAMPLE_VAULT_PATH
from obsidianizer import EXAMPLE_JOURNAL_PATH, EXAMPLE_CLEANED_JOURNAL_PATH, EXAMPLE_ECCE_HOMMO_PDF_PATH, EXAMPLE_ECCE_HOMMO_VAULT_PATH

# 1. Loading and saving vaults

Explain the structure of the dataframe.

## 1.1 Load vault folder

The folder a vault is loaded from can be different from the folder it is written to; this matters because the file paths of the two vaults could otherwise overlap.

In [None]:
# Location of the example Nietzsche vault; passed to load_vault in a later cell.
filepath = EXAMPLE_VAULT_NIETZSCHE_PATH

In [None]:
# Display the path value as the cell output.
filepath

In [None]:
# Load the vault found at `filepath`. The surrounding markdown refers to the
# loaded object as a dataframe.
vault_files = load_vault(filepath)
# Display the loaded vault as the cell output.
vault_files

## 1.2 Store vault 

In [None]:
# Destination handed to save_vault for the vault loaded in section 1.1.
saved_vault_path = EXAMPLE_VAULT_SAVE_TO__PATH
save_vault(vault_files, saved_vault_path)

# 2. Creating vaults

## 2.1 From list of words

In [None]:
# Create a vault from a plain list of words.
# NOTE(review): the second argument is presumably the folder the new vault is
# rooted at -- confirm against create_common_words_vault.
common_words_vault = create_common_words_vault(["Hello", "my", "friend"], "./my_fake_vault")
# Display the resulting vault as the cell output.
common_words_vault

## 2.2 From journal

Convert the journal dataframe into the equivalent vault dataframe.

In [None]:
# Journal input path and the vault path, both consumed by the cells that follow.
journal_filepath, vault_filepath = EXAMPLE_JOURNAL_PATH, EXAMPLE_VAULT_PATH

In [None]:
# Load the draft entries from the journal file and pass them straight through
# get_sentences, keeping only the final result in `journal_df`.
journal_df = get_sentences(load_drafts_entries(journal_filepath))

In [None]:
# Display the journal dataframe as the cell output.
journal_df

In [None]:
# Convert the journal dataframe into the equivalent vault dataframe;
# `vault_filepath` is passed as the second argument.
vault_journal_df = get_vault_df_from_journal(journal_df, vault_filepath)

In [None]:
# Display the journal-derived vault dataframe as the cell output.
vault_journal_df

In [None]:
# Save the journal-derived vault. No destination is passed here, unlike the
# two-argument save_vault call in section 1.2.
# NOTE(review): presumably save_vault then relies on paths already stored in
# the vault dataframe -- confirm against save_vault's signature.
save_vault(vault_journal_df)

### Get data_entries from vault

Convert a vault back into journal entries, so that the journal can go through several round trips to Obsidian and back in order to add the labelling.

In [None]:
# Rebuild the journal entries from the vault dataframe and run them through
# get_sentences, the same step applied to the original journal.
journal_df_reloaded = get_sentences(get_journal_entries_from_vault(vault_journal_df))

In [None]:
# Display the journal rebuilt from the vault as the cell output.
journal_df_reloaded

In [None]:
# Check the reloaded journal against the original one; the call returns two
# values, which are unpacked here.
weird_indices, weird_sentence_within_index = print_differences_in_journals(
    journal_df_reloaded, journal_df
)

In [None]:
# Display the first value returned by print_differences_in_journals.
weird_indices

## 2.3 From pdfs

In [None]:
# Open the example PDF and keep only the pages at indices 224..333
# (range(224, 334) excludes its upper bound).
doc = fitz.open(EXAMPLE_ECCE_HOMMO_PDF_PATH)
book = [doc[page_index] for page_index in range(224, 334)]

# Extract three kinds of blocks from the selected pages: those accepted by the
# chapter predicate, those accepted by the subsection predicate, and the
# annotations.
chapter_blocks = get_book_filtered_blocks(book, is_ecce_hommo_chapter)
subsection_blocks = get_book_filtered_blocks(book, is_ecce_hommo_subsection)
annotations_blocks = extract_book_annotations(book)

In [None]:
# Build the vault dataframe for the book from the chapter, subsection and
# annotation blocks; EXAMPLE_ECCE_HOMMO_VAULT_PATH is passed as the last argument.
ecce_hommo_vault = get_vault_df_from_pdf(chapter_blocks, subsection_blocks, annotations_blocks, EXAMPLE_ECCE_HOMMO_VAULT_PATH)

In [None]:
# Display the PDF-derived vault dataframe as the cell output.
ecce_hommo_vault

# 3. Vault transformations

## Join vaults

In [None]:
# Join the vault loaded in section 1.1 with the common-words vault from
# section 2.1.
# NOTE(review): this cell originally referenced `vault_df`, a name that is never
# defined in this notebook and therefore raised NameError. `vault_files` is
# assumed to be the intended vault -- confirm.
ultimate_vault = join_vaults(vault_files, common_words_vault)