# Load bibles and process

In [None]:
import pandas as pd
import os
import glob
import re

In [None]:
# Root folder of the project; every other location below is derived from it.
project_path = './'

# Structural XML files (translations, books, titles, paragraph breaks)
struct = f'{project_path}00_structures/'
lang_list_fn = f'{struct}languages_versions.xml'
book_list_fn = f'{struct}book_list.xml'
titles_list_fn = f'{struct}titles.xml'
btitles_list_fn = f'{struct}book_titles.xml'
pars_list_fn = f'{struct}paragraphs.xml'

# Folder holding the input CSV files
csv_path = f'{project_path}01_input/'
# Folders receiving the generated files, one per output format
tex_path = f'{project_path}02_outputs/latex/'
html_path = f'{project_path}02_outputs/html/'
osis_path = f'{project_path}02_outputs/osis/'

In [None]:
# Read the structural XML files that drive the conversion
print('Loading structural data:')

print('  - List of books in the Bible with numbers of chapters')
books = pd.read_xml(book_list_fn)

print('  - Available translations')
trans = pd.read_xml(lang_list_fn, attrs_only=True)

print('  - Updated titles of books')
# Reshape to long form: every non-'book' column becomes a (lang, title) row.
btitles = (
    pd.read_xml(btitles_list_fn)
    .melt(id_vars=['book'])
    .rename(columns={"variable": "lang", "value": "title"})
)

print('  - Titles of sections')
# Same reshaping, keyed by book/chapter/verse, plus a 'book.chap.verse'
# reference string used to match titles against verses.
titles = (
    pd.read_xml(titles_list_fn)
    .melt(id_vars=['book','chap','verse'])
    .rename(columns={"variable": "lang", "value": "title"})
    .assign(refs=lambda t: t['book'] + '.' + t['chap'].astype(str) + '.' + t['verse'].astype(str))
)

print('  - Paragraphs before each verse')
# The same 'book.chap.verse' reference string marks where paragraphs start.
pars = pd.read_xml(pars_list_fn).assign(
    refs=lambda p: p['book'] + '.' + p['chap'].astype(str) + '.' + p['verse'].astype(str)
)

print('Done...')

# Inspect the loaded frames (uncomment the ones of interest)
#display(books)
#display(trans)
#display(btitles)
display(titles)
#display(pars)

In [None]:
def pd_to_latex(df, print_titles=True):
    """Render the verses of one book as a LaTeX string.

    Reads the module-level frames ``pars`` (paragraph starts), ``titles``
    (section titles) and ``btitles`` (book-title overrides).

    Args:
        df: DataFrame with one row per verse and the columns ``book_abbr``,
            ``book_name``, ``chapter_num``, ``verse_num``, ``verse_text``
            and ``lang``.
        print_titles: When true, section titles are emitted and the first
            verse of a chapter gets a large ``\\bibchap`` marker only if it
            starts a paragraph (otherwise a small ``chapter.verse`` label).
            When false, no section titles are emitted and every chapter
            starts with a blank line and a ``\\bibchap`` marker.

    Returns:
        The LaTeX source as a single string, with runs of spaces collapsed
        to one space.
    """
    # Build the paragraph lookup once instead of rescanning it for every verse.
    paragraph_refs = set(pars['refs'])

    pieces = []
    for _, row in df.iterrows():
        verse_num = row['verse_num']
        chapter = str(row['chapter_num'])

        # Integer-like verse numbers are printed without a decimal part;
        # anything else falls back to its plain string form. The label is
        # recomputed for every row so a failed conversion can never reuse the
        # previous verse's label.
        try:
            verse_label = str(int(verse_num))
        except (TypeError, ValueError):
            verse_label = str(verse_num)

        # Add a paragraph (if necessary)
        verse_ref = row['book_abbr'] + '.' + chapter + '.' + verse_label
        starts_paragraph = verse_ref in paragraph_refs
        if starts_paragraph:
            pieces.append('\n\n')

        # Add name of the book
        # TODO: Don't insert chapter markers at v1, but at the first paragraph break after that
        if row['chapter_num'] == 1 and verse_num == 1:
            book_name = row['book_name']
            # Use the updated title only when one exists for this exact
            # book/language pair and has actually been filled in.
            override = btitles.loc[
                (btitles['book'] == row['book_abbr']) & (btitles['lang'] == row['lang']),
                'title',
            ]
            if len(override) > 0 and pd.notna(override.iloc[0]):
                book_name = override.iloc[0]
            pieces.append('\\nonumchapter{' + str(book_name) + '}\n\n')

        # Add a section title (if necessary)
        if print_titles:
            section = titles.loc[
                (titles['refs'] == verse_ref) & (titles['lang'] == row['lang']),
                'title',
            ]
            # Untranslated titles show up as None/NaN and are skipped.
            if len(section) > 0 and pd.notna(section.iloc[0]):
                pieces.append('\n\n\\nonumsection{' + str(section.iloc[0]) + '}\n\n')

        # Add the chapter/verse markers followed by the verse text
        verse_text = str(row['verse_text']) + ' '
        if verse_num == 1:
            if not print_titles:
                # Without titles the chapter number is always a large number
                pieces.append('\n\n\\bibchap{' + chapter + '}')
                pieces.append('\\bibverse{' + verse_label + '}' + verse_text)
            elif starts_paragraph:
                # Large chapter number only at the beginning of a paragraph
                pieces.append('\\bibchap{' + chapter + '}')
                pieces.append('\\bibverse{' + verse_label + '}' + verse_text)
            else:
                # Mid-paragraph chapter start: small "chapter.verse" label
                pieces.append('\\bibverse{' + chapter + '.' + verse_label + '}' + verse_text)
        elif verse_num > 1:
            # All other cases, just add the verse number
            pieces.append('\\bibverse{' + verse_label + '}' + verse_text)

    # Collapse runs of spaces; newlines are kept because they carry the
    # paragraph breaks.
    return re.sub(' {2,}', ' ', ''.join(pieces))

def latex_to_file(latex_str, fn):
    """Write ``latex_str`` to the file ``fn`` as UTF-8, replacing any existing content."""
    with open(fn, mode='w', encoding='utf-8') as handle:
        handle.write(latex_str)
    return None

def process_version(version):
    """Convert every input file of one translation into a LaTeX file.

    Every entry inside the input folders whose name ends with ``version`` is
    read as CSV, rendered with ``pd_to_latex(..., print_titles=False)`` and
    written to ``<tex_path>/<input folder name>/<file name without
    extension>.tex``. The output folder is created when missing.

    Args:
        version: Suffix of the input folder name(s) to process, e.g. ``'ASV'``.
    """
    print('Converting books to latex/html:')
    fn_list = sorted(glob.glob(csv_path + '*' + version + '/*', recursive=True))

    # Iterate over the files in the list
    for fn in fn_list:
        # Take the folder and file names with os.path so this works with any
        # path separator and any depth of project_path.
        version_dir = os.path.basename(os.path.dirname(fn))
        book = os.path.splitext(os.path.basename(fn))[0]
        print('  - ' + version_dir + '/' + book)
        # Read file
        df = pd.read_csv(fn)
        # Convert to latex
        book_tex = pd_to_latex(df, print_titles=False)
        # Save
        out_dir = os.path.join(tex_path, version_dir)
        os.makedirs(out_dir, exist_ok=True)
        latex_to_file(book_tex, os.path.join(out_dir, book + '.tex'))
    print('Done...')

def process_all():
    """Run ``process_version`` for every folder found directly under ``csv_path``.

    The version name is the second ``_``-separated part of the folder name,
    so a folder name without an underscore raises ``IndexError``.
    """
    for version_dir in sorted(glob.glob(csv_path + '*/', recursive=False)):
        # normpath drops the trailing separator that the '*/' pattern leaves,
        # so basename returns the folder name on every platform.
        dir_name = os.path.basename(os.path.normpath(version_dir))
        process_version(dir_name.split('_')[1])

In [None]:
# Convert a single translation. The argument must match the end of the
# translation's folder name under the input path (csv_path).
#process_version(version)
process_version('ASV')
#process_version('NASB1995')

# Alternatively, convert every translation found under the input path
# (uncomment to run).
#process_all()