In [79]:
from llama_index import (
    VectorStoreIndex, 
    KnowledgeGraphIndex,
    SimpleDirectoryReader, 
    ServiceContext,
    LLMPredictor,
    set_global_service_context
)
from llama_index.graph_stores import SimpleGraphStore
from llama_index.llms.openai import OpenAI
from tqdm import tqdm
import time, openai, os, re, shutil, ast
import pandas as pd
import numpy as np

from llama_index.node_parser import SimpleNodeParser

# load environment variables
from dotenv import load_dotenv
# Load variables from a .env file into the process environment (python-dotenv).
load_dotenv()
# Forward the key to the openai client; the lookup yields None when the variable is unset.
openai.api_key = os.getenv("OPENAI_API_KEY")

# 'temp' is the scratch folder books are copied into while loading;
# 'query_engines' receives the persisted per-category indexes.
for folder in ('temp', 'query_engines'):
    os.makedirs(folder, exist_ok=True)

In [80]:
def extract_title_and_creators(filename):
    """Read a book text file and pull the title and creator(s) out of its header.

    Parameters
    ----------
    filename : str
        Path to the text file. It is joined onto the current working
        directory, so an absolute path is used unchanged.

    Returns
    -------
    tuple
        ``(title, creators, is_book)``. When the text contains more than three
        "Image of page N" markers the file is treated as not being a book and
        ``('', '', False)`` is returned. Otherwise ``is_book`` is True, the
        captured title lines are joined with single spaces, the captured
        creator lines with ' - ', and either string is empty when its field
        is not found.
    """
    path = os.path.join(os.getcwd(), filename)
    with open(path, 'r', encoding='utf-8') as handle:
        text = handle.read()

    # Guard clause: more than 3 "Image of page N" markers means "not a book".
    if len(re.findall(r'Image of page \d+', text)) > 3:
        return '', '', False

    flags = re.MULTILINE | re.DOTALL

    def field_lines(pattern):
        """Return the stripped, non-empty lines captured by group 1 of `pattern`."""
        found = re.search(pattern, text, flags)
        captured = found.group(1).strip() if found else ''
        return [piece.strip() for piece in captured.split('\n') if piece.strip()]

    # Title: everything between "Title:" and the next occurrence of "Creator".
    title = ' '.join(field_lines(r'Title:\s*(.+?)(?=Creator)'))

    # NOTE(review): with re.MULTILINE the `$` alternative matches at every line
    # end, so the lazy group stops at the end of the first creator line and
    # continuation lines are not captured — confirm this is intended.
    creators = ' - '.join(field_lines(r'Creator\(s\):\s*(.+?)(?=\n\s+[A-Z][a-zA-Z]*:|$)'))

    return title, creators, True

In [81]:
# Chat model used for the LLM calls: gpt-3.5-turbo at temperature 0.
chatgpt = OpenAI(model="gpt-3.5-turbo", temperature=0)

# Service context bundling that LLM with a chunk size limit of 1024.
service_context = ServiceContext.from_defaults(llm=chatgpt, chunk_size_limit=1024)

# Node parser with library defaults (not referenced again in the cells visible here).
parser = SimpleNodeParser.from_defaults()

# Register the context as the library-wide default.
set_global_service_context(service_context)

In [82]:
# Per-book table (filename, final_bundled_category, title, authors).
books = pd.read_csv('filename_category.csv')
# Per-category table (Category, Description, Example Titles, Frequent Authors).
categories = pd.read_csv('category_description.csv')

last_index = 0
# category name -> list of loaded documents / category name -> index; filled in later cells.
category_docs, category_index = {}, {}

books.head()

Unnamed: 0,filename,final_bundled_category,title,authors
0,ccel_aaberg_hymnsdenmark.txt,Historical and Biographical Texts,Hymns and Hymnwriters of Denmark,"Aaberg, Jens Christian (1877-1970)"
1,ccel_abelard_misfortunes.txt,Christian Biography,Historia Calamitatum: The Story of My Misfortunes,"Abelard, Peter"
2,ccel_addison_evidences.txt,Theology and Beliefs,"The Evidences of the Christian Religion, with ...","Addison, Joseph (1672-1719)"
3,ccel_adeney_expositoreznehes.txt,Biblical Texts and Commentaries,"The Expositor's Bible: Ezra, Nehemiah, and Esther","Adeney, Walter Frederic (1849-1920)"
4,ccel_adeney_expositorsonglament.txt,Biblical Texts and Commentaries,The Expositor's Bible: The Song of Solomon and...,"Adeney, Walter Frederic (1849-1920)"


In [83]:
# Number of books in each bundled category, most frequent first.
books['final_bundled_category'].value_counts()

final_bundled_category
Biblical Texts and Commentaries      171
Theology and Beliefs                 169
Christian Life and Worship           167
Reformed Theology                    144
Miscellaneous                        140
Sermons                              102
Theology                              68
Historical and Biographical Texts     63
Reformed Commentaries                 58
Christian Living                      51
Christian Fiction                     41
Christian Devotional                  12
Early Christian Fathers               12
Christian Biography                   11
Christian Poetry                      11
Early Christian Literature            11
Systematic Theology                   10
Name: count, dtype: int64

In [84]:
# Display the category description table loaded from category_description.csv.
categories

Unnamed: 0,Category,Description,Example Titles,Frequent Authors
0,Biblical Texts and Commentaries,"Books focusing on the interpretation, analysis...","[""The Expositor's Bible: Ezra, Nehemiah, and E...","['Calvin, John (1509-1564)', 'Robertson, A. T...."
1,Christian Biography,Biographical accounts of notable Christian fig...,"['Autobiography of George Fox', 'Entire Sancti...","['Voragine, Jacobus de (1230-1298)', 'Fox, Geo..."
2,Christian Devotional,"Books meant for daily Christian reflection, pr...",['A Book of Strife in the Form of the Diary of...,"['MacDonald, George (1824-1905)', 'Flavel, Joh..."
3,Christian Fiction,Fictional works that emphasize Christian theme...,"[""The Pilgrim's Progress"", 'The Innocence of F...","['MacDonald, George (1824-1905)', 'Tolstoy, Le..."
4,Christian Life and Worship,"Books on Christian practices, worship, and dai...","['What I Saw in America', 'A COLLECTION OF LET...","['Spurgeon, Charles Haddon (1834-1892)', 'Sout..."
5,Christian Living,Guides and reflections on leading a Christian ...,"['A Discourse concerning Evangelical Love, Chu...","['Owen, John (1616-1683)', 'Murray, Andrew', '..."
6,Christian Poetry,Poetic works with Christian themes and express...,"['Hymns from the Land of Luther', 'Hymns of Te...","['Bevan, Frances', 'Brownlie, John', 'Walker, ..."
7,Early Christian Fathers,Writings from early Christian leaders and theo...,"['ECF: Aphrahat: Demonstrations', 'ECF: Eunomi...","['Pearse, Roger', 'Roger Pearse']"
8,Early Christian Literature,"Early texts, letters, and documents from the i...",['ANF02. Fathers of the Second Century: Hermas...,"['Schaff, Philip (1819-1893) (Editor)', 'Light..."
9,Historical and Biographical Texts,Accounts of Christian history and biographies ...,"['Biography of John Owen', 'ECF: Possidius: Li...","['Schaff, Philip (1819-1893)', 'Harnack, Adolf..."


In [68]:
# Re-draw the example titles / frequent authors of three categories by sampling
# distinct values from `books`:
#   * a seeded generator makes the draw reproducible across runs;
#   * sampling is without replacement, so no title/author is listed twice;
#   * NaN entries are dropped, since str([nan]) cannot be parsed back with
#     ast.literal_eval;
#   * when a category has fewer than N_EXAMPLES distinct values, all of them
#     are used instead of raising.
N_EXAMPLES = 3
SAMPLE_SEED = 42
RESAMPLED_CATEGORIES = ['Sermons', 'Reformed Commentaries', 'Reformed Theology']
# Column of `categories` -> column of `books` it is sampled from.
SAMPLED_COLUMNS = {'Example Titles': 'title', 'Frequent Authors': 'authors'}

rng = np.random.default_rng(SAMPLE_SEED)

for category_name in RESAMPLED_CATEGORIES:
    in_category = books['final_bundled_category'] == category_name
    for target_column, source_column in SAMPLED_COLUMNS.items():
        pool = books.loc[in_category, source_column].dropna().unique()
        picked = rng.choice(pool, size=min(N_EXAMPLES, len(pool)), replace=False).tolist()
        # Stored as the repr of a Python list, matching the format of the other rows.
        categories.loc[categories['Category'] == category_name, target_column] = str(picked)

In [69]:
# Persist the edited table. This overwrites the same category_description.csv
# that `categories` was read from, so the previous contents are lost.
categories.to_csv('category_description.csv', index=False)

In [85]:
def _joined(cell):
    """Parse a stringified Python list and join its items with ', '."""
    return ', '.join(ast.literal_eval(cell))


# One summary line per category: description, example titles, frequent authors.
index_summary = [
    f"{record['Description']} - Includes books like {_joined(record['Example Titles'])} and authors like {_joined(record['Frequent Authors'])}"
    for _, record in categories.iterrows()
]

# save index summary to txt file
with open('index_summary.txt', mode='w', encoding='utf-8') as file:
    file.write('\n'.join(index_summary))

In [71]:
# Build one vector index per category and persist it under query_engines/<category slug>.
for category in categories['Category']:
    print(f"\nIndexing {category}...")
    category_docs[category] = []
    temp_index = VectorStoreIndex([], service_context=service_context)
    for book in tqdm(books.loc[books['final_bundled_category'] == category, 'filename']):
        # Each book is staged under the same scratch name before loading.
        shutil.copy(f'raw_data/ccel/{book}', 'temp/temp.txt')
        # NOTE(review): `validation` is False when the file is judged not to be a
        # book, but it is not acted on here — such files are indexed with an empty
        # title/authors. Confirm whether they should be skipped.
        title, authors, validation = extract_title_and_creators(f'raw_data/ccel/{book}')
        # Load only the staged file. Pointing the reader at the whole 'temp'
        # directory would also pick up any other file that happens to be there
        # and attribute it to this book.
        documents = SimpleDirectoryReader(input_files=['temp/temp.txt']).load_data()
        for doc in documents:
            doc.metadata['title'] = title
            doc.metadata['authors'] = authors
            category_docs[category].append(doc)
            temp_index.insert(doc)
    # Persisted only after every book of the category has been inserted.
    temp_index.storage_context.persist(persist_dir=f'query_engines/{category.lower().replace(" ", "_")}')


Indexing Biblical Texts and Commentaries...


  0%|          | 0/171 [00:02<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Export the rows of the 'Christian Life and Worship' category to their own CSV.
books.loc[books['final_bundled_category'] == 'Christian Life and Worship', :].to_csv('christian_life_and_worship.csv', index=False)

In [None]:
# Build and persist the vector index for a hand-picked subset of categories.
for category in ['Christian Life and Worship', 'Sermons']:
    print(f"\nIndexing {category}...")
    category_docs[category] = []
    temp_index = VectorStoreIndex([], service_context=service_context)
    for book in tqdm(books.loc[books['final_bundled_category'] == category, 'filename']):
        # Each book is staged under the same scratch name before loading.
        shutil.copy(f'raw_data/ccel/{book}', 'temp/temp.txt')
        # NOTE(review): `validation` is False when the file is judged not to be a
        # book, but it is not acted on here — such files are indexed with an empty
        # title/authors. Confirm whether they should be skipped.
        title, authors, validation = extract_title_and_creators(f'raw_data/ccel/{book}')
        # Load only the staged file. Pointing the reader at the whole 'temp'
        # directory would also pick up any other file that happens to be there
        # and attribute it to this book.
        documents = SimpleDirectoryReader(input_files=['temp/temp.txt']).load_data()
        for doc in documents:
            doc.metadata['title'] = title
            doc.metadata['authors'] = authors
            category_docs[category].append(doc)
            temp_index.insert(doc)
    # Persisted only after every book of the category has been inserted.
    temp_index.storage_context.persist(persist_dir=f'query_engines/{category.lower().replace(" ", "_")}')


Indexing Christian Life and Worship...


100%|██████████| 167/167 [21:48<00:00,  7.84s/it]



Indexing Sermons...


100%|██████████| 102/102 [44:18<00:00, 26.06s/it]


In [72]:
# Read the query-engine base URLs, one per line.
# Lines are stripped and blank ones dropped, so a trailing newline or an empty
# line in the file does not turn into an empty URL entry in the list.
with open('urls to the query engines.txt', 'r', encoding='utf-8') as f:
    urls = [line.strip() for line in f if line.strip()]

urls

['https://biblical-texts-and-commentaries-west2-e4y6sp3yrq-wl.a.run.app',
 'https://christian-biography-west2-e4y6sp3yrq-wl.a.run.app',
 'https://christian-devotional-west2-e4y6sp3yrq-wl.a.run.app',
 'https://christian-fiction-west2-e4y6sp3yrq-wl.a.run.app',
 'https://christian-life-and-worship-west2-e4y6sp3yrq-wl.a.run.app',
 'https://christian-living-west2-e4y6sp3yrq-wl.a.run.app',
 'https://christian-poetry-west2-e4y6sp3yrq-wl.a.run.app',
 'https://early-christian-fathers-west2-e4y6sp3yrq-wl.a.run.app',
 'https://early-christian-literature-west2-e4y6sp3yrq-wl.a.run.app',
 'https://historical-and-biographical-texts-west2-e4y6sp3yrq-wl.a.run.app',
 'https://miscellaneous-west2-e4y6sp3yrq-wl.a.run.app',
 'https://reformed-commentaries-west2-e4y6sp3yrq-wl.a.run.app',
 'https://reformed-theology-west2-e4y6sp3yrq-wl.a.run.app',
 'https://sermons-west2-e4y6sp3yrq-wl.a.run.app',
 'https://systematic-theology-west2-e4y6sp3yrq-wl.a.run.app',
 'https://theology-and-beliefs-west2-e4y6sp3yrq-wl.

In [15]:
# Reload the per-category summary lines from disk.
# index_summary.txt is written as UTF-8, so it is read back as UTF-8 explicitly
# instead of relying on the platform's default encoding.
with open('index_summary.txt', 'r', encoding='utf-8') as f:
    index_summary = f.read().split('\n')

index_summary

["Books focusing on the interpretation, analysis, and study of biblical texts. - Includes books like The Expositor's Bible: Ezra, Nehemiah, and Esther, The Expositor's Bible: The Song of Solomon and the Lamentations of Jeremiah, The Expositor's Bible: The Epistles of St. John and authors like Calvin, John (1509-1564), Robertson, A. T. (1863-1934), Maclaren, Alexander (1826-1910)",
 'Biographical accounts of notable Christian figures. - Includes books like Autobiography of George Fox, Entire Sanctification, Historia Calamitatum: The Story of My Misfortunes and authors like Voragine, Jacobus de (1230-1298), Fox, George (1624-1691), Clarke, Adam',
 'Books meant for daily Christian reflection, prayer, and meditation. - Includes books like A Book of Strife in the Form of the Diary of an Old Soul, Christ Altogether Lovely, Daily Meditations and Prayers and authors like MacDonald, George (1824-1905), Flavel, John (1630-1691), Bradford, John (1510-1555)',
 "Fictional works that emphasize Chris

In [61]:
from langchain.tools import Tool

import requests

In [78]:
# Print, for every persisted index folder, the endpoint and summary paired with it.
# os.listdir() returns entries in arbitrary order, so the names are sorted to make
# the positional pairing deterministic.
# NOTE(review): the pairing is purely positional — it assumes `urls` and
# `index_summary` are in the same (sorted) order as the folder names; confirm.
for n, engine_dir in enumerate(sorted(os.listdir('query_engines'))):
    print('\n', engine_dir.replace('_', ' ').capitalize())
    print(engine_dir, urls[n]+'/query')
    print(index_summary[n])


 Biblical texts and commentaries
biblical_texts_and_commentaries https://biblical-texts-and-commentaries-west2-e4y6sp3yrq-wl.a.run.app/query
Books focusing on the interpretation, analysis, and study of biblical texts. - Includes books like The Expositor's Bible: Ezra, Nehemiah, and Esther, The Expositor's Bible: The Song of Solomon and the Lamentations of Jeremiah, The Expositor's Bible: The Epistles of St. John and authors like Calvin, John (1509-1564), Robertson, A. T. (1863-1934), Maclaren, Alexander (1826-1910)

 Christian biography
christian_biography https://christian-biography-west2-e4y6sp3yrq-wl.a.run.app/query
Biographical accounts of notable Christian figures. - Includes books like Autobiography of George Fox, Entire Sanctification, Historia Calamitatum: The Story of My Misfortunes and authors like Voragine, Jacobus de (1230-1298), Fox, George (1624-1691), Clarke, Adam

 Christian devotional
christian_devotional https://christian-devotional-west2-e4y6sp3yrq-wl.a.run.app/quer

In [17]:

def ask_question(question, url, timeout=None):
    """POST `question` as JSON (``{'question': question}``) to `url`.

    Parameters
    ----------
    question : str
        The question text to send.
    url : str
        Endpoint the request is posted to, used exactly as given.
    timeout : float or tuple, optional
        Forwarded to ``requests.post``. The default ``None`` keeps the previous
        behaviour of waiting indefinitely for the service.

    Returns
    -------
    The object returned by ``requests.post``.
    """
    # NOTE(review): another cell prints each endpoint as '<base url>/query';
    # confirm whether callers are expected to append '/query' before calling.
    return requests.post(url, json={'question': question}, timeout=timeout)


def find_engine_url(slug, candidates=None):
    """Return the URL whose host name starts with ``slug + '-'``.

    Looking the URL up by name avoids depending on the position of an entry in
    the list. If several URLs match (e.g. 'theology' and 'theology-and-beliefs'),
    the shortest one is returned.

    Parameters
    ----------
    slug : str
        Hyphenated category name, e.g. 'sermons'.
    candidates : list of str, optional
        URLs to search; defaults to the notebook-level `urls` list.

    Raises
    ------
    LookupError
        If no URL matches `slug`.
    """
    pool = urls if candidates is None else candidates
    matches = [u for u in pool if u.split('://', 1)[-1].startswith(slug + '-')]
    if not matches:
        raise LookupError(f"no query-engine URL found for {slug!r}")
    return min(matches, key=len)


# using this function as base, write a function for each of the urls in the list above
def ask_question_christian_life_and_worship(question):
    """Ask the 'christian-life-and-worship' query engine."""
    return ask_question(question, find_engine_url('christian-life-and-worship'))

def ask_question_sermons(question):
    """Ask the 'sermons' query engine."""
    return ask_question(question, find_engine_url('sermons'))


In [None]:
# LangChain tools, one per category query engine that has an ask_question_* wrapper.
# Tool functions must be plain callables available at notebook scope (there is
# no `self` here), and should return text rather than a raw HTTP response.
def _answer_text(ask):
    """Wrap an ask_question_* function so that it returns the answer string."""
    def run(question):
        # presumably the service replies with JSON carrying the answer under
        # 'response' (as printed elsewhere in this notebook) — TODO confirm
        return ask(question).json()['response']
    return run


tools = [
    Tool(
        name="Christian_Life_and_Worship",
        func=_answer_text(ask_question_christian_life_and_worship),
        description="Answers questions using the books in the 'Christian Life and Worship' category"
    ),
    Tool(
        name="Sermons",
        func=_answer_text(ask_question_sermons),
        description="Answers questions using the books in the 'Sermons' category"
    ),
]

In [30]:
# NOTE(review): `response` is not assigned in any cell visible here — presumably
# the result of an ask_question(...) call; confirm before relying on Run All.
output = response.json()
# Print the answer text carried under the 'response' key.
print(output['response'])

Predestination, as discussed in the context, refers to God's foreknowledge and foreordination of believers. It emphasizes that God knew and chose certain individuals to be conformed to the image of His Son and be saved. This predestination is based on God's purpose and plan, and it is not contingent or subject to doubt. The passage does not provide details on how or why God foreknew or predestined individuals, but it highlights that their salvation is entirely attributed to God's grace and design.


In [47]:
import string

# Display the lowercase ASCII letters (used further down to label source scores).
string.ascii_lowercase

'abcdefghijklmnopqrstuvwxyz'

In [59]:
# Group the returned source nodes by book title and list each node's score.
# Every node gets a letter label following its position in output['source_nodes'].
sources = {}
alphabet = string.ascii_lowercase


def rank_label(position):
    """Return 'a'..'z' for positions 0..25, then 'aa', 'ab', ...

    Indexing the alphabet directly raises IndexError once there are more than
    26 source nodes; this keeps producing labels instead.
    """
    label = ''
    position += 1
    while position > 0:
        position, remainder = divmod(position - 1, len(alphabet))
        label = alphabet[remainder] + label
    return label


for position, source in enumerate(output['source_nodes']):
    metadata = source['node']['metadata']
    title = metadata['title']
    if title not in sources:
        # First node seen for this title: remember its authors.
        sources[title] = {'authors': metadata['authors'], 'score': []}
    sources[title]['score'].append(f"{rank_label(position)}. {round(source['score'], 3)}")

# One numbered line per distinct title, in first-seen order.
source_text = "Sources:\n"
for n, (title, info) in enumerate(sources.items(), start=1):
    source_text += f"{n}. {title} by {info['authors']} - Confidence: {', '.join(info['score'])}\n"

print(source_text)

Sources:
1. Wesley's Notes on the Bible by Wesley, John (1703-1791) - Confidence: a. 0.873
2. Barnes' New Testament Notes by Barnes, Albert - Confidence: b. 0.853, e. 0.847, g. 0.84, i. 0.837, j. 0.836
3. Commentary Critical and Explanatory on the Whole Bible by Jamieson, Robert - Confidence: c. 0.851, h. 0.838
4. Jeremiah: Being the Baird Lecture for 1922 by Smith, George Adam (1856-1942) - Confidence: d. 0.85, f. 0.844



In [32]:
# List every distinct source title once, numbered in first-seen order.
# The titles already listed are tracked in `listed_titles` rather than in a
# variable called `books`, so the `books` DataFrame loaded at the top of the
# notebook is not replaced by a list.
listed_titles = []
source_text = "Sources:\n"
n = 1
for source in output['metadata']:
    details = output['metadata'][source]
    if details['title'] not in listed_titles:
        listed_titles.append(details['title'])
        source_text += f"{n}. {details['title']} by {details['authors']}\n"
        n += 1

print(source_text)

Sources:
1. Wesley's Notes on the Bible by Wesley, John (1703-1791)
2. Barnes' New Testament Notes by Barnes, Albert
3. Commentary Critical and Explanatory on the Whole Bible by Jamieson, Robert
4. Jeremiah: Being the Baird Lecture for 1922 by Smith, George Adam (1856-1942)

