<a href="https://colab.research.google.com/github/ryderwishart/biblical-machine-learning/blob/main/gpt-inferences/greek-hebrew-tsv-qa-agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Question answer over MACULA Greek and Hebrew

In [1]:
# !pip install tabulate pandas langchain

## Set up MACULA dataframes
Let's load up the Greek and Hebrew data and populate the domain column with the domain labels instead of numbers.

In [2]:
import requests, json, re, os
import pandas as pd

def download_file(url, file_name):
    response = requests.get(url)
    with open(file_name, "wb") as file:
        file.write(response.content)

# file1_url = "https://raw.githubusercontent.com/Clear-Bible/macula-greek/main/Nestle1904/TSV/macula-greek.tsv"
file1_url = "https://github.com/Clear-Bible/macula-greek/raw/feature/add-sentence-id-to-tsv/Nestle1904/TSV/macula-greek.tsv" # PR version with sentence IDs
file1_name = "macula-greek.tsv"

# file1_url = "https://raw.githubusercontent.com/Clear-Bible/macula-greek/main/Nestle1904/TSV/macula-greek.tsv"
file2_url = "https://raw.githubusercontent.com/Clear-Bible/macula-greek/main/sources/MARBLE/SDBG/marble-domain-label-mapping.json"
file2_name = "marble-domain-label-mapping.json"

if file1_name not in os.listdir():
    download_file(file1_url, file1_name)

if file2_name not in os.listdir():
    download_file(file2_url, file2_name)

# Import Macula Greek data
mg = pd.read_csv(
    "macula-greek.tsv", index_col="xml:id", sep="\t", header=0, converters={"*": str}
).fillna("missing")
# add an 'id' column
mg["id"] = mg.index

# mg['domain'] = mg['domain'].astype(str).fillna('missing')

# Extract book, chapter, and verse into separate columns
mg[["book", "chapter", "verse"]] = mg["ref"].str.extract(r"(\d?[A-Z]+)\s(\d+):(\d+)")

# Add columns for book + chapter, and book + chapter + verse for easier grouping
mg["book_chapter"] = mg["book"] + " " + mg["chapter"].astype(str)
mg["book_chapter_verse"] = mg["book_chapter"] + ":" + mg["verse"].astype(str)

# Import domain-label mapping
# Open the JSON file
with open("marble-domain-label-mapping.json", "r") as f:
    # Load the contents of the file as a dictionary
    domain_labels = json.load(f)

domain_labels["missing"] = "no domain"
domain_labels["nan"] = "no domain"

# Use domain labels to create a new column


def get_domain_label(domain_string_number):
    labels = [domain_labels[label] for label in domain_string_number.split(" ")]
    return labels


mg["domain_label"] = mg["domain"].apply(get_domain_label)
mg.head()

# MACULA Hebrew URL: https://media.githubusercontent.com/media/Clear-Bible/macula-hebrew/main/TSV/macula-hebrew.tsv

file3_url = "https://raw.githubusercontent.com/Clear-Bible/macula-hebrew/main/TSV/macula-hebrew.tsv"
file3_name = "macula-hebrew.tsv"

file4_url = "https://raw.githubusercontent.com/Clear-Bible/macula-hebrew/main/sources/MARBLE/SDBH/domain-label-mapping-1.json"
file4_name = "domain-label-mapping-1.json"

if file3_name not in os.listdir():
    # This is a GIT LFS file, so we need to ensure the LFS protocol is used (which wget seems to resolve automatically)
    !wget https://media.githubusercontent.com/media/Clear-Bible/macula-hebrew/main/TSV/macula-hebrew.tsv
    
if file4_name not in os.listdir():
    download_file(file4_url, file4_name)

# Import Macula Hebrew data
mh = pd.read_csv(
    "macula-hebrew.tsv", index_col="xml:id", sep="\t", header=0, converters={"*": str}
).fillna("missing")
# add an 'id' column
mh["id"] = mh.index

# mh['domain'] = mh['domain'].astype(str).fillna('missing')

# Extract book, chapter, and verse into separate columns
mh[["book", "chapter", "verse"]] = mh["ref"].str.extract(r"(\d?[A-Z]+)\s(\d+):(\d+)")
# Add columns for book + chapter, and book + chapter + verse for easier grouping
mh["book_chapter"] = mh["book"] + " " + mh["chapter"].astype(str)
mh["book_chapter_verse"] = mh["book_chapter"] + ":" + mh["verse"].astype(str)

# Import domain-label mapping to Hebrew as well using SBDH domain mapping
# Open the JSON file
with open("domain-label-mapping-1.json", "r") as f:
    # Load the contents of the file as a dictionary
    hebrew_domain_labels = json.load(f)

def get_hebrew_domain_label(domain_string_number):
    labels = [hebrew_domain_labels[label] for label in domain_string_number.split(">") if label in hebrew_domain_labels]
    return labels

mh["domain_label"] = mh["lexdomain"].apply(get_hebrew_domain_label)

mh.head()
print("Macula Greek column names: ", mg.columns)
print("Macula Hebrew column names: ", mh.columns)

  mh = pd.read_csv(


Macula Greek column names:  Index(['ref', 'role', 'class', 'type', 'gloss', 'text', 'after', 'lemma',
       'normalized', 'strong', 'morph', 'person', 'number', 'gender', 'case',
       'tense', 'voice', 'mood', 'degree', 'domain', 'ln', 'frame', 'subjref',
       'referent', 'id', 'book', 'chapter', 'verse', 'book_chapter',
       'book_chapter_verse', 'domain_label'],
      dtype='object')
Macula Hebrew column names:  Index(['ref', 'class', 'text', 'transliteration', 'after', 'strongnumberx',
       'stronglemma', 'sensenumber', 'greek', 'greekstrong', 'gloss',
       'english', 'mandarin', 'stem', 'morph', 'lang', 'lemma', 'pos',
       'person', 'gender', 'number', 'state', 'type', 'lexdomain',
       'contextualdomain', 'coredomain', 'sdbh', 'extends', 'frame', 'subjref',
       'participantref', 'id', 'book', 'chapter', 'verse', 'book_chapter',
       'book_chapter_verse', 'domain_label'],
      dtype='object')


## Set up QA agent

Use OpenAI

In [3]:
import getpass
secret_key = getpass.getpass('Enter OpenAI secret key: ')
os.environ['OPENAI_API_KEY'] = secret_key

Or use Vicuna via Shale Protocol (free tier)

In [4]:
# os.environ['OPENAI_API_BASE'] = "https://shale.live/v1"
# shale_api = getpass.getpass('Enter Shale API key: ')
# os.environ['OPENAI_API_KEY'] = shale_api

In [5]:
import pandas as pd
from langchain.llms import OpenAI
from langchain.agents import create_pandas_dataframe_agent

In [6]:
multi_df_agent = create_pandas_dataframe_agent(
    OpenAI(temperature=0), 
    [mg, mh], # list of dataframes, here mg = Macula Greek, mh = Macula Hebrew
    verbose=True,
    max_tokens_limit = 3000,
    max_tokens_per_prompt = 1000,
)

In [7]:
# multi_df_agent.run("How many unique domain labels are there (each domain label is a list)?")

In [8]:
# multi_df_agent.run("If you group all the rows by book, which books do not have some form of 'lord' or 'Lord' in their aggregated gloss column?")

This was a bit shocking to me, but neither First, Second, or Third John, or Titus contain the word 'Lord'! To double check this, look at the vocab list through [Scaife](https://vocab.perseus.org/word-list/urn:cts:greekLit:tlg0031.tlg017.perseus-grc2/?page=all)

## Additional functionality

### List building

Now let's try to generate a list of results and output as TSV or plaintext. 

First, let's create a new dataframe that contains the aggregate verses.

In [9]:
# Long book names to USFM (3 uppercase letters) format
book_name_mapping = {
    "Genesis": "GEN",
    "Exodus": "EXO",
    "Leviticus": "LEV",
    "Numbers": "NUM",
    "Deuteronomy": "DEU",
    "Joshua": "JOS",
    "Judges": "JDG",
    "Ruth": "RUT",
    "1 Samuel": "1SA",
    "2 Samuel": "2SA",
    "1 Kings": "1KI",
    "2 Kings": "2KI",
    "1 Chronicles": "1CH",
    "2 Chronicles": "2CH",
    "Ezra": "EZR",
    "Nehemiah": "NEH",
    "Esther": "EST",
    "Job": "JOB",
    "Psalms": "PSA",
    "Psalm": "PSA",
    "Proverbs": "PRO",
    "Ecclesiastes": "ECC",
    "Song of Solomon": "SNG",
    "Isaiah": "ISA",
    "Jeremiah": "JER",
    "Lamentations": "LAM",
    "Ezekiel": "EZK",
    "Daniel": "DAN",
    "Hosea": "HOS",
    "Joel": "JOL",
    "Amos": "AMO",
    "Obadiah": "OBA",
    "Jonah": "JON",
    "Micah": "MIC",
    "Nahum": "NAM",
    "Habakkuk": "HAB",
    "Zephaniah": "ZEP",
    "Haggai": "HAG",
    "Zechariah": "ZEC",
    "Malachi": "MAL",
    "Matthew": "MAT",
    "Mark": "MRK",
    "Luke": "LUK",
    "John": "JHN",
    "Acts": "ACT",
    "Romans": "ROM",
    "1 Corinthians": "1CO",
    "2 Corinthians": "2CO",
    "Galatians": "GAL",
    "Ephesians": "EPH",
    "Philippians": "PHP",
    "Colossians": "COL",
    "1 Thessalonians": "1TH",
    "2 Thessalonians": "2TH",
    "1 Timothy": "1TI",
    "2 Timothy": "2TI",
    "Titus": "TIT",
    "Philemon": "PHM",
    "Hebrews": "HEB",
    "James": "JAS",
    "1 Peter": "1PE",
    "2 Peter": "2PE",
    "1 John": "1JN",
    "2 John": "2JN",
    "3 John": "3JN",
    "Jude": "JUD",
    "Revelation": "REV"
}
reverse_book_name_mapping = {v:k for k, v in book_name_mapping.items()}

In [10]:
verse_df = mg.groupby("book_chapter_verse").agg({
    'gloss': lambda x: ' '.join(x),
    'lemma': lambda x: ' '.join(x),
    'normalized': lambda x: ' '.join(x)
}).reset_index()


# Add 'book' and 'book_chapter' columns to verse_df. use reverse_book_name_mapping for book, and then add new book value to chapter value
verse_df[["book","chapter"]] = verse_df["book_chapter_verse"].str.extract(r"(\d?[A-Z]+)\s(\d+)")

# Translate book abbreviation to full book name
verse_df["book"] = verse_df["book"].map(reverse_book_name_mapping)

# Combine book name and chapter number
verse_df["chapter"] = verse_df["book"] + " " + verse_df["chapter"]

verse_df.head()

Unnamed: 0,book_chapter_verse,gloss,lemma,normalized,book,chapter
0,1CO 10:1,Not I want for you to be ignorant brothers tha...,οὐ θέλω γάρ σύ ἀγνοέω ἀδελφός ὅτι ὁ πατήρ ἐγώ ...,Οὐ θέλω γάρ ὑμᾶς ἀγνοεῖν ἀδελφοί ὅτι οἱ πατέρε...,1 Corinthians,1 Corinthians 10
1,1CO 10:10,Neither are you to grumble as some of them gru...,μηδέ γογγύζω καθάπερ τὶς αὐτός γογγύζω καί ἀπό...,μηδέ γογγύζετε καθάπερ τινές αὐτῶν ἐγόγγυσαν κ...,1 Corinthians,1 Corinthians 10
2,1CO 10:11,These things now [as] types happened to them w...,οὗτος δέ τυπικώς συμβαίνω ἐκεῖνος γράφω δέ πρό...,ταῦτα δέ τυπικῶς συνέβαινεν ἐκείνοις ἐγράφη δέ...,1 Corinthians,1 Corinthians 10
3,1CO 10:12,Therefore the [one] thinking to stand let him ...,ὥστε ὁ δοκέω ἵστημι βλέπω μή πίπτω,Ὥστε ὁ δοκῶν ἑστάναι βλεπέτω μή πέσῃ,1 Corinthians,1 Corinthians 10
4,1CO 10:13,Temptation you not has seized if not what is c...,πειρασμός σύ οὐ λαμβάνω εἰ μή ἀνθρώπινος πιστό...,πειρασμός ὑμᾶς οὐκ εἴληφεν εἰ μή ἀνθρώπινος πι...,1 Corinthians,1 Corinthians 10


In [39]:
# Save verse_df, mg, and mh so I can load them up later without preprocessing
!mkdir -p preprocessed-macula-dataframes

verse_df.to_csv("preprocessed-macula-dataframes/verse_df.csv", index=False)
mg.to_csv("preprocessed-macula-dataframes/mg.csv", index=False)
mh.to_csv("preprocessed-macula-dataframes/mh.csv", index=False)

# to load one of these use pd.read_csv("preprocessed-macula-dataframes/verse_df.csv")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [12]:
macula_greek_agent = create_pandas_dataframe_agent(
    OpenAI(temperature=0), 
    # mg, # verse_df (?) 
    verse_df,
    verbose=True,
    max_tokens_limit = 3000,
    max_tokens_per_prompt = 1000,
)

We have to be pretty specific in the instructions we give the model for now, since it has to be able to understand what we might want to do with a Pandas dataframe. Thus, the following prompt breaks down a couple of steps:
1. Group the data in some way
2. Filter by criterion A
3. Filter again by criterion B

In [13]:
list_building_prompt = """List all the verses that mention 'Jesus' and 'Peter' in the gloss column."""


# list_building_prompt = """Which verses mention 'Jesus' and 'Πέτρος'?"""

# macula_greek_agent.run(list_building_prompt)

Great, so we were able to get the Agent to list out the findings, without actually having to write Python code to manipulate the Pandas dataframe ourselves!

Now let's try adding an output formatting directive to the prompt, namely, 
- Output as CSV
- Use a particular set of headings

In [14]:
list_building_and_save_as_csv_prompt = list_building_prompt + """\
Please return the results in a CSV with the following columns: \
'book_chapter_verse', 'gloss'. \
"""

# macula_greek_agent.run(list_building_and_save_as_csv_prompt)

Check the folder where this notebook is stored, and you should actually see a file called `result.csv` or `results.csv`, which contains the data we just extracted from the original dataframes, but we only had to query in natural language!

# Expand functionality for more tools using DB lookups

In [15]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings()

In [16]:
# bible_persist_directory = '/Users/ryderwishart/genesis/databases/berean-bible-database'
bible_persist_directory = '/Users/ryderwishart/genesis/databases/berean-bible-database'
bible_chroma = Chroma("berean-bible", embeddings, persist_directory=bible_persist_directory)
print(bible_chroma.search('jesus speaks to peter', search_type='similarity', k=1))

encyclopedic_persist_directory = '/Users/ryderwishart/biblical-machine-learning/gpt-inferences/databases/encyclopedic'
encyclopedic_chroma = Chroma(persist_directory=encyclopedic_persist_directory, embedding_function=embeddings, collection_name="encyclopedic")
print(encyclopedic_chroma.similarity_search_with_score('What is a sarcophagus?', search_type='similarity', k=1))

theology_persist_directory = '/Users/ryderwishart/biblical-machine-learning/gpt-inferences/databases/theology'
theology_chroma = Chroma("theology", embeddings, persist_directory=theology_persist_directory)
print(theology_chroma.search('jesus speaks to peter', search_type='similarity', k=1))

# # persist_directory = '/Users/ryderwishart/genesis/databases/itemized-prose-contexts copy' # NOTE: Itemized prose contexts are in this db
# persist_directory = '/Users/ryderwishart/genesis/databases/prose-contexts' # NOTE: Full prose contexts are in this db
# context_chroma = Chroma(persist_directory=persist_directory, embedding_function=embeddings, collection_name="prosaic_contexts_itemized")
# print(context_chroma.similarity_search_with_score('jesus (s) speaks (v) to peter (o)', search_type='similarity', k=1))

persist_directory = "/Users/ryderwishart/genesis/databases/prose-contexts-shorter-itemized"
context_chroma = Chroma(persist_directory=persist_directory, embedding_function=embeddings, collection_name="prosaic_contexts_shorter_itemized")
print(context_chroma.similarity_search_with_score('jesus (s) speaks (v) to peter (o)', search_type='similarity', k=1))

Using embedded DuckDB with persistence: data will be stored in: /Users/ryderwishart/genesis/databases/berean-bible-database


Using embedded DuckDB with persistence: data will be stored in: /Users/ryderwishart/biblical-machine-learning/gpt-inferences/databases/encyclopedic
Using embedded DuckDB with persistence: data will be stored in: /Users/ryderwishart/biblical-machine-learning/gpt-inferences/databases/theology


[Document(page_content='“But what about you?” Jesus asked. “Who do you say I am?” Peter answered, “The Christ of God.”', metadata={'source': 'Luke 9:20', 'usfm': 'LUK 9:20'})]
[(Document(page_content='Sometimes numerous small tombs, each one resembling somewhat a _kôk_, were\ncut in a hillside. Archæologists call such a group of tombs a\n“columbarium”; (see Fig. 230).', metadata={'source': 'encyclopedic/43070-0.txt'}), 1.1794637441635132)]


Using embedded DuckDB with persistence: data will be stored in: /Users/ryderwishart/genesis/databases/prose-contexts-shorter-itemized


[Document(page_content='\ufeffThe Project Gutenberg EBook of Archæology and the Bible, by George A. Barton\n\nThis eBook is for the use of anyone anywhere at no cost and with\nalmost no restrictions whatsoever.  You may copy it, give it away or\nre-use it under the terms of the Project Gutenberg License included\nwith this eBook or online at www.gutenberg.org\n\n\nTitle: Archæology and the Bible\n\nAuthor: George A. Barton\n\nRelease Date: July 1, 2013 [EBook #43070]\n\nLanguage: English\n\nCharacter set encoding: UTF-8\n\n*** START OF THIS PROJECT GUTENBERG EBOOK ARCHÆOLOGY AND THE BIBLE ***\n\n\nProduced by The Online Distributed Proofreading Team at\nhttp://www.pgdp.net (This file was produced from images\ngenerously made available by The Internet Archive.)\n\nARCHÆOLOGY AND THE BIBLE\n\n\n[Illustration: A HILLSIDE STREET IN ROMAN JERUSALEM ALONG WHICH JESUS AND\nTHE DISCIPLES MAY WELL HAVE WALKED (_after Germer-Durand_).\n\n--_Frontispiece._]\n\n\n  Green Fund Book, No. 17\n\n\n  A

## Get Syntax brackets

In [18]:
!pip install lxml

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [27]:
# Get the plain treedown representation for a token's sentence

from lxml import etree

# Get the plain treedown representation for a token's sentence

# example endpoint: "https://labs.clear.bible/symphony-dev/api/GNT/Nestle1904/lowfat?usfm-ref=JHN%2014:1" - JHN 14:1

def process_element(element, usfm_ref, indent=0, brackets=False):
    if brackets:
        indent = 0
    treedown_str = ""
    open_bracket = "[" if brackets else ""
    close_bracket = "] " if brackets else ""

    if element.get("class") == "cl":
        treedown_str += "\n" + open_bracket + ("  " * indent)

    if element.get("role"):
        role = element.attrib["role"]
        if role == "adv":
            role = "+"
        if not brackets:
            treedown_str += "\n"
        treedown_str += open_bracket + ("  " * indent) + role + ": "

    # # bold the matching token using usfm ref # NOTE: not applicable, since I think you have to use a USFM ref without the word
    # if element.tag == "w" and element.get("ref") == usfm_ref:
    #     treedown_str += "**" + element.text + "**"
    #     treedown_str += element.attrib.get("after", "") + close_bracket

    if element.tag == "w" and element.text:
        treedown_str += element.attrib.get("gloss", "") + element.attrib.get("after", "") + f"({element.text})"
        treedown_str += close_bracket

    for child in element:
        treedown_str += process_element(child, usfm_ref, indent + 1, brackets)

    return treedown_str

def get_treedown_by_ref(usfm_ref, brackets=True):
    usfm_passage = usfm_ref.split("!")[0]
    endpoint = (
        "https://labs.clear.bible/symphony-dev/api/GNT/Nestle1904/lowfat?usfm-ref="
        # "http://localhost:8984/symphony-dev/api/GNT/Nestle1904/lowfat?usfm-ref="
        + usfm_passage
    )
    
    # uri encode endpoint
    endpoint = requests.utils.requote_uri(endpoint)
    
    # print(endpoint)

    text_response = requests.get(endpoint).text
    xml = etree.fromstring(text_response.encode("utf-8"))

    treedown = process_element(xml, usfm_passage, brackets=brackets)
    return treedown

def get_syntax_for_query(query):
    # Get passage using bible passage lookup and grabbing first result metadata['usfm']
    most_relevant_passage_usfm_ref = bible_chroma.search(query, search_type='similarity', k=1)[0].metadata['usfm']
    return get_treedown_by_ref(most_relevant_passage_usfm_ref)

In [28]:
print(get_syntax_for_query('jesus speaks to peter'))

then (δὲ)] 
[[v: He said (εἶπεν)] [io: to them (αὐτοῖς)] [+: however (δὲ)] 
[[s: You (Ὑμεῖς)] 
[[o: [p: whom (τίνα)] [s: Me (με)] [vc: to be;(εἶναι)] [v: do pronounce (λέγετε)] then (δὲ)] 
[[s: Peter (Πέτρος)] 
[[+: [v: answering (ἀποκριθεὶς)] [v: said (εἶπεν)] 
[[o: [p: The (Τὸν)] Christ (Χριστὸν)] - (τοῦ)] of God.(Θεοῦ)] 


## Use self-querying for more specific data results

If we want to use a self-query tool to filter results with metadata, use this code:

In [29]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.chains.llm import LLMChain
from langchain.chains.query_constructor import ir
from langchain.chains.query_constructor.parser import QueryTransformer
from langchain.chains.query_constructor.prompt import DEFAULT_SCHEMA

metadata_field_info=[
    AttributeInfo(
        name="source",
        description="File path to the source document", 
        type="string", 
    ),
    # AttributeInfo(
    #     name="data_scope",
    #     description="Type/scope of linguistic data in document, one of [Syntax, Lexical, Discourse, Social-Situational, Cultural-encyclopedic]. Always pick one of these as a filter.",
    #     type="string", 
    # ),
    AttributeInfo(
        name="verse_ref",
        description="Complete BOK CH:VS reference for verse (in USFM format)",
        type="string", 
    ),
    AttributeInfo(
        name="book",
        description="Book name",
        type="string", 
    ),
    AttributeInfo(
        name="chapter",
        description="Chapter number",
        type="integer", 
    ),
    AttributeInfo(
        name="verse",
        description="Verse number",
        type="integer", 
    ),
]
document_content_description = "Linguistic data about a bible verse"
# retriever = SelfQueryRetriever.from_llm(llm, context_chroma, document_content_description, metadata_field_info, verbose=True)

The downside of the self-query path is that it involves more LLM calls in theory. However, it might be worth exploring in the future whether a single massive Chroma DB with richer metadata isn't more efficient than a bunch of smaller ones, provided the tool associated with the DB is a self-querying one that can filter results based on metadata.

In [30]:
from langchain.tools import tool

@tool
def get_context_for_most_relevant_passage(query, k=3):
    """Finds relevant context for a bible passage"""
    docs = bible_chroma.search(query, search_type='similarity', k=k)
    print(docs)
    most_relevant_doc = docs[0]
    # print('most_relevant_doc', most_relevant_doc)
    verse_ref = most_relevant_doc.metadata['source']
        
    # Input most relevant doc as metadata filter into context_chroma 
    # context_docs = retriever.get_relevant_documents(f"{query} in {verse_ref}")
    # context_docs = context_chroma.search(f"{query} in {verse_ref} ('{most_relevant_doc.page_content}')", search_type='similarity', k=1)
    context_docs = context_chroma.search(query, search_type='similarity', k=k)
    
    # return most_relevant_doc, context_docs
    return docs, context_docs

# tools = [
#     Tool(
#         name="Context for Most Relevant Passage",
#         func=get_context_for_most_relevant_passage.run,
#         description="useful for when you need to find relevant linguistic context for a Bible passage. Input should be a full question",
#     ),
# ]

# self_ask_with_search = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)


## Define custom tools for an agent

In [31]:
# Import things that are needed generically
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.llms import OpenAI
from langchain import LLMMathChain, SerpAPIWrapper


In [32]:
tools = [
    Tool(
        name="Bible Verse Reader Lookup",
        # Use the 
        func=lambda x: bible_chroma.search(x, search_type='similarity', k=5),
        description="useful for finding verses that are similar to the user's query, not suitable for complex queries",
    ),
    # Tool(
    #     name="Bible Words Lookup",
    #     func=macula_greek_agent.run, # Note: using the NT-only agent here
    #     description="useful for finding information about biblical words from the Macula TSV data, which includes glosses, lemmas, normalized forms, and more. This tool is not useful for grammar and syntax questions (about subjects, objects, verbs, etc.), but is useful for finding information about the words themselves",
    # ),
    Tool(
        name="Bible Verse Dataframe Tool",
        func=macula_greek_agent.run, # Note: using the NT-only agent here
        description="useful for finding information about Bible verses in a dataframe in case counting, grouping, aggregating, or list building is required. This tool is not useful for grammar and syntax questions (about subjects, objects, verbs, etc.), but is useful for finding information about the words (English or Greek or Greek lemmas) themselves",
    ),
    Tool(
        name="Linguistic Data Lookup",
        func=lambda x: context_chroma.similarity_search_with_score(x, search_type='similarity', k=2),
        description="useful for finding linguistic data about the user's query. Use this if the user is asking a question that relates to the linguistic discourse, situational context, participants, semantic roles (source, process, goal, etc.), or who the speakers are in a passage",
    ),
    # Tool(
    #     name="Context for Most Relevant Passage", # NOTE: this tool isn't working quite right. Needs some work
    #     func=get_context_for_most_relevant_passage.run,
    #     description="useful for when you need to find relevant linguistic context for a Bible passage. Input should be 'situation for' and the original user query",
    # ),
    Tool(
        name="Syntax Data Lookup",
        func=lambda x: get_syntax_for_query(x),
        description="useful for finding syntax data about the user's query. Use this if the user is asking a question that relates to a sentence's structure, such as 'who is the subject of this sentence?' or 'what are the circumstances of this verb?'", 
    ),
    Tool(
        name="Theological Data Lookup",
        func=lambda x: theology_chroma.search(x, search_type='similarity', k=5),
        description="useful for finding theological data about the user's query. Use this if the user is asking about theological concepts or value-oriented questions about 'why' the Bible says certain things. Always be sure to cite the source of the data",
    ),
    Tool(
        name="Encyclopedic Data Lookup",
        func=lambda x: encyclopedic_chroma.similarity_search_with_score(x, search_type='similarity', k=5),
        description="useful for finding encyclopedic data about the user's query. Use this if the user is asking about historical, cultural, geographical, archaeological, or other types of information from secondary sources",
    ),
    Tool(
        name="Any Other Kind of Question Tool",
        func=lambda x: "Sorry, I don't know!",
        description="This tool is for vague, broad, ambiguous questions",
    ),
]

In [33]:
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# from langchain.callbacks.streamlit import StreamlitCallbackHandler
from langchain.llms.fake import FakeListLLM

agent = initialize_agent(tools, OpenAI(temperature=0, streaming=True, callbacks=[StreamingStdOutCallbackHandler()]), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
# agent = initialize_agent(tools, OpenAI(temperature=0, streaming=True, callbacks=[StreamlitCallbackHandler()]), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)


## Test agent tools

In [25]:
agent.run("Where did Jesus heal the man born blind?")



[1m> Entering new AgentExecutor chain...[0m
 I need to find a Bible verse that answers this question.
Action: Bible Verse Reader Lookup
Action Input: Jesus heal man born blind[32;1m[1;3m I need to find a Bible verse that answers this question.
Action: Bible Verse Reader Lookup
Action Input: Jesus heal man born blind[0m
Observation: [36;1m[1;3m[Document(page_content='Now as Jesus was passing by, He saw a man blind from birth,', metadata={'source': 'John 9:1', 'usfm': 'JHN 9:1'}), Document(page_content='“What do you want Me to do for you?” Jesus asked. “Rabboni,” said the blind man, “let me see again.”', metadata={'source': 'Mark 10:51', 'usfm': 'MRK 10:51'}), Document(page_content='Once again Jesus placed His hands on the man’s eyes, and when he opened them his sight was restored, and he could see everything clearly.', metadata={'source': 'Mark 8:25', 'usfm': 'MRK 8:25'}), Document(page_content='Never before has anyone heard of opening the eyes of a man born blind.', metadata={

'Jesus healed the man born blind in John 9:1, Mark 10:51, Mark 8:25, and John 9:32.'

In [41]:
agent.run("In what city did Jesus heal the man born blind?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find a Bible verse that mentions this event.
Action: Bible Verse Lookup
Action Input: Jesus healing man born blind[0m
Observation: [36;1m[1;3m[Document(page_content='Now as Jesus was passing by, He saw a man blind from birth,', metadata={'source': 'John 9:1', 'usfm': 'JHN 9:1'}), Document(page_content='Once again Jesus placed His hands on the man’s eyes, and when he opened them his sight was restored, and he could see everything clearly.', metadata={'source': 'Mark 8:25', 'usfm': 'MRK 8:25'}), Document(page_content='Never before has anyone heard of opening the eyes of a man born blind.', metadata={'source': 'John 9:32', 'usfm': 'JHN 9:32'}), Document(page_content='At that very hour Jesus healed many people of their diseases, afflictions, and evil spirits, and He gave sight to many who were blind.', metadata={'source': 'Luke 7:21', 'usfm': 'LUK 7:21'}), Document(page_content='“What do you want Me to do for you?” 

'The city where Jesus healed the man born blind was Jerusalem.'

In [31]:
agent.run("Who is most often mentioned in the book of luke?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out who is mentioned the most in the book of Luke.
Action: Bible Verse Dataframe Tool
Action Input: Book of Luke[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the book in the dataframe
Action: python_repl_ast
Action Input: df[df['book'] == 'Luke'][0m
Observation: [36;1m[1;3m     book_chapter_verse                                              gloss  \
4040           LUK 10:1  After now these things appointed the Lord othe...   
4041          LUK 10:10  Into whatever now - city you might enter and n...   
4042          LUK 10:11  Even the dust - having clung to us out of the ...   
4043          LUK 10:12  I say to you that for Sodom in the day that mo...   
4044          LUK 10:13  Woe to you Chorazin Woe to you Bethsaida For i...   
...                 ...                                                ...   
5184           LUK 9:61  Said then also another I will fo

'Jesus is mentioned the most in the book of Luke.'

In [35]:
agent.run("What can you tell me about the discourse structure in the account of the man born blind?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out what the discourse structure is.
Action: Linguistic Data Lookup
Action Input: Account of the man born blind[0m
Observation: [38;5;200m[1;3m[(Document(page_content='## Discourse features\n\nThis verse has 3 discourse features (these are useful heuristic interpretive annotations that tell you about the nature of the proposition a word is in):\n- Main clauses is defined as Main clauses are the top-level clauses in a sentence. They are the clauses that are not embedded in other clauses.\n  - καὶ (And) εἶπαν (they said) αὐτῷ (to him)\n  - Ποῦ (Where) ἐστιν (is) ἐκεῖνος (He)\n  - λέγει (He says)\n  - Οὐκ (Not) οἶδα (I know)\n- Reported Speech is defined as Reported speech.\n  - Ποῦ (Where) ἐστιν (is) ἐκεῖνος (He)\n  - Οὐκ (Not) οἶδα (I know)\n- Historical Present is defined as Highlights not the speech or act to which it refers but the event(s) that follow (DFNTG §12.2).\n  - λέγει (He says)\n\nSpeaker data i

'The discourse structure in the account of the man born blind includes main clauses, reported speech, historical present, focus+, ambiguous, embedded DFE, over-encoding, articular pronoun, and topical genitive.'

In [37]:
agent.run("What can you tell me about the syntactic structure in the account of the man born blind?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to look up the syntax data of the passage.
Action: Syntax Data Lookup
Action Input: Account of the man born blind[0mhttps://labs.clear.bible/symphony-dev/api/GNT/Nestle1904/lowfat?usfm-ref=JHN%209:1

Observation: [36;1m[1;3mAnd (Καὶ)] 
[
[[+: [v: passing by (παράγων)] [v: He saw (εἶδεν)] [o: a man (ἄνθρωπον)] blind (τυφλὸν)] from (ἐκ)] birth.(γενετῆς)] [0m
Thought:[32;1m[1;3m I now know the syntactic structure of the passage.
Final Answer: The syntactic structure of the passage is that of a verb phrase with a subject, verb, and object. The subject is "He", the verb is "saw", and the object is "a man blind from birth".[0m

[1m> Finished chain.[0m


'The syntactic structure of the passage is that of a verb phrase with a subject, verb, and object. The subject is "He", the verb is "saw", and the object is "a man blind from birth".'

In [46]:
agent.run("Who are the participants in the account of the man born blind? How would you describe the situation?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find the passage in the Bible that describes the man born blind.
Action: Bible Verse Lookup
Action Input: man born blind[0m
Observation: [36;1m[1;3m[Document(page_content='Now as Jesus was passing by, He saw a man blind from birth,', metadata={'source': 'John 9:1', 'usfm': 'JHN 9:1'}), Document(page_content='Never before has anyone heard of opening the eyes of a man born blind.', metadata={'source': 'John 9:32', 'usfm': 'JHN 9:32'}), Document(page_content='and His disciples asked Him, “Rabbi, who sinned, this man or his parents, that he was born blind?”', metadata={'source': 'John 9:2', 'usfm': 'JHN 9:2'}), Document(page_content='His parents answered, “We know he is our son, and we know he was born blind.', metadata={'source': 'John 9:20', 'usfm': 'JHN 9:20'}), Document(page_content='The Jews still did not believe that the man had been blind and had received his sight until they summoned his parents', metadata={

'The participants in the account of the man born blind are Jesus, the man born blind, his parents, the disciples, and the Jews. The situation is that Jesus is passing by and sees the man born blind, and the disciples ask Jesus who sinned to cause the man to be born blind. The Jews do not believe the man was born blind until they summon his parents.'

In [43]:
agent.run("Why did Jesus heal the man born blind?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should look for a Bible verse that answers this question.
Action: Bible Verse Lookup
Action Input: Why did Jesus heal the man born blind?[0m
Observation: [36;1m[1;3m[Document(page_content='Now as Jesus was passing by, He saw a man blind from birth,', metadata={'source': 'John 9:1', 'usfm': 'JHN 9:1'}), Document(page_content='The blind and the lame came to Him at the temple, and He healed them.', metadata={'source': 'Matthew 21:14', 'usfm': 'MAT 21:14'}), Document(page_content='Once again Jesus placed His hands on the man’s eyes, and when he opened them his sight was restored, and he could see everything clearly.', metadata={'source': 'Mark 8:25', 'usfm': 'MRK 8:25'}), Document(page_content='They brought to the Pharisees the man who had been blind.', metadata={'source': 'John 9:13', 'usfm': 'JHN 9:13'}), Document(page_content='At that very hour Jesus healed many people of their diseases, afflictions, and evil spirits, an

'Jesus healed the man born blind to demonstrate His power and authority over physical and spiritual blindness, and to show that He was the Messiah.'

# Steamlit UI

In [38]:
import streamlit as st
import sys
from io import StringIO

# Create a text input for user input
user_input = st.text_input("Enter your message:")

# Capture stdout
class CapturedOutput:
    def __enter__(self):
        self._stdout = sys.stdout
        sys.stdout = self._stringio = StringIO()
        return self

    def __exit__(self, type, value, traceback):
        sys.stdout = self._stdout

    @property
    def getvalue(self):
        return self._stringio.getvalue()

if user_input:
    with CapturedOutput() as output:
        # Run the agent with the user's input
        final_output = agent.run(user_input)

    # Display the final output in the main app
    st.write(f"Chatbot: {final_output}")

    # Display the stdout in the sidebar
    st.sidebar.write(output.getvalue)


In [37]:
import streamlit as st

st.title('My LangChain App')
user_input = st.text_input("Enter some text")

if user_input:
    # capture standard output before running agent
    with Capturing() as output:
        result = agent.run(user_input)
        # print the standard output
        for line in output:
            st.write(line)
            
    st.write(result)

# Gradio UI

Create a fake agent for testing purposes

In [61]:
# from langchain.llms.fake import FakeListLLM
# from langchain.agents import load_tools
# from langchain.agents import initialize_agent
# from langchain.agents import AgentType
# tools = load_tools(["python_repl"])
# responses=[
#     "Action: Python REPL\nAction Input: print(2 + 2)",
#     "Final Answer: The final answer happens to be 4!"
# ]
# llm = FakeListLLM(responses=responses)
# # agent = initialize_agent(tools, OpenAI(temperature=0, streaming=True, callbacks=[StreamingStdOutCallbackHandler()]), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
# fake_agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
# def fake_agent_run(inputs):
#     fake_agent.run(inputs)

In [62]:
# fake_agent_run('')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: Python REPL
Action Input: print(2 + 2)[0m
Observation: [36;1m[1;3m4
[0m
Thought:[32;1m[1;3mFinal Answer: The final answer happens to be 4![0m

[1m> Finished chain.[0m


Special callback manager for Gradio

In [34]:
import gradio as gr
import time
from langchain.callbacks.base import BaseCallbackHandler
from typing import Any
from threading import Thread

class StreamToGradio(BaseCallbackHandler):
    """Callback handler for streaming. Only works with LLMs that support streaming."""
    def __init__(self, chat_history):
        self.chat_history = chat_history

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Run on new LLM token. Only available when streaming is enabled."""
        self.chat_history[-1][1] += token
        yield self.chat_history


chat_history = [["", ""]]
# Instantiate agent with streaming to gradio enabled
agent = initialize_agent(tools, OpenAI(streaming=True, callbacks=[StreamToGradio(chat_history=chat_history)], temperature=0), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(value="Where is Barnabas mentioned?")
    clear = gr.Button("Clear")

    def user(user_message, history):
        return gr.update(value="", interactive=False), history + [[user_message, ""]]

    def bot(history):
        Thread(target=agent.run, args=(history[-1][0],)).start()  # Run the agent in a separate thread
        while True:
            yield history
            time.sleep(0.1)  # Wait a bit before checking for new output

    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()


Running on local URL:  http://127.0.0.1:7878

To create a public link, set `share=True` in `launch()`.




Non-streaming gradio app

In [36]:
import gradio as gr

def qa_run(inputs):
    return multi_df_agent.run(inputs)

def lb_run(inputs):
    return macula_greek_agent.run(inputs)

def csv_run(inputs):
    return macula_greek_agent.run(inputs)

def agent_run(inputs):
    return agent.run(inputs)

# TODO: for each function, aggregate a list of 'sources_consulted'

app = gr.Blocks(theme='bethecloud/storj_theme')

with app:
    gr.Markdown("## Query Linguistic Data with Natural Language")
    gr.Markdown("This demo extracts answers and lists of results from linguistic data using natural language input only.")
    # gr.Markdown("[Read More](https://medium.com/@langchain/using-openai-gpt-3-to-query-and-manipulate-dataframes-1f4f8f211e4d)")

    with gr.Tab("Question Answering from Greek and Hebrew Spreadsheet Data"):
        qa_text = gr.Textbox(lines=5, label="Question", value="How many unique domain labels are there (each domain label is a list)?")
        qa_button = gr.Button("Run")
        qa_output = gr.Textbox(label="Answer", lines=5)
        qa_button.click(
            qa_run,
            inputs=qa_text,
            outputs=qa_output,
        )
    with gr.Tab("List Building"):
        lb_text = gr.Textbox(lines=5, label="Prompt", value=list_building_prompt)
        lb_button = gr.Button("Run")
        lb_output = gr.Textbox(label="List", lines=5)
        lb_button.click(
            lb_run,
            inputs=lb_text,
            outputs=lb_output,
        )
    with gr.Tab("CSV Generation"):
        csv_text = gr.Textbox(lines=5, label="Prompt", value=list_building_and_save_as_csv_prompt)
        csv_button = gr.Button("Run")
        csv_output = gr.Textbox(label="CSV", lines=5)
        csv_button.click(
            csv_run,
            inputs=csv_text,
            outputs=csv_output,
        )
    with gr.Tab("AI Agent with Access to Multiple Tools"):
        qa_text = gr.Textbox(lines=5, label="Question", value="How many unique domain labels are there (each domain label is a list)?")
        qa_button = gr.Button("Run")
        qa_output = gr.Textbox(label="Answer", lines=5)
        qa_button.click(
            agent_run,
            inputs=qa_text,
            outputs=qa_output,
        )

app.launch(debug=True)


Streaming gradio app

In [45]:
import gradio as gr

def add_one_word_to_string(inputs):
    return inputs + " " + "hello" + str(len(inputs))

def agent_run(inputs, state=""):
    # response = agent.run(inputs) for now just hard code 'streaming' input until we can get the streaming working
    
    response = add_one_word_to_string(inputs)
    state += response + " "
    return state, state

app = gr.Interface(
    fn=agent_run,
    inputs=[
        gr.Textbox(lines=2, placeholder='Enter here...'),
        "state"
    ],
    outputs=[
        "text",
        "state"
    ],
    title="Query Linguistic Data with Natural Language",
    description="This demo extracts answers and lists of results from linguistic data using natural language input only.",
    live=True,
    examples=[
        ["In what city did Jesus heal the man born blind?"],
        ["Who is most often mentioned in the book of luke?"],
        ["What can you tell me about the discourse structure in the account of the man born blind?"],
        ["What can you tell me about the syntactic structure in the account of the man born blind?"],
        ["Who are the participants in the account of the man born blind? How would you describe the situation?"],
        ["Why did Jesus heal the man born blind?"],
        ["How many unique domain labels are there (each domain label is a list)?"],
        [list_building_prompt],
        [list_building_and_save_as_csv_prompt],
    ],
)

app.launch(debug=True)
    

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.




In [None]:
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        return gr.update(value="", interactive=False), history + [[user_message, None]]

    def bot(history):
        bot_message = agent.run(history[-1][0])  # Use your agent to generate the response
        history[-1][1] = ""
        for character in bot_message:
            history[-1][1] += character
            yield history

    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()


Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.






[1m> Entering new AgentExecutor chain...[0m
 I need to find a Bible verse that mentions Silas.
Action: Bible Verse Reader Lookup
Action Input: Silas[32;1m[1;3m I need to find a Bible verse that mentions Silas.
Action: Bible Verse Reader Lookup
Action Input: Silas[0m
Observation: [36;1m[1;3m[Document(page_content='Shallum, Amariah, and Joseph.', metadata={'source': 'Ezra 10:42', 'usfm': 'EZR 10:42'}), Document(page_content='Jehud, Bene-berak, Gath-rimmon,', metadata={'source': 'Joshua 19:45', 'usfm': 'JOS 19:45'}), Document(page_content='Shelemiah, Nathan, Adaiah,', metadata={'source': 'Ezra 10:39', 'usfm': 'EZR 10:39'}), Document(page_content='Adam, Seth, Enosh,', metadata={'source': '1 Chronicles 1:1', 'usfm': '1CH 1:1'}), Document(page_content='Vaniah, Meremoth, Eliashib,', metadata={'source': 'Ezra 10:36', 'usfm': 'EZR 10:36'})][0m
Thought: None of these verses mention Silas.
Action: Bible Verse Dataframe Tool
Action Input: Silas[32;1m[1;3m None of these verses mention S

Streaming gradio app:

In [50]:
import gradio as gr
import sys
import io

class OutputCapture:
    def __init__(self):
        self.stream = io.StringIO()

    def __enter__(self):
        self.old_stdout = sys.stdout
        sys.stdout = self.stream
        return self

    def __exit__(self, type, value, traceback):
        sys.stdout = self.old_stdout

    def getvalue(self):
        return self.stream.getvalue()

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        return gr.update(value="", interactive=False), history + [[user_message, None]]

    def bot(history):
        with OutputCapture() as c:
            agent.run(history[-1][0])  # Use your agent to generate the response
            bot_message = c.getvalue()  # Get the output from the agent
        history[-1][1] = ""
        for character in bot_message:
            history[-1][1] += character
            yield history

    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()



Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.




In [37]:
# import subprocess

# def myrun(cmd):
#     """from http://blog.kagesenshi.org/2008/02/teeing-python-subprocesspopen-output.html
#     """
#     p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
#     stdout = []
#     while True:
#         line = p.stdout.readline()
#         stdout.append(line)
#         print line,
#         if line == '' and p.poll() != None:
#             break
#     return ''.join(stdout)

Stream with separate thread

In [32]:
# import gradio as gr
# from queue import SimpleQueue
# from threading import Thread
# from langchain.callbacks.base import BaseCallbackHandler
# from langchain.schema import LLMResult
# from typing import Any, Union, Dict, List

# # Create a queue
# q = SimpleQueue()

# # Define a signal for job completion
# job_done = object()

# # Define a custom callback handler
# class StreamingGradioCallbackHandler(BaseCallbackHandler):
#     def __init__(self, q: SimpleQueue):
#         self.q = q
#     def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> None:
#         """Run when LLM starts running. Clean the queue."""
#         while not self.q.empty():
#             try:
#                 self.q.get(block=False)
#             except Empty:
#                 continue
#     def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
#         """Run on new LLM token. Only available when streaming is enabled."""
#         self.q.put(token)
#     def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
#         """Run when LLM ends running."""
#         self.q.put(job_done)
#     def on_llm_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> None:
#         """Run when LLM errors."""
#         self.q.put(job_done)

# # Define a function to run the agent in a separate thread and read from the queue
# def run_agent_in_thread(agent, input):
#     thread = Thread(target=agent.arun, kwargs={"input": input})
#     thread.start()
#     output = ""
#     stdout = ""
#     while True:
#         next_token = q.get(block=True)  # Blocks until an input is available
#         if next_token is job_done:
#             break
#         output += next_token
#         stdout += next_token  # Capture the stdout
#     thread.join()
#     return output, stdout

# # Modify your agent functions to use the new function
# def qa_run(inputs):
#     return run_agent_in_thread(multi_df_agent, inputs)

# def lb_run(inputs):
#     return run_agent_in_thread(macula_greek_agent, inputs)

# def csv_run(inputs):
#     return run_agent_in_thread(macula_greek_agent, inputs)

# def agent_run(inputs):
#     return run_agent_in_thread(agent, inputs)

# app = gr.Blocks(theme='bethecloud/storj_theme')

# with app:
#     gr.Markdown("## Query Linguistic Data with Natural Language")
#     gr.Markdown("This demo extracts answers and lists of results from linguistic data using natural language input only.")
#     # gr.Markdown("[Read More](https://medium.com/@langchain/using-openai-gpt-3-to-query-and-manipulate-dataframes-1f4f8f211e4d)")

#     with gr.Tab("Question Answering from Greek and Hebrew Spreadsheet Data"):
#         qa_text = gr.Textbox(lines=5, label="Question", value="How many unique domain labels are there (each domain label is a list)?")
#         qa_button = gr.Button("Run")
#         qa_output = gr.Textbox(label="Answer", lines=5)
#         stdout_output = gr.Textbox(label="Agent Output", lines=5)
#         qa_button.click(
#             qa_run,
#             inputs=qa_text,
#             outputs=[qa_output,stdout_output]
#         )
#     with gr.Tab("List Building"):
#         lb_text = gr.Textbox(lines=5, label="Prompt", value=list_building_prompt)
#         lb_button = gr.Button("Run")
#         lb_output = gr.Textbox(label="List", lines=5)
#         stdout_output = gr.Textbox(label="Agent Output", lines=5)
#         lb_button.click(
#             lb_run,
#             inputs=lb_text,
#             outputs=[lb_output,stdout_output]
#         )
#     with gr.Tab("CSV Generation"):
#         csv_text = gr.Textbox(lines=5, label="Prompt", value=list_building_and_save_as_csv_prompt)
#         csv_button = gr.Button("Run")
#         csv_output = gr.Textbox(label="CSV", lines=5)
#         stdout_output = gr.Textbox(label="Agent Output", lines=5)
#         csv_button.click(
#             csv_run,
#             inputs=csv_text,
#             outputs=[csv_output,stdout_output]
#         )
#     with gr.Tab("AI Agent with Access to Multiple Tools"):
#         qa_text = gr.Textbox(lines=5, label="Question", value="How many unique domain labels are there (each domain label is a list)?")
#         qa_button = gr.Button("Run")
#         qa_output = gr.Textbox(label="Answer", lines=5)
#         stdout_output = gr.Textbox(label="Agent Output", lines=5)
#         qa_button.click(
#             agent_run,
#             inputs=qa_text,
#             outputs=[qa_output,stdout_output]
#         )

# app.launch(debug=True)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.




# Flask UI

In [None]:
from flask import Flask, render_template, request
from flask_socketio import SocketIO, emit

app = Flask(__name__)
socketio = SocketIO(app)

@app.route('/')
def index():
    return render_template('index.html')

@socketio.on('run_agent')
def handle_run_agent(message):
    # Run your agent here
    result = agent.run(message['data'])

    # Emit the result to the client
    emit('agent_result', {'result': result})

if __name__ == '__main__':
    socketio.run(app)