In [1]:
from tfob import TFOb, BHSA, DSS

**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


In [2]:
import pandas as pd

In [30]:
# Read data/isaiah_cl_types.csv (path relative to the notebook's working
# directory) into a DataFrame.
# NOTE(review): isaiah_clauses is not referenced by any of the cells visible
# below — confirm it is still needed.
isaiah_clauses = pd.read_csv("data/isaiah_cl_types.csv")

In [39]:
# Ids for which GPT answered correctly / wrongly. Each id is later passed to
# TFOb(<id>, BHSA) when the per-answer tables are built.
# NOTE(review): 776242 appears twice in correct_answers — confirm whether the
# repeat is intended or a typo for another id.

correct_answers = [
    783781,
    776242,
    781407,
    788386,
    784024,
    776242,
]
wrong_answers = [
    784414,
    781711,
    778494,
    786630,
    781186,
    787356,
    784799,
    781316,
    787860,
    780207,
    785447,
    777547,
    788275,
    783496,
]


In [46]:
# NOTE(review): the name `id` shadows Python's built-in id() for the rest of
# the kernel session; consider renaming it if no other cell depends on it.
id = 788386
# Last expression of the cell, so the notebook displays the resulting object.
TFOb(id, BHSA)

<phrase_1 ">WRJD">

In [49]:
# Helpers: find the preceding and the next clause by index, and read a clause's type


def find_prec_cl(i):
    """Look up the clause located just before position ``i``.

    Reads the module-level ``clauses`` sequence (it is not passed in as an
    argument). Gives None when ``i`` is 0, i.e. there is no earlier clause.
    """
    return None if i == 0 else clauses[i - 1]


def find_next_cl(i):
    """Look up the clause located just after position ``i``.

    Reads the module-level ``clauses`` sequence (it is not passed in as an
    argument). Gives None when ``i`` is the last position, i.e. there is no
    later clause.
    """
    following = i + 1
    if following >= len(clauses):
        return None
    return clauses[following]


def find_cl_type(clause):
    """Return the ``typ`` attribute of a clause, or None if there is no clause.

    Args:
        clause: an object exposing ``.typ``, or None.

    Returns:
        ``clause.typ`` unchanged, or None when ``clause`` is None.
    """
    # Identity check on purpose: `!= None` would be routed through the
    # object's own comparison operators, which may be overloaded and could
    # make a real clause look like "no clause".
    if clause is None:
        return None
    return clause.typ


### Generate a CSV file with information about GPT's correct answers

In [50]:
# Build one list per output column; position k in every list describes the
# k-th id in correct_answers.

verb_ids_list = []

clauses_list = []
cl_types_list = []

prec_cl_list = []
pr_cl_types_list = []

next_cl_list = []
n_cl_types_list = []

books = []
chapters = []
verse_numbers = []

for verb_id in correct_answers:

    verb = TFOb(verb_id, BHSA)
    books.append(verb.book[0])
    chapters.append(verb.chapter[0])
    verse_numbers.append(verb.verse[0])

    verb_ids_list.append(verb_id)

    # Position of this clause among the clauses of its verse.
    # `clauses` has to stay a module-level name: find_prec_cl and
    # find_next_cl read it instead of taking it as an argument.
    clause = verb.to_clauses
    clauses = verb.to_verses.to_clauses
    i = clauses.ids.index(clause.ids[0])

    # Text and type of the clause itself
    clauses_list.append(clause.text)
    cl_types_list.append(find_cl_type(clause)[0])

    # Preceding clause: text and type, or None for both when find_prec_cl
    # reports that there is none (index 0).
    # `is not None` avoids going through any comparison operators the
    # clause objects may overload.
    prec_cl = find_prec_cl(i)
    if prec_cl is not None:
        prec_cl_list.append(prec_cl.text)
        pr_cl_types_list.append(find_cl_type(prec_cl)[0])
    else:
        prec_cl_list.append(None)
        pr_cl_types_list.append(None)

    # Next clause: same convention as for the preceding clause
    next_cl = find_next_cl(i)
    if next_cl is not None:
        next_cl_list.append(next_cl.text)
        n_cl_types_list.append(find_cl_type(next_cl)[0])
    else:
        next_cl_list.append(None)
        n_cl_types_list.append(None)

In [51]:
# Generate a pandas dataframe with lists

correct_answers_df = pd.DataFrame({
    "verb_id": verb_ids_list, 
    "book": books,
    "chapter": chapters,
    "verse_num": verse_numbers,
    "clause": clauses_list, 
    "cl_type" : cl_types_list,
    "prec_cl": prec_cl_list,
    "pr_cl_type": pr_cl_types_list,
    "next_cl": next_cl_list,
    "n_cl_type": n_cl_types_list,
             })

In [52]:
# Bare last expression: the notebook renders the whole table as cell output.
correct_answers_df

Unnamed: 0,verb_id,book,chapter,verse_num,clause,cl_type,prec_cl,pr_cl_type,next_cl,n_cl_type
0,783781,Isaiah,43,23,◊ú÷π÷Ω◊ê÷æ◊î÷µ◊ë÷µ÷§◊ô◊ê◊™÷∏ ◊ú÷º÷¥◊ô÷ô ◊©◊Ç÷µ÷£◊î ◊¢÷π◊ú÷π◊™÷∂÷î◊ô◊ö÷∏,xQt0,,,◊ï÷º◊ñ÷∞◊ë÷∏◊ó÷∂÷ñ◊ô◊ö÷∏ ◊ú÷π÷£◊ê ◊õ÷¥◊ë÷º÷∑◊ì÷∞◊™÷º÷∏÷ë◊†÷¥◊ô,WxQ0
1,776242,Isaiah,10,3,◊¢÷∑◊ú÷æ◊û÷¥◊ô÷ô ◊™÷º÷∏◊†÷£◊ï÷º◊°◊ï÷º ◊ú÷∞◊¢÷∂◊ñ÷∞◊®÷∏÷î◊î,xYq0,◊û÷¥◊û÷º÷∂◊®÷∞◊ó÷∏÷£◊ß ◊™÷º÷∏◊ë÷π÷ë◊ï◊ê,xYq0,◊ï÷∞◊ê÷∏÷•◊†÷∏◊î ◊™÷∑◊¢÷∑◊ñ÷∞◊ë÷ñ◊ï÷º ◊õ÷º÷∞◊ë÷π◊ï◊ì÷∞◊õ÷∂÷Ω◊ù◊É,WxY0
2,781407,Isaiah,36,2,◊ï÷∑◊ô÷º÷¥◊©◊Å÷∞◊ú÷∑÷£◊ó ◊û÷∂÷Ω◊ú÷∂◊ö÷∞÷æ◊ê÷∑◊©◊Å÷º÷£◊ï÷º◊®◊Ä ◊ê÷∂◊™÷æ◊®÷∑◊ë÷æ◊©◊Å÷∏◊ß÷µ÷®...,WayX,,,◊ï÷∑÷Ω◊ô÷º÷∑◊¢÷≤◊û÷π÷ó◊ì ◊ë÷º÷¥◊™÷∞◊¢÷∏◊ú÷∑◊™÷ô ◊î÷∑◊ë÷º÷∞◊®÷µ◊õ÷∏÷£◊î ◊î÷∏◊¢÷∂◊ú÷∞◊ô÷π◊ï...,Way0
3,788386,Isaiah,63,6,◊ï÷∞◊ê÷π◊ï◊®÷¥÷•◊ô◊ì ◊ú÷∏◊ê÷∏÷ñ◊®÷∂◊• ◊†÷¥◊¶÷∞◊ó÷∏÷Ω◊ù◊É ◊°,WYq0,◊ï÷∑◊ê÷≤◊©◊Å÷∑◊õ÷º÷∞◊®÷µ÷ñ◊ù ◊ë÷º÷∑◊ó÷≤◊û÷∏◊™÷¥÷ë◊ô,WYq0,,
4,784024,Isaiah,44,14,◊ï÷∑◊ô÷º÷¥◊ß÷º÷∑÷§◊ó ◊™÷º÷¥◊®÷∞◊ñ÷∏◊î÷ô ◊ï÷∞◊ê÷∑◊ú÷º÷π÷î◊ï◊ü,Way0,◊ú÷¥◊õ÷∞◊®÷∏◊™÷æ◊ú÷π÷£◊ï ◊ê÷≤◊®÷∏◊ñ÷¥÷î◊ô◊ù,InfC,◊ï÷∑◊ô÷∞◊ê÷∑◊û÷º÷∂◊•÷æ◊ú÷π÷ñ◊ï ◊ë÷º÷∑◊¢÷≤◊¶÷µ◊ô÷æ◊ô÷∏÷ë◊¢÷∑◊®,Way0
5,776242,Isaiah,10,3,◊¢÷∑◊ú÷æ◊û÷¥◊ô÷ô ◊™÷º÷∏◊†÷£◊ï÷º◊°◊ï÷º ◊ú÷∞◊¢÷∂◊ñ÷∞◊®÷∏÷î◊î,xYq0,◊û÷¥◊û÷º÷∂◊®÷∞◊ó÷∏÷£◊ß ◊™÷º÷∏◊ë÷π÷ë◊ï◊ê,xYq0,◊ï÷∞◊ê÷∏÷•◊†÷∏◊î ◊™÷∑◊¢÷∑◊ñ÷∞◊ë÷ñ◊ï÷º ◊õ÷º÷∞◊ë÷π◊ï◊ì÷∞◊õ÷∂÷Ω◊ù◊É,WxY0


### Generate a CSV file with information about GPT's wrong answers

In [54]:
# Build one list per output column; position k in every list describes the
# k-th id in wrong_answers.

verb_ids_list = []

clauses_list = []
cl_types_list = []

prec_cl_list = []
pr_cl_types_list = []

next_cl_list = []
n_cl_types_list = []

books = []
chapters = []
verse_numbers = []

for verb_id in wrong_answers:

    verb = TFOb(verb_id, BHSA)
    books.append(verb.book[0])
    chapters.append(verb.chapter[0])
    verse_numbers.append(verb.verse[0])

    verb_ids_list.append(verb_id)

    # Position of this clause among the clauses of its verse.
    # `clauses` has to stay a module-level name: find_prec_cl and
    # find_next_cl read it instead of taking it as an argument.
    clause = verb.to_clauses
    clauses = verb.to_verses.to_clauses
    i = clauses.ids.index(clause.ids[0])

    # Text and type of the clause itself
    clauses_list.append(clause.text)
    cl_types_list.append(find_cl_type(clause)[0])

    # Preceding clause: text and type, or None for both when find_prec_cl
    # reports that there is none (index 0).
    # `is not None` avoids going through any comparison operators the
    # clause objects may overload.
    prec_cl = find_prec_cl(i)
    if prec_cl is not None:
        prec_cl_list.append(prec_cl.text)
        pr_cl_types_list.append(find_cl_type(prec_cl)[0])
    else:
        prec_cl_list.append(None)
        pr_cl_types_list.append(None)

    # Next clause: same convention as for the preceding clause
    next_cl = find_next_cl(i)
    if next_cl is not None:
        next_cl_list.append(next_cl.text)
        n_cl_types_list.append(find_cl_type(next_cl)[0])
    else:
        next_cl_list.append(None)
        n_cl_types_list.append(None)

In [55]:
# Generate a pandas dataframe with lists

wrong_answers_df = pd.DataFrame({
    "verb_id": verb_ids_list, 
    "book": books,
    "chapter": chapters,
    "verse_num": verse_numbers,
    "clause": clauses_list, 
    "cl_type" : cl_types_list,
    "prec_cl": prec_cl_list,
    "pr_cl_type": pr_cl_types_list,
    "next_cl": next_cl_list,
    "n_cl_type": n_cl_types_list,
             })

In [56]:
# Bare last expression: the notebook renders the whole table as cell output.
wrong_answers_df

Unnamed: 0,verb_id,book,chapter,verse_num,clause,cl_type,prec_cl,pr_cl_type,next_cl,n_cl_type
0,784414,Isaiah,45,13,◊ï÷∞◊í÷∏◊ú◊ï÷º◊™÷¥÷£◊ô ◊ô÷∞◊©◊Å÷∑◊ú÷º÷µ÷î◊ó÷∑,WxY0,◊î÷Ω◊ï÷º◊ê÷æ◊ô÷¥◊ë÷∞◊†÷∂÷§◊î ◊¢÷¥◊ô◊®÷¥◊ô÷ô,XYqt,◊ú÷π÷§◊ê ◊ë÷¥◊û÷∞◊ó÷¥◊ô◊®÷ô,Ellp
1,781711,Isaiah,37,3,◊õ÷º÷¥÷£◊ô ◊ë÷∏÷§◊ê◊ï÷º ◊ë÷∏◊†÷¥◊ô◊ù÷ô ◊¢÷∑◊ì÷æ◊û÷∑◊©◊Å÷∞◊ë÷º÷µ÷î◊®,xQtX,◊ô÷π◊ï◊ù÷æ◊¶÷∏◊®÷∏÷ß◊î ◊ï÷∞◊™÷π◊ï◊õ÷µ◊ó÷∏÷õ◊î ◊ï÷º◊†÷∞◊ê÷∏◊¶÷∏÷ñ◊î ◊î÷∑◊ô÷º÷π÷£◊ï◊ù ◊î÷∑...,NmCl,◊ï÷∞◊õ÷π÷•◊ó÷∑ ◊ê÷∑÷ñ◊ô÷¥◊ü ◊ú÷∞◊ú÷µ◊ì÷∏÷Ω◊î◊É,NmCl
2,778494,Isaiah,22,15,◊ú÷∂◊ö÷∞÷æ,ZIm0,◊õ÷º÷π÷•◊î ◊ê÷∏◊û÷∑÷õ◊® ◊ê÷≤◊ì÷π◊†÷∏÷•◊ô ◊ô÷∞◊î◊ï÷¥÷ñ◊î ◊¶÷∞◊ë÷∏◊ê÷π÷ë◊ï◊™,xQtX,◊ë÷º÷π◊ê÷ô ◊ê÷∂◊ú÷æ◊î÷∑◊°÷º÷π◊õ÷µ÷£◊ü ◊î÷∑◊ñ÷º÷∂÷î◊î ◊¢÷∑◊ú÷æ◊©◊Å÷∂◊ë÷∞◊†÷∏÷ñ◊ê,ZIm0
3,786630,Isaiah,55,1,◊ú÷∞◊õ÷£◊ï÷º ◊ú÷∑◊û÷º÷∑÷î◊ô÷¥◊ù,ZIm0,◊õ÷º÷∏◊ú÷æ◊¶÷∏◊û÷µ◊ê÷ô,Voct,◊ï÷∑◊ú÷∞◊õ÷§◊ï÷º,WIm0
4,781186,Isaiah,34,10,◊ú÷∞◊†÷µ÷£◊¶÷∑◊ó ◊†÷∞◊¶÷∏◊ó÷¥÷î◊ô◊ù ◊ê÷µ÷•◊ô◊ü ◊¢÷π◊ë÷µ÷ñ◊® ◊ë÷º÷∏÷Ω◊î÷º◊É,Ptcp,◊û÷¥◊ì÷º÷π÷§◊ï◊® ◊ú÷∏◊ì÷π◊ï◊®÷ô ◊™÷º÷∂◊ó÷±◊®÷∏÷î◊ë,xYq0,,
5,787356,Isaiah,58,9,◊©◊Å÷∞◊ú÷∑÷•◊ó ◊ê÷∂◊¶÷∞◊ë÷º÷∑÷ñ◊¢,InfC,◊ê÷¥◊ù÷æ◊™÷º÷∏◊°÷¥÷§◊ô◊® ◊û÷¥◊™÷º÷π÷Ω◊ï◊õ÷∞◊ö÷∏÷ô ◊û÷π◊ï◊ò÷∏÷î◊î,xYq0,◊ï÷∞◊ì÷∑◊ë÷º÷∂◊®÷æ◊ê÷∏÷Ω◊ï÷∂◊ü◊É,InfA
6,784799,Isaiah,47,5,◊ï÷º◊ë÷π÷•◊ê÷¥◊ô ◊ë÷∑◊ó÷π÷ñ◊©◊Å÷∂◊ö÷∞,WIm0,◊©◊Å÷∞◊ë÷¥÷•◊ô ◊ì◊ï÷º◊û÷∏÷õ◊ù,ZIm0,◊ë÷º÷∑◊™÷æ◊õ÷º÷∑◊©◊Ç÷∞◊ì÷º÷¥÷ë◊ô◊ù,Voct
7,781316,Isaiah,35,4,◊î÷¥◊†÷º÷µ÷§◊î ◊ê÷±◊ú÷π÷Ω◊î÷µ◊ô◊õ÷∂◊ù÷ô ◊†÷∏◊ß÷∏÷£◊ù ◊ô÷∏◊ë÷π÷î◊ï◊ê ◊í÷º÷∞◊û÷£◊ï÷º◊ú ◊ê...,XYqt,◊ê÷∑◊ú÷æ◊™÷º÷¥◊ô◊®÷∏÷ë◊ê◊ï÷º,xYq0,◊î÷•◊ï÷º◊ê ◊ô÷∏◊ë÷π÷ñ◊ï◊ê,XYqt
8,787860,Isaiah,60,11,◊ú÷∞◊î÷∏◊ë÷¥÷§◊ô◊ê ◊ê÷µ◊ú÷∑÷®◊ô÷¥◊ö÷∞÷ô ◊ó÷µ÷£◊ô◊ú ◊í÷º÷π◊ï◊ô÷¥÷î◊ù,InfC,◊ô÷π◊ï◊û÷∏÷•◊ù ◊ï÷∏◊ú÷∑÷ñ◊ô÷∞◊ú÷∏◊î ◊ú÷π÷£◊ê ◊ô÷¥◊°÷º÷∏◊í÷µ÷ë◊®◊ï÷º,xYq0,◊ï÷º◊û÷∑◊ú÷∞◊õ÷µ◊ô◊î÷∂÷ñ◊ù ◊†÷∞◊î◊ï÷º◊í÷¥÷Ω◊ô◊ù◊É,Ptcp
9,780207,Isaiah,30,8,◊¢÷∑◊™÷º÷∏÷ó◊î ◊ë÷º÷π÷£◊ï◊ê,xIm0,,,◊õ÷∏◊™÷∞◊ë÷∏÷•◊î÷º ◊¢÷∑◊ú÷æ◊ú÷õ◊ï÷º◊ó÷∑ ◊ê÷¥◊™÷º÷∏÷ñ◊ù,ZIm0
