In [1]:
from tfob import TFOb, BHSA, DSS

**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


**Locating corpus resources ...**

Name,# of nodes,# slots/node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


In [2]:
import pandas as pd

In [30]:
# Read data/isaiah_cl_types.csv (path relative to the working directory) into a DataFrame.
# NOTE(review): `isaiah_clauses` is not referenced in the cells visible here —
# confirm it is used further down, otherwise this load can go.
isaiah_clauses = pd.read_csv("data/isaiah_cl_types.csv")

In [39]:
# IDs of the items GPT answered correctly / incorrectly; each one is later
# passed to TFOb(<id>, BHSA).
# NOTE(review): 776242 is listed twice in `correct_answers`, so any table built
# from this list contains the same row twice — confirm whether the second
# occurrence was meant to be a different ID.

correct_answers = [783781, 776242, 781407, 788386, 784024, 776242]
wrong_answers = [784414, 781711, 778494, 786630, 781186, 787356, 784799, 781316, 787860, 780207, 785447, 777547, 788275, 783496]


In [46]:
# Inspect one of the IDs from `correct_answers` as a TFOb object.
# NOTE(review): the name `id` shadows Python's built-in id() for the rest of the
# kernel session; consider renaming it once no other cell depends on it.
id = 788386
TFOb(id, BHSA)

<phrase_1 ">WRJD">

In [49]:
# Get the types of the preceding and the next clauses


def find_prec_cl(i, clause_seq=None):
    """Returns the clause that precedes position ``i`` in a verse.

    i          -- zero-based index of the current clause within its verse.
    clause_seq -- the clauses of the verse (anything indexable). When omitted,
                  the module-level ``clauses`` variable is used, so existing
                  ``find_prec_cl(i)`` calls keep working unchanged.

    If the clause is the first in the verse, returns None."""

    seq = clauses if clause_seq is None else clause_seq

    # `i > 0` rather than `i != 0`: a negative index must not wrap around to
    # the end of the sequence.
    if i > 0:
        return seq[i - 1]
    return None


def find_next_cl(i, clause_seq=None):
    """Returns the clause that follows position ``i`` in a verse.

    i          -- zero-based index of the current clause within its verse.
    clause_seq -- the clauses of the verse (anything indexable with a length).
                  When omitted, the module-level ``clauses`` variable is used,
                  so existing ``find_next_cl(i)`` calls keep working unchanged.

    If the clause is the last in the verse, returns None."""

    seq = clauses if clause_seq is None else clause_seq

    # The lower bound keeps an index below -1 from wrapping around to the end
    # of the sequence.
    if 0 <= i + 1 < len(seq):
        return seq[i + 1]
    return None


def find_cl_type(clause):
    """Returns the ``typ`` attribute of a clause. If there is no clause
    (``clause`` is None), returns None."""
    # Identity test instead of `!= None`: `!=` is dispatched to the object's own
    # comparison methods, which an arbitrary class may overload.
    if clause is not None:
        return clause.typ
    return None


### Generate a CSV file with information about the correct answers of GPT

In [50]:
# Collect, for every ID in `correct_answers`, its location, the text and type of
# its clause, and the text and type of the neighbouring clauses in the same
# verse. One list per output column; all lists end up with the same length.

verb_ids_list = []

clauses_list = []
cl_types_list = []

prec_cl_list = []
pr_cl_types_list = []

next_cl_list = []
n_cl_types_list = []

books = []
chapters = []
verse_numbers = []

for verb_id in correct_answers:
    verb = TFOb(verb_id, BHSA)

    verb_ids_list.append(verb_id)
    books.append(verb.book[0])
    chapters.append(verb.chapter[0])
    verse_numbers.append(verb.verse[0])

    # Get the index of the clause in the verse.
    # `clauses` must remain a module-level name: the find_prec_cl(i) and
    # find_next_cl(i) calls below look it up there.
    clause = verb.to_clauses
    clauses = verb.to_verses.to_clauses
    i = clauses.ids.index(clause.ids[0])

    # The clause itself
    clauses_list.append(clause.text)
    cl_types_list.append(find_cl_type(clause)[0])

    # The preceding clause (None when the clause is the first in the verse)
    prec_cl = find_prec_cl(i)
    if prec_cl is not None:
        prec_cl_list.append(prec_cl.text)
        pr_cl_types_list.append(find_cl_type(prec_cl)[0])
    else:
        prec_cl_list.append(None)
        pr_cl_types_list.append(None)

    # The next clause (None when the clause is the last in the verse)
    next_cl = find_next_cl(i)
    if next_cl is not None:
        next_cl_list.append(next_cl.text)
        n_cl_types_list.append(find_cl_type(next_cl)[0])
    else:
        next_cl_list.append(None)
        n_cl_types_list.append(None)

In [51]:
# Assemble the collected lists into a single table, one column per list

correct_answers_df = pd.DataFrame(
    dict(
        verb_id=verb_ids_list,
        book=books,
        chapter=chapters,
        verse_num=verse_numbers,
        clause=clauses_list,
        cl_type=cl_types_list,
        prec_cl=prec_cl_list,
        pr_cl_type=pr_cl_types_list,
        next_cl=next_cl_list,
        n_cl_type=n_cl_types_list,
    )
)

In [52]:
# Display the table built from `correct_answers`.
correct_answers_df

Unnamed: 0,verb_id,book,chapter,verse_num,clause,cl_type,prec_cl,pr_cl_type,next_cl,n_cl_type
0,783781,Isaiah,43,23,לֹֽא־הֵבֵ֤יאתָ לִּי֙ שֵׂ֣ה עֹלֹתֶ֔יךָ,xQt0,,,וּזְבָחֶ֖יךָ לֹ֣א כִבַּדְתָּ֑נִי,WxQ0
1,776242,Isaiah,10,3,עַל־מִי֙ תָּנ֣וּסוּ לְעֶזְרָ֔ה,xYq0,מִמֶּרְחָ֣ק תָּבֹ֑וא,xYq0,וְאָ֥נָה תַעַזְב֖וּ כְּבֹודְכֶֽם׃,WxY0
2,781407,Isaiah,36,2,וַיִּשְׁלַ֣ח מֶֽלֶךְ־אַשּׁ֣וּר׀ אֶת־רַב־שָׁקֵ֨...,WayX,,,וַֽיַּעֲמֹ֗ד בִּתְעָלַת֙ הַבְּרֵכָ֣ה הָעֶלְיֹו...,Way0
3,788386,Isaiah,63,6,וְאֹורִ֥יד לָאָ֖רֶץ נִצְחָֽם׃ ס,WYq0,וַאֲשַׁכְּרֵ֖ם בַּחֲמָתִ֑י,WYq0,,
4,784024,Isaiah,44,14,וַיִּקַּ֤ח תִּרְזָה֙ וְאַלֹּ֔ון,Way0,לִכְרָת־לֹ֣ו אֲרָזִ֔ים,InfC,וַיְאַמֶּץ־לֹ֖ו בַּעֲצֵי־יָ֑עַר,Way0
5,776242,Isaiah,10,3,עַל־מִי֙ תָּנ֣וּסוּ לְעֶזְרָ֔ה,xYq0,מִמֶּרְחָ֣ק תָּבֹ֑וא,xYq0,וְאָ֥נָה תַעַזְב֖וּ כְּבֹודְכֶֽם׃,WxY0


### Generate a CSV file with information about the wrong answers of GPT

In [54]:
# Collect, for every ID in `wrong_answers`, its location, the text and type of
# its clause, and the text and type of the neighbouring clauses in the same
# verse. One list per output column; all lists end up with the same length.
# NOTE(review): this cell repeats the logic of the cell that processes
# `correct_answers`; a shared function taking the ID list would remove the copy.

verb_ids_list = []

clauses_list = []
cl_types_list = []

prec_cl_list = []
pr_cl_types_list = []

next_cl_list = []
n_cl_types_list = []

books = []
chapters = []
verse_numbers = []

for verb_id in wrong_answers:
    verb = TFOb(verb_id, BHSA)

    verb_ids_list.append(verb_id)
    books.append(verb.book[0])
    chapters.append(verb.chapter[0])
    verse_numbers.append(verb.verse[0])

    # Get the index of the clause in the verse.
    # `clauses` must remain a module-level name: the find_prec_cl(i) and
    # find_next_cl(i) calls below look it up there.
    clause = verb.to_clauses
    clauses = verb.to_verses.to_clauses
    i = clauses.ids.index(clause.ids[0])

    # The clause itself
    clauses_list.append(clause.text)
    cl_types_list.append(find_cl_type(clause)[0])

    # The preceding clause (None when the clause is the first in the verse)
    prec_cl = find_prec_cl(i)
    if prec_cl is not None:
        prec_cl_list.append(prec_cl.text)
        pr_cl_types_list.append(find_cl_type(prec_cl)[0])
    else:
        prec_cl_list.append(None)
        pr_cl_types_list.append(None)

    # The next clause (None when the clause is the last in the verse)
    next_cl = find_next_cl(i)
    if next_cl is not None:
        next_cl_list.append(next_cl.text)
        n_cl_types_list.append(find_cl_type(next_cl)[0])
    else:
        next_cl_list.append(None)
        n_cl_types_list.append(None)

In [55]:
# Assemble the collected lists into a single table, one column per list

wrong_answers_df = pd.DataFrame(
    dict(
        verb_id=verb_ids_list,
        book=books,
        chapter=chapters,
        verse_num=verse_numbers,
        clause=clauses_list,
        cl_type=cl_types_list,
        prec_cl=prec_cl_list,
        pr_cl_type=pr_cl_types_list,
        next_cl=next_cl_list,
        n_cl_type=n_cl_types_list,
    )
)

In [56]:
# Display the table built from `wrong_answers`.
wrong_answers_df

Unnamed: 0,verb_id,book,chapter,verse_num,clause,cl_type,prec_cl,pr_cl_type,next_cl,n_cl_type
0,784414,Isaiah,45,13,וְגָלוּתִ֣י יְשַׁלֵּ֔חַ,WxY0,הֽוּא־יִבְנֶ֤ה עִירִי֙,XYqt,לֹ֤א בִמְחִיר֙,Ellp
1,781711,Isaiah,37,3,כִּ֣י בָ֤אוּ בָנִים֙ עַד־מַשְׁבֵּ֔ר,xQtX,יֹום־צָרָ֧ה וְתֹוכֵחָ֛ה וּנְאָצָ֖ה הַיֹּ֣ום הַ...,NmCl,וְכֹ֥חַ אַ֖יִן לְלֵדָֽה׃,NmCl
2,778494,Isaiah,22,15,לֶךְ־,ZIm0,כֹּ֥ה אָמַ֛ר אֲדֹנָ֥י יְהוִ֖ה צְבָאֹ֑ות,xQtX,בֹּא֙ אֶל־הַסֹּכֵ֣ן הַזֶּ֔ה עַל־שֶׁבְנָ֖א,ZIm0
3,786630,Isaiah,55,1,לְכ֣וּ לַמַּ֔יִם,ZIm0,כָּל־צָמֵא֙,Voct,וַלְכ֤וּ,WIm0
4,781186,Isaiah,34,10,לְנֵ֣צַח נְצָחִ֔ים אֵ֥ין עֹבֵ֖ר בָּֽהּ׃,Ptcp,מִדֹּ֤ור לָדֹור֙ תֶּחֱרָ֔ב,xYq0,,
5,787356,Isaiah,58,9,שְׁלַ֥ח אֶצְבַּ֖ע,InfC,אִם־תָּסִ֤יר מִתֹּֽוכְךָ֙ מֹוטָ֔ה,xYq0,וְדַבֶּר־אָֽוֶן׃,InfA
6,784799,Isaiah,47,5,וּבֹ֥אִי בַחֹ֖שֶׁךְ,WIm0,שְׁבִ֥י דוּמָ֛ם,ZIm0,בַּת־כַּשְׂדִּ֑ים,Voct
7,781316,Isaiah,35,4,הִנֵּ֤ה אֱלֹֽהֵיכֶם֙ נָקָ֣ם יָבֹ֔וא גְּמ֣וּל א...,XYqt,אַל־תִּירָ֑אוּ,xYq0,ה֥וּא יָבֹ֖וא,XYqt
8,787860,Isaiah,60,11,לְהָבִ֤יא אֵלַ֨יִךְ֙ חֵ֣יל גֹּויִ֔ם,InfC,יֹומָ֥ם וָלַ֖יְלָה לֹ֣א יִסָּגֵ֑רוּ,xYq0,וּמַלְכֵיהֶ֖ם נְהוּגִֽים׃,Ptcp
9,780207,Isaiah,30,8,עַתָּ֗ה בֹּ֣וא,xIm0,,,כָתְבָ֥הּ עַל־ל֛וּחַ אִתָּ֖ם,ZIm0
