# Word Usage

Determines New Testament word usage.

## Define File Names

In [1]:
# Input files handed to Analyzer.load_data() in the cells below.
LEXEMES_CSV = "lexemes.csv"
MORPHGNT_CSV = "morphgnt.csv"
# Relative path outside this directory.
# NOTE(review): presumably needs a BibleCore checkout next to this project — confirm.
MOUNCE_TXT = "../BibleCore/resources/mounce.txt"

## Analyze New Testament

In [2]:
from biblesdk.classes import Analyzer

# Build the analyzer and load the three input files.
# NOTE(review): the meaning of the 200 argument is not visible here; presumably
# a word-rank cutoff (a later cell filters on NT rank <= 200) — confirm against
# biblesdk.classes.Analyzer.
ANALYZER = Analyzer(200)

ANALYZER.load_data(MORPHGNT_CSV, LEXEMES_CSV, MOUNCE_TXT)

# Report covering the whole New Testament.
new_testament_report = ANALYZER.get_new_testament_report()

# The styler is the cell's last expression so the notebook renders it as a table.
new_testament_report_styler = new_testament_report.get_styler()
new_testament_report_styler

Rank,Part of Speech,Lexical Entry,Gloss,Strongs,GK,Mounce,Word Count
1,Definite Article,"ὁ, ἡ, τό",the,3588,3836,6.0,19769
2,Conjunction,καί,"and, even, also, namely",2532,2779,4.0,8973
3,Pronoun - Personal,"αὐτός, αὐτή, αὐτό","he, she, it, they, them, same",846,0899,6.0,5546
4,Pronoun - Personal,"σύ, σοῦ, σοί, σέ",you,4771,5148,7.0,2894
5,Conjunction,δέ,"but, on the other hand, and",1161,1254,6.0,2766
6,Preposition,ἐν,"in, on, among (dat)",1722,1877,6.0,2733
7,Pronoun - Personal,ἐγώ,I,1473,1609,4.0,2572
8,Verb,εἰμί,"I am, exist",1510,1639,8.0,2456
9,Verb,λέγω,"I say, speak",3004,3306,78816.0,2345
10,Preposition,εἰς,"into, in, among, till, for (acc)",1519,1650,7.0,1754


## Analyze Book

In [3]:
# NOTE(review): this re-creates the analyzer and reloads the same files as the
# previous cell; the existing ANALYZER could presumably be reused — confirm that
# Analyzer keeps no per-report state before removing the reload.
ANALYZER = Analyzer(200)

ANALYZER.load_data(MORPHGNT_CSV, LEXEMES_CSV, MOUNCE_TXT)

# The positional arguments 1, 1 presumably select book 1, chapter 1 — TODO confirm
# against Analyzer.get_book_report. add_nt_word_index=True requests the extra
# "NT Rank" column seen in the output below.
book_report = ANALYZER.get_book_report(1, 1, add_nt_word_index=True)

# The styler is the cell's last expression so the notebook renders it as a table.
report_styler = book_report.get_styler()
report_styler

Rank,NT Rank,Part of Speech,Lexical Entry,Gloss,Strongs,GK,Mounce,Word Count
1,1,Definite Article,"ὁ, ἡ, τό",the,3588.0,3836,6.0,76
2,5,Conjunction,δέ,"but, on the other hand, and",1161.0,1254,6.0,44
3,173,Verb,γεννάω,"I beget, bring forth, give birth to",1080.0,1164,19.0,41
4,3,Pronoun - Personal,"αὐτός, αὐτή, αὐτό","he, she, it, they, them, same",846.0,0899,6.0,19
5,2,Conjunction,καί,"and, even, also, namely",2532.0,2779,4.0,12
6,19,Preposition,"ἐκ, ἐξ","from out, out from among, from (gen)",1537.0,1666,8.0,7
7,274,Noun,"Δαυίδ, ὁ",David,1138.0,1253,4.0,6
8,48,Noun,"υἱός, οῦ, ὁ","a son, descendent",5207.0,5626,7.0,6
9,28,Preposition,ἀπό,"from, away from (gen)",575.0,0608,8.0,5
10,411,Noun,"Ἰωσήφ, ὁ",Joseph,2501.0,2737,,5


## Create Usage Reports

In [4]:
from pathlib import Path
import unicodedata
import biblesdk.constants as bc
from biblesdk.books import BOOKS


def strip_diacritics(series):
    """Return ``series`` with diacritics removed from every string value.

    Used as the ``key`` callable for ``DataFrame.sort_values`` so that entries
    are ordered by their base letters rather than by accents or breathings.

    Args:
        series: A pandas Series; ``sort_values`` passes one column at a time.

    Returns:
        A Series of the same length and index. String values are decomposed
        with NFKD and have their combining marks dropped; non-string values
        (for example NaN) are returned unchanged.
    """

    def strip(value):
        # Missing or non-text cells cannot be normalized; leave them as-is
        # instead of letting unicodedata.normalize raise TypeError.
        if not isinstance(value, str):
            return value
        decomposed = unicodedata.normalize("NFKD", value)
        # NFKD only splits base letters from their marks; the marks must be
        # filtered out explicitly or they still take part in the comparison.
        return "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    return series.apply(strip)


# Fresh analyzer for the report run; generated pages are written under ./reports.
ANALYZER = Analyzer(200)
ANALYZER.load_data(MORPHGNT_CSV, LEXEMES_CSV, MOUNCE_TXT)
Path("reports").mkdir(exist_ok=True)

# Create New Testament summary report.
#
new_testament_report = ANALYZER.get_new_testament_report()

# Heading, then one paragraph per report property.
html_parts = ["<h2>New Testament</h2>"]
html_parts.extend(
    f"<p>{property_name}: {property_value}"
    for property_name, property_value in new_testament_report.properties.items()
)

# First table: the report rendered before any re-sorting.
html_parts.append("<h3>By Ranking</h3>")
new_testament_report_styler = new_testament_report.get_styler()
html_parts.append(new_testament_report_styler.to_html())

# Second table: re-sort the report's frame in place, then render it again.
html_parts.append("<h3>By Part of Speech</h3>")
new_testament_report.df.sort_values(
    by=[bc.PART_OF_SPEECH, bc.LEXICAL_ENTRY],
    key=strip_diacritics,
    inplace=True,
)
new_testament_report_styler = new_testament_report.get_styler()
html_parts.append(new_testament_report_styler.to_html())

report_html = "".join(html_parts)
Path("reports/words_00_new_testament.html").write_text(report_html, encoding="utf-8")

# Create book detail reports.
#
# Words whose NT rank is at or below this value are left out of "New Words".
NEW_WORDS_MAX_NT_RANK = 200

for book_number in BOOKS:
    book_name = BOOKS[book_number]
    book_report = ANALYZER.get_book_report(book_number, add_nt_word_index=True)

    # Heading, then one paragraph per report property.
    html_parts = [f"<h2>{book_name}</h2>"]
    html_parts.extend(
        f"<p>{property_name}: {property_value}"
        for property_name, property_value in book_report.properties.items()
    )

    # First table: the report rendered before any re-sorting or filtering.
    html_parts.append("<h3>By Ranking</h3>")
    report_styler = book_report.get_styler()
    html_parts.append(report_styler.to_html())

    # Second table: sort by part of speech and entry, then drop the rows whose
    # NT rank is within the cutoff. Both steps modify the report's frame in place.
    html_parts.append("<h3>New Words</h3>")
    book_report.df.sort_values(
        by=[bc.PART_OF_SPEECH, bc.LEXICAL_ENTRY],
        key=strip_diacritics,
        inplace=True,
    )
    nt_rank = book_report.df[bc.NEW_TESTAMENT_WORD_INDEX]
    rows_within_cutoff = book_report.df[nt_rank <= NEW_WORDS_MAX_NT_RANK].index
    book_report.df.drop(rows_within_cutoff, inplace=True)
    report_styler = book_report.get_styler(highlight_nt_rank=False)
    html_parts.append(report_styler.to_html())

    report_html = "".join(html_parts)
    output_path = Path(f"reports/words_{book_number:02d}_{book_name}.html")
    output_path.write_text(report_html, encoding="utf-8")