In [None]:
import json
import pandas as pd
import plotly.express as px

In [3]:
# Read tr.json from the working directory and parse it into Python objects
with open("tr.json", mode="r", encoding="utf-8") as file:
    tr = json.load(file)

# Peek at the top-level container: its type first, then its keys when it is
# a dict, or its length otherwise (e.g. a list)
print(type(tr))
if isinstance(tr, dict):
    print(tr.keys())
else:
    print(len(tr))


<class 'dict'>
dict_keys(['metadata', 'verses'])


In [6]:
# Inspect the value stored under the top-level 'metadata' key of the loaded JSON
tr['metadata']

{'name': 'Textus Receptus NT',
 'shortname': 'TR',
 'module': 'tr',
 'year': '1550 / 1884',
 'publisher': None,
 'owner': None,
 'description': "<b>Greek NT: Textus Receptus (1550/1894)</b><p />The Textus Receptus; base text is Stephens 1550, with variants of Scrivener 1894.<br /><br />This Bible imported from The Unbound Bible <a href='http://unbound.biola.edu/'>http://unbound.biola.edu/</a>",
 'lang': 'Greek',
 'lang_short': 'grc',
 'copyright': 0,
 'copyright_statement': 'This Bible is in the Public Domain.',
 'url': None,
 'citation_limit': 0,
 'restrict': 0,
 'italics': 0,
 'strongs': 0,
 'red_letter': 0,
 'paragraph': 0,
 'official': 1,
 'research': 0,
 'module_version': '5.0.0alpha1'}

In [9]:
# Show which fields the first verse record carries
print(tr['verses'][0].keys())

dict_keys(['book_name', 'book', 'chapter', 'verse', 'text'])


In [10]:
# Turn the list stored under "verses" into a DataFrame (one row per verse).
# If the key is absent, only report the problem; tr_df is then not created.
if "verses" not in tr:
    print("Error: 'verses' key not found in JSON data")
else:
    tr_df = pd.DataFrame(tr["verses"])
    display(tr_df)  # rich notebook rendering of the frame


Unnamed: 0,book_name,book,chapter,verse,text
0,Matthew,40,1,1,βιβλος γενεσεως ιησου χριστου υιου δαβιδ υιου ...
1,Matthew,40,1,2,αβρααμ εγεννησεν τον ισαακ ισαακ δε εγεννησεν ...
2,Matthew,40,1,3,ιουδας δε εγεννησεν τον φαρες και τον ζαρα εκ ...
3,Matthew,40,1,4,αραμ δε εγεννησεν τον αμιναδαβ αμιναδαβ δε εγε...
4,Matthew,40,1,5,σαλμων δε εγεννησεν τον βοοζ εκ της ραχαβ βοοζ...
...,...,...,...,...,...
7953,Revelation,66,22,17,και το πνευμα και η νυμφη λεγουσιν ελθε και ο ...
7954,Revelation,66,22,18,συμμαρτυρουμαι γαρ παντι ακουοντι τους λογους ...
7955,Revelation,66,22,19,και εαν τις αφαιρη απο των λογων βιβλου της πρ...
7956,Revelation,66,22,20,λεγει ο μαρτυρων ταυτα ναι ερχομαι ταχυ αμην ν...


In [37]:
# print(json.dumps(tr["verses"][:3], indent=4))  # Prints the first 3 entries neatly formatted

In [12]:
# Print the first row of tr_df (positional lookup) to see one verse's fields
print(tr_df.iloc[0])

book_name                                              Matthew
book                                                        40
chapter                                                      1
verse                                                        1
text         βιβλος γενεσεως ιησου χριστου υιου δαβιδ υιου ...
Name: 0, dtype: object


In [13]:
# Display tr_df (bare last expression -> rich notebook rendering)
tr_df

Unnamed: 0,book_name,book,chapter,verse,text
0,Matthew,40,1,1,βιβλος γενεσεως ιησου χριστου υιου δαβιδ υιου ...
1,Matthew,40,1,2,αβρααμ εγεννησεν τον ισαακ ισαακ δε εγεννησεν ...
2,Matthew,40,1,3,ιουδας δε εγεννησεν τον φαρες και τον ζαρα εκ ...
3,Matthew,40,1,4,αραμ δε εγεννησεν τον αμιναδαβ αμιναδαβ δε εγε...
4,Matthew,40,1,5,σαλμων δε εγεννησεν τον βοοζ εκ της ραχαβ βοοζ...
...,...,...,...,...,...
7953,Revelation,66,22,17,και το πνευμα και η νυμφη λεγουσιν ελθε και ο ...
7954,Revelation,66,22,18,συμμαρτυρουμαι γαρ παντι ακουοντι τους λογους ...
7955,Revelation,66,22,19,και εαν τις αφαιρη απο των λογων βιβλου της πρ...
7956,Revelation,66,22,20,λεγει ο μαρτυρων ταυτα ναι ερχομαι ταχυ αμην ν...


In [14]:
# tr_df.text[5000]

In [15]:
# Per-verse size metrics, added to tr_df as two new columns

# word_count: number of whitespace-separated tokens in the verse text
tr_df['word_count'] = tr_df['text'].str.split().str.len()

# char_count: characters excluding spaces, i.e. the full length of the text
# minus the number of space characters it contains
tr_df['char_count'] = tr_df['text'].str.len() - tr_df['text'].str.count(" ")


In [16]:
# Display tr_df again, now with the word_count and char_count columns
tr_df

Unnamed: 0,book_name,book,chapter,verse,text,word_count,char_count
0,Matthew,40,1,1,βιβλος γενεσεως ιησου χριστου υιου δαβιδ υιου ...,8,45
1,Matthew,40,1,2,αβρααμ εγεννησεν τον ισαακ ισαακ δε εγεννησεν ...,18,92
2,Matthew,40,1,3,ιουδας δε εγεννησεν τον φαρες και τον ζαρα εκ ...,21,92
3,Matthew,40,1,4,αραμ δε εγεννησεν τον αμιναδαβ αμιναδαβ δε εγε...,15,82
4,Matthew,40,1,5,σαλμων δε εγεννησεν τον βοοζ εκ της ραχαβ βοοζ...,21,89
...,...,...,...,...,...,...,...
7953,Revelation,66,22,17,και το πνευμα και η νυμφη λεγουσιν ελθε και ο ...,25,101
7954,Revelation,66,22,18,συμμαρτυρουμαι γαρ παντι ακουοντι τους λογους ...,28,147
7955,Revelation,66,22,19,και εαν τις αφαιρη απο των λογων βιβλου της πρ...,33,145
7956,Revelation,66,22,20,λεγει ο μαρτυρων ταυτα ναι ερχομαι ταχυ αμην ν...,12,55


In [17]:
# List the distinct book names (pandas returns them in order of first appearance)
tr_df['book_name'].unique()

array(['Matthew', 'Mark', 'Luke', 'John', 'Acts', 'Romans',
       '1 Corinthians', '2 Corinthians', 'Galatians', 'Ephesians',
       'Philippians', 'Colossians', '1 Thessalonians', '2 Thessalonians',
       '1 Timothy', '2 Timothy', 'Titus', 'Philemon', 'Hebrews', 'James',
       '1 Peter', '2 Peter', '1 John', '2 John', '3 John', 'Jude',
       'Revelation'], dtype=object)

In [18]:
# Roll the per-verse counts up to one row of totals per book; book_name is
# kept as a regular column rather than becoming the index
book_sums = (
    tr_df
    .groupby('book_name', as_index=False)[['word_count', 'char_count']]
    .sum()
)
print(book_sums)


          book_name  word_count  char_count
0     1 Corinthians        6963       33335
1            1 John        2201        9699
2           1 Peter        1743        9300
3   1 Thessalonians        1511        7559
4         1 Timothy        1657        9159
5     2 Corinthians        4560       22651
6            2 John         259        1191
7           2 Peter        1128        6175
8   2 Thessalonians         853        4175
9         2 Timothy        1282        6754
10           3 John         221        1114
11             Acts       18967       98088
12       Colossians        1646        8131
13        Ephesians        2476       12240
14        Galatians        2262       11243
15          Hebrews        5037       26758
16            James        1802        9090
17             John       16003       72731
18             Jude         463        2581
19             Luke       20076       98628
20             Mark       11724       58415
21          Matthew       18849 

In [19]:
# Confirm what kind of object book_sums is
type(book_sums)

pandas.core.frame.DataFrame

In [20]:
# Rank books by total character count, largest first (displayed only; the
# sorted frame is not assigned to a name)
book_sums.sort_values(by='char_count', ascending=False)

Unnamed: 0,book_name,word_count,char_count
19,Luke,20076,98628
11,Acts,18967,98088
21,Matthew,18849,92322
17,John,16003,72731
20,Mark,11724,58415
24,Revelation,10113,47202
25,Romans,7249,35115
0,1 Corinthians,6963,33335
15,Hebrews,5037,26758
5,2 Corinthians,4560,22651


In [21]:
# Book name -> author label used by this analysis.
# Runs of consecutive books that share an author are filled in with
# dict.fromkeys; the overall key order is unchanged.
author_mapping = {
    'Matthew': 'Matthew',
    'Mark': 'Mark',
    'Luke': 'Luke',
    'John': 'John',
    'Acts': 'Luke',
    **dict.fromkeys(
        [
            'Romans',
            '1 Corinthians',
            '2 Corinthians',
            'Galatians',
            'Ephesians',
            'Philippians',
            'Colossians',
            '1 Thessalonians',
            '2 Thessalonians',
            '1 Timothy',
            '2 Timothy',
            'Titus',
            'Philemon',
        ],
        'Paul',
    ),
    'Hebrews': 'Unknown',
    'James': 'James',
    **dict.fromkeys(['1 Peter', '2 Peter'], 'Peter'),
    **dict.fromkeys(['1 John', '2 John', '3 John'], 'John'),
    'Jude': 'Jude',
    'Revelation': 'John',
}


In [22]:
# Attach each book's author by looking its name up in author_mapping
book_sums['author'] = book_sums['book_name'].map(author_mapping)

# Series.map yields NaN for any book_name missing from the mapping, and a
# groupby('author') with default settings would then leave those books out of
# the per-author totals without warning. Fail loudly here instead.
books_without_author = book_sums.loc[book_sums['author'].isna(), 'book_name'].tolist()
assert not books_without_author, f"No author mapped for: {books_without_author}"

In [23]:
# Display book_sums, now including the author column
book_sums

Unnamed: 0,book_name,word_count,char_count,author
0,1 Corinthians,6963,33335,Paul
1,1 John,2201,9699,John
2,1 Peter,1743,9300,Peter
3,1 Thessalonians,1511,7559,Paul
4,1 Timothy,1657,9159,Paul
5,2 Corinthians,4560,22651,Paul
6,2 John,259,1191,John
7,2 Peter,1128,6175,Peter
8,2 Thessalonians,853,4175,Paul
9,2 Timothy,1282,6754,Paul


In [24]:
# Collapse the per-book totals into one row per author; author stays a
# regular column rather than becoming the index
author_sums = (
    book_sums
    .groupby('author', as_index=False)[['word_count', 'char_count']]
    .sum()
)

In [25]:
# Rank authors by total character count, largest first (displayed only; the
# sorted frame is not assigned to a name)
author_sums.sort_values(by='char_count', ascending=False)

Unnamed: 0,author,word_count,char_count
3,Luke,39043,196716
6,Paul,33158,164028
1,John,28797,131937
5,Matthew,18849,92322
4,Mark,11724,58415
8,Unknown,5037,26758
7,Peter,2871,15475
0,James,1802,9090
2,Jude,463,2581


In [65]:
# Each author's share of all characters, as a percentage rounded to one
# decimal place. The column is added to author_sums itself.
author_sums["char_count_percent"] = (
    author_sums["char_count"] / author_sums["char_count"].sum() * 100
).round(1)

# Rank on the exact character counts rather than on the rounded percentage:
# the percentage is monotonic in char_count, so the order is the same, but
# authors whose shares round to the same value are no longer ordered arbitrarily.
author_sums_sorted = author_sums.sort_values(by="char_count", ascending=False)

author_sums_sorted

Unnamed: 0,author,word_count,char_count,char_count_percent
3,Luke,39043,196716,28.2
6,Paul,33158,164028,23.5
1,John,28797,131937,18.9
5,Matthew,18849,92322,13.2
4,Mark,11724,58415,8.4
8,Unknown,5037,26758,3.8
7,Peter,2871,15475,2.2
0,James,1802,9090,1.3
2,Jude,463,2581,0.4


In [28]:
# Sort the data by 'char_count' in ascending order (smallest first).
# NOTE(review): presumably ascending so that the largest bar ends up at the
# top of the horizontal chart — confirm against the rendered figure.
# This rebinds author_sums_sorted, replacing the descending-sorted frame
# assigned in an earlier cell.
author_sums_sorted = author_sums.sort_values(by='char_count', ascending=True)

# Horizontal bar chart: one bar per author, bar length = total character count
fig = px.bar(
    author_sums_sorted,
    x='char_count',
    y='author',
    orientation='h',
    title='Total Character Counts by Author (Sorted)',
    labels={'char_count': 'Character Count', 'author': 'Author'}
)
fig.show()


In [29]:
# Book name -> genre label used by this analysis.
# The three genres occupy consecutive runs of books, so each run is filled in
# with dict.fromkeys; the overall key order is unchanged.
genre_mapping = {
    **dict.fromkeys(['Matthew', 'Mark', 'Luke', 'John', 'Acts'], 'History'),
    **dict.fromkeys(
        [
            'Romans',
            '1 Corinthians',
            '2 Corinthians',
            'Galatians',
            'Ephesians',
            'Philippians',
            'Colossians',
            '1 Thessalonians',
            '2 Thessalonians',
            '1 Timothy',
            '2 Timothy',
            'Titus',
            'Philemon',
            'Hebrews',
            'James',
            '1 Peter',
            '2 Peter',
            '1 John',
            '2 John',
            '3 John',
            'Jude',
        ],
        'Letters',
    ),
    'Revelation': 'Prophecy',
}


In [30]:
# Apply the mapping to create the 'genre' column in book_sums DataFrame
book_sums['genre'] = book_sums['book_name'].map(genre_mapping)

# Series.map yields NaN for any book_name missing from the mapping; since the
# genre column is later used to color the chart, fail loudly on gaps here.
books_without_genre = book_sums.loc[book_sums['genre'].isna(), 'book_name'].tolist()
assert not books_without_genre, f"No genre mapped for: {books_without_genre}"

In [31]:
# Display book_sums, now including the genre column
book_sums

Unnamed: 0,book_name,word_count,char_count,author,genre
0,1 Corinthians,6963,33335,Paul,Letters
1,1 John,2201,9699,John,Letters
2,1 Peter,1743,9300,Peter,Letters
3,1 Thessalonians,1511,7559,Paul,Letters
4,1 Timothy,1657,9159,Paul,Letters
5,2 Corinthians,4560,22651,Paul,Letters
6,2 John,259,1191,John,Letters
7,2 Peter,1128,6175,Peter,Letters
8,2 Thessalonians,853,4175,Paul,Letters
9,2 Timothy,1282,6754,Paul,Letters


In [36]:
import plotly.express as px

# Define the custom color mapping for genres
color_map = {
    'History': '#a02013',  # dark red
    'Letters': '#42664a',  # dark green
    'Prophecy': '#044e7f'  # dark blue
}

# 1) Group by author to find total char_count per author
author_char_sums = book_sums.groupby('author')['char_count'].sum()

# 2) Sort authors by their total char_count (descending so largest is at the top)
author_order = author_char_sums.sort_values(ascending=True).index.tolist()

# Sort the DataFrame by char_count if you like,
# but the final order of authors will be determined by `categoryarray`.
book_sums_genre_sorted = book_sums.sort_values(by='char_count', ascending=False)

# Create the bar chart
fig = px.bar(
    book_sums_genre_sorted,
    x='char_count',
    y='author',
    orientation='h',
    color='genre',
    color_discrete_map=color_map,
    title='Total Character Counts of <i>Textus Receptus</i> by Author',
    labels={
        'book_name': 'Book',
        'author': 'Author',
        'genre': 'Genre',
        'word_count': 'Word Count',
        'char_count': 'Character Count'
    },
    # Columns for default tooltips
    hover_data=['book_name', 'author', 'genre', 'word_count', 'char_count'],
    # Columns to reference with customdata in the hovertemplate
    custom_data=['book_name', 'author', 'genre', 'word_count', 'char_count']
)

# 3) Apply the custom ordering to the y-axis
fig.update_layout(
    yaxis=dict(
        categoryorder='array',
        categoryarray=author_order  # Our sorted list of authors
    )
)

# 4) Define a custom hovertemplate that uses the order in `custom_data`
fig.update_traces(
    hovertemplate=
    "<b>%{customdata[2]}</b><br>" +         # Author
    "Book: %{customdata[0]}<br>" +         # Book Name
    "Author: %{customdata[1]}<br>" +        # Genre
    "Word Count: %{customdata[3]:,}<br>" +   # Word Count
    "Character Count: %{customdata[4]:,}<extra></extra>"  # Char Count
)

fig.show()
