In [1]:
# imports data
import pandas as pd
bible_db = pd.read_csv ('bible_db.csv')
bible_db

Unnamed: 0.1,Unnamed: 0,Chapter,Book,verse_num,cleaned_verse
0,0,1,Genesis,1,In the beginning when God created[a] the heave...
1,1,1,Genesis,2,the earth was a formless void and darkness cov...
2,2,1,Genesis,3,"Then God said, “Let there be light”; and there..."
3,3,1,Genesis,4,And God saw that the light was good; and God s...
4,4,1,Genesis,5,"God called the light Day, and the darkness he ..."
...,...,...,...,...,...
37656,37656,18,4 Maccabees,20,O bitter was that day—and yet not bitter—when ...
37657,37657,18,4 Maccabees,21,pierced the pupils of their eyes and cut out t...
37658,37658,18,4 Maccabees,22,For these crimes divine justice pursued and wi...
37659,37659,18,4 Maccabees,23,But the sons of Abraham with their victorious ...


In [2]:
# gets books with the most verses
bible_db.groupby('Book').count().sort_values('Chapter').tail(15)

Unnamed: 0_level_0,Unnamed: 0,Chapter,verse_num,cleaned_verse
Book,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1 Chronicles,942,942,942,942
2 Esdras,944,944,944,944
Deuteronomy,959,959,959,959
Acts,1003,1003,1003,1003
Matthew,1068,1068,1068,1068
Job,1070,1070,1070,1070
Luke,1147,1147,1147,1147
Exodus,1213,1213,1213,1213
Ezekiel,1273,1273,1273,1273
Numbers,1288,1288,1288,1288


In [3]:
# selects books based on sections of the Bible as defined in https://www.atikaschool.org/kcsecrenotes/major-divisions-of-the-bible
books = ('Mark', 'Psalms', 'Romans', 'Ruth', 'Leviticus', 'Jeremiah', 'Ezra', 'Joshua', 'Colossians', 'Galatians')
bible_sectioned = bible_db[bible_db.Book.isin(books)]
bible_sectioned

Unnamed: 0.1,Unnamed: 0,Chapter,Book,verse_num,cleaned_verse
2746,2746,1,Leviticus,1,The Lord summoned Moses and spoke to him from ...
2747,2747,1,Leviticus,2,Speak to the people of Israel and say to them:...
2748,2748,1,Leviticus,3,If the offering is a burnt offering from the h...
2749,2749,1,Leviticus,4,You shall lay your hand on the head of the bur...
2750,2750,1,Leviticus,5,The bull shall be slaughtered before the Lord;...
...,...,...,...,...,...
29536,29536,4,Colossians,14,"Luke, the beloved physician, and Demas greet you."
29537,29537,4,Colossians,15,Give my greetings to the brothers and sisters[...
29538,29538,4,Colossians,16,"And when this letter has been read among you, ..."
29539,29539,4,Colossians,17,"And say to Archippus, “See that you complete t..."


In [4]:
# adds category of books
def book_type(row):
    if row['Book'] == 'Mark':
        return 'Gospel'
    elif row['Book'] == 'Psalms':
        return 'Poems'
    elif row['Book'] == 'Romans' or row['Book'] == 'Colossians' or row['Book'] == 'Galatians':
        return 'Letters'
    elif row['Book'] == 'Ruth' or row['Book'] == 'Ezra' or row['Book'] == 'Joshua':
        return 'Historical'
    elif row['Book'] == 'Leviticus':
        return 'Law'
    elif row['Book'] == 'Jeremiah':
        return 'Prophets'


bible_sectioned['Category'] = bible_sectioned.apply(book_type, axis = 1)
bible_sectioned

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bible_sectioned['Category'] = bible_sectioned.apply(book_type, axis = 1)


Unnamed: 0.1,Unnamed: 0,Chapter,Book,verse_num,cleaned_verse,Category
2746,2746,1,Leviticus,1,The Lord summoned Moses and spoke to him from ...,Law
2747,2747,1,Leviticus,2,Speak to the people of Israel and say to them:...,Law
2748,2748,1,Leviticus,3,If the offering is a burnt offering from the h...,Law
2749,2749,1,Leviticus,4,You shall lay your hand on the head of the bur...,Law
2750,2750,1,Leviticus,5,The bull shall be slaughtered before the Lord;...,Law
...,...,...,...,...,...,...
29536,29536,4,Colossians,14,"Luke, the beloved physician, and Demas greet you.",Letters
29537,29537,4,Colossians,15,Give my greetings to the brothers and sisters[...,Letters
29538,29538,4,Colossians,16,"And when this letter has been read among you, ...",Letters
29539,29539,4,Colossians,17,"And say to Archippus, “See that you complete t...",Letters


In [5]:
# visualizes the data distribution

import altair as alt
bars = alt.Chart(bible_sectioned).mark_bar(size=50).encode(
    x=alt.X("Category"),
    y=alt.Y("count():Q", axis=alt.Axis(title='Number of Verses')),
    tooltip=[alt.Tooltip('count()', title='Number of Verse'), 'Category'],
    color='Category'

)

text = bars.mark_text(
    align='center',
    baseline='bottom',
).encode(
    text='count()'
)

(bars + text).interactive().properties(
    height=300, 
    width=700,
    title = "Number of Verses in each category",
)

In [6]:
bible_sectioned.to_csv('bible_training_data.csv')