In [5]:
import pandas as pd

# Location of the KJV verse table on disk
csv_file_path = '/home/rancher/projects/Bible/dataset/t_kjv.csv'

# Load the whole CSV into a pandas DataFrame
df = pd.read_csv(csv_file_path)

# Quick sanity check: show the first five rows as plain text
print(df.head(n=5))


        id  b  c  v                                                  t
0  1001001  1  1  1  In the beginning God created the heaven and th...
1  1001002  1  1  2  And the earth was without form, and void; and ...
2  1001003  1  1  3  And God said, Let there be light: and there wa...
3  1001004  1  1  4  And God saw the light, that it was good: and G...
4  1001005  1  1  5  And God called the light Day, and the darkness...


## Load genres, Bible version dataset, and book keys

In [8]:
# All three CSVs live in the same folder; keeping that location in a single
# variable means moving the data only requires editing one line.
dataset_dir = '/home/rancher/projects/Bible/dataset'

# Genre lookup table (genre id 'g' and genre name 'n')
genres = pd.read_csv(f'{dataset_dir}/key_genre_english.csv')
# KJV verse table (one row per verse)
kjv_bible = pd.read_csv(f'{dataset_dir}/t_kjv.csv')
# Book key table; it is joined to genres on 'g' and to the verses on 'b' in later cells
book = pd.read_csv(f'{dataset_dir}/key_english.csv')

print(genres.head())

   g         n
0  1       Law
1  2   History
2  3    Wisdom
3  4  Prophets
4  5   Gospels


## Merge books with genres (on genre id), then verses with books (on book number)

In [9]:
# Attach the genre name to every book by joining on the shared genre id 'g'.
# Both tables have a column named 'n', so pandas suffixes them:
# n_x is the book name and n_y is the genre name.
# NOTE: this overwrites `book`; re-load `book` before running this cell again,
# otherwise the already-merged frame gets merged a second time.
book = book.merge(genres, on='g')
print(book.head())

   b          n_x   t  g  n_y
0  1      Genesis  OT  1  Law
1  2       Exodus  OT  1  Law
2  3    Leviticus  OT  1  Law
3  4      Numbers  OT  1  Law
4  5  Deuteronomy  OT  1  Law


In [10]:
# Join every verse to its book metadata through the book number 'b'.
# Column 't' exists in both tables, so it comes out suffixed:
# t_x is the verse text and t_y is the testament.
df = kjv_bible.merge(book, on='b')
df.head()

Unnamed: 0,id,b,c,v,t_x,n_x,t_y,g,n_y
0,1001001,1,1,1,In the beginning God created the heaven and th...,Genesis,OT,1,Law
1,1001002,1,1,2,"And the earth was without form, and void; and ...",Genesis,OT,1,Law
2,1001003,1,1,3,"And God said, Let there be light: and there wa...",Genesis,OT,1,Law
3,1001004,1,1,4,"And God saw the light, that it was good: and G...",Genesis,OT,1,Law
4,1001005,1,1,5,"And God called the light Day, and the darkness...",Genesis,OT,1,Law


## Rename column headers

In [16]:
# Swap the merge-generated suffixed names for descriptive ones
df = df.rename(
    columns={
        't_x': 'content',    # verse text
        'n_x': 'book',       # book name
        't_y': 'testament',  # OT / NT
        'n_y': 'genre',      # genre name
    }
)
df.head()

Unnamed: 0,id,b,c,v,content,book,testament,g,genre
0,1001001,1,1,1,In the beginning God created the heaven and th...,Genesis,OT,1,Law
1,1001002,1,1,2,"And the earth was without form, and void; and ...",Genesis,OT,1,Law
2,1001003,1,1,3,"And God said, Let there be light: and there wa...",Genesis,OT,1,Law
3,1001004,1,1,4,"And God saw the light, that it was good: and G...",Genesis,OT,1,Law
4,1001005,1,1,5,"And God called the light Day, and the darkness...",Genesis,OT,1,Law


In [None]:
# Collect the current column names into a plain Python list and show them
column_headers = list(df.columns)
print(column_headers)

## Search for a specific term (print the matching verses and save them to CSV)

In [26]:
# Text to look for in the verse content (matched literally, ignoring case)
search_term = 'Jesus'
# Book numbers (column 'b') to restrict the search to
items_to_search = [40]

# Build each condition as its own boolean mask so the filter is easy to read.
# regex=False treats the term as plain text, so characters such as '.' or '('
# in a search term are not interpreted as regular-expression syntax.
# na=False makes verses with missing text count as non-matches instead of
# putting NaN into the boolean mask.
in_selected_books = df['b'].isin(items_to_search)
mentions_term = df['content'].str.contains(search_term, case=False, regex=False, na=False)
filtered_df = df[in_selected_books & mentions_term]

# Show the matching rows
print(filtered_df)

# Destination file for the matching rows
output_file = '/home/rancher/projects/Bible/dataset/output/filtered.csv'

# Save the matches; index=False leaves the DataFrame index out of the file
filtered_df.to_csv(output_file, index=False)


             id   b   c   v  \
23145  40001001  40   1   1   
23160  40001016  40   1  16   
23162  40001018  40   1  18   
23165  40001021  40   1  21   
23169  40001025  40   1  25   
...         ...  ..  ..  ..   
24200  40028005  40  28   5   
24204  40028009  40  28   9   
24205  40028010  40  28  10   
24211  40028016  40  28  16   
24213  40028018  40  28  18   

                                                 content     book testament  \
23145  The book of the generation of Jesus Christ, th...  Matthew        NT   
23160  And Jacob begat Joseph the husband of Mary, of...  Matthew        NT   
23162  Now the birth of Jesus Christ was on this wise...  Matthew        NT   
23165  And she shall bring forth a son, and thou shal...  Matthew        NT   
23169  And knew her not till she had brought forth he...  Matthew        NT   
...                                                  ...      ...       ...   
24200  And the angel answered and said unto the women...  Matthew        N

In [15]:
# Per-book summary: highest chapter number ('c') and number of verse rows ('v'),
# ordered so the books with the most verses come first.
(
    df.groupby(['book', 'testament', 'genre'])
    .agg(c=('c', 'max'), v=('v', 'count'))
    .reset_index()
    .sort_values(by='v', ascending=False)
)

Unnamed: 0,book,testament,genre,c,v
58,Psalms,OT,Wisdom,150,2461
29,Genesis,OT,Law,50,1533
36,Jeremiah,OT,Prophets,52,1364
34,Isaiah,OT,Prophets,66,1292
53,Numbers,OT,Law,36,1288
...,...,...,...,...,...
55,Philemon,NT,Epistles,1,25
42,Jude,NT,Epistles,1,25
54,Obadiah,OT,Prophets,1,21
16,3 John,NT,Epistles,1,15
