# Scriptura Notebook

This notebook collects the data we want to include in the Scriptura database and converts it to the required format, generating CSV files.

#### Install dependencies

In [None]:
%pip install pandas

#### Import dependencies and utils

In [None]:
import pandas as pd

import secrets
import string
import json

#### Define utility functions

In [None]:
def uid(length):
    """Return a random identifier made of ``length`` characters.

    Each character is picked with ``secrets.choice`` from the ASCII letters
    (both cases) and the decimal digits.
    """
    pool = string.ascii_letters + string.digits
    picked = [secrets.choice(pool) for _ in range(length)]
    return ''.join(picked)

def to_kebab_case(s):
    """Return ``s`` lower-cased with every space replaced by a hyphen."""
    lowered = s.lower()
    return lowered.replace(' ', '-')

### Encyclopedia

In [None]:
# Fetch theographic: clone the theographic-bible-metadata repository into
# ./dist/theographic. The cells below read dist/theographic/CSV/Books.csv and
# dist/theographic/json/{verses,books,chapters}.json from this checkout.
# NOTE(review): on a re-run with the checkout already present, git is expected
# to report that the destination already exists - confirm this is acceptable.
!git clone https://github.com/robertrouse/theographic-bible-metadata.git ./dist/theographic

##### Bible structure

In [None]:
# Testaments
#
# Writes the two testament rows to output/testament.csv.

testament_columns = ["id", "uid", "name", "slug", "code"]
testament_rows = [
    [1, uid(12), "Old Testament", "old", "OT"],
    [2, uid(12), "New Testament", "new", "NT"],
]

# Pivot the row-wise literals into the column -> values mapping pandas expects.
testaments = {
    column: [row[position] for row in testament_rows]
    for position, column in enumerate(testament_columns)
}

df = pd.DataFrame(testaments)

df.to_csv("output/testament.csv", index=False)

print(df)

In [None]:
# Book divisions
#
# Builds output/book_division.csv: one row per distinct `bookDiv` value found
# in the theographic Books.csv, in order of first appearance.

# Zero-based positions 0-4 (the first five divisions) get testament 1, every
# later division gets testament 2.
# NOTE(review): assumes Books.csv lists the Old Testament divisions first - confirm.
OT_DIVISION_COUNT = 5

books_raw = pd.read_csv('dist/theographic/CSV/Books.csv')

# unique() keeps first-appearance order; dropna() mirrors groupby, which skips
# rows whose key is missing.
division_names = books_raw['bookDiv'].dropna().unique()

# Build the frame in one step rather than adding columns to a column subset of
# another frame, which is what triggers pandas' SettingWithCopyWarning.
df = pd.DataFrame({
    'id': range(1, len(division_names) + 1),
    'uid': [uid(12) for _ in range(len(division_names))],
    'name': division_names,
})

df['slug'] = df['name'].map(to_kebab_case)
df['testament_id'] = [1 if position < OT_DIVISION_COUNT else 2 for position in range(len(df))]

df.to_csv("output/book_division.csv", index=False)

print(df)

In [None]:
# Book
#
# Builds output/book.csv from the theographic Books.csv, attaching the
# book_division_id written earlier to output/book_division.csv.

# Rows at zero-based positions 0-38 (the first 39 books) get testament 1, the
# rest testament 2.
# NOTE(review): this is positional, so it assumes Books.csv is in canonical
# order with the Old Testament first - confirm.
OT_BOOK_COUNT = 39

# Division name -> book_division_id lookup.
bd_df = (
    pd.read_csv('output/book_division.csv')
    .rename(columns={'id': 'book_division_id'})
    [['book_division_id', 'name']]
)

rename_dict = {
    'bookOrder': 'id',
    'bookName': 'name',
    'shortName': 'short_name',
    'yearWritten': 'year_written',
}

# Chained, non-inplace operations: every step returns a new frame, so
# re-running the cell always starts from the file contents.
df = (
    pd.read_csv('dist/theographic/CSV/Books.csv')
    .rename(columns=rename_dict)
    .join(bd_df.set_index('name'), on='bookDiv')
)

df = df.assign(uid=[uid(12) for _ in range(len(df))])

# assign() returns a fresh frame, so adding testament_id to the column subset
# does not raise SettingWithCopyWarning.
df = df[['id', 'uid', 'name', 'slug', 'short_name', 'year_written', 'book_division_id']].assign(
    testament_id=[1 if position < OT_BOOK_COUNT else 2 for position in range(len(df))]
)

df.to_csv("output/book.csv", index=False)

print(df)

In [19]:
# Chapter
#
# Builds output/chapter.csv: one row per (book, chapter number), expanding each
# book's `chapterCount` from the theographic Books.csv.

b_df = pd.read_csv('dist/theographic/CSV/Books.csv')

# book_id is taken from `bookOrder`, the same column the Book cell exports as
# book.csv's `id`, instead of the row position, so the two files agree even if
# the CSV rows are not stored in book order. Sorting by it keeps the sequential
# chapter ids in book order as well.
chapters = [
    {'book_id': int(book.bookOrder), 'chapter_num': chapter_num}
    for book in b_df.sort_values('bookOrder').itertuples(index=False)
    for chapter_num in range(1, int(book.chapterCount) + 1)
]

# Explicit columns keep the frame well-formed even when no chapter was produced.
df = pd.DataFrame(chapters, columns=['book_id', 'chapter_num'])

# Sequential 1-based primary key followed by a random public identifier.
df.insert(0, 'id', range(1, len(df) + 1))
df.insert(1, 'uid', [uid(12) for _ in range(len(df))])

df = df[['id', 'uid', 'book_id', 'chapter_num']]

df.to_csv("output/chapter.csv", index=False)

print(df)

        id           uid  book_id  chapter_num
0        1  i5cHtqTXgKZt        1            1
1        2  Oiduc64kQFcX        1            2
2        3  lw2nTblDIFlV        1            3
3        4  aIoI3Am1STzy        1            4
4        5  kHJvqlJQBBtJ        1            5
...    ...           ...      ...          ...
1184  1185  eaimTpRItCgA       66           18
1185  1186  XhacGOuUFF2a       66           19
1186  1187  bVlXie14fmzt       66           20
1187  1188  7ojjx6x3mmVI       66           21
1188  1189  QWhd69dwAgy1       66           22

[1189 rows x 4 columns]


In [18]:

# Verse
#
# Parses the theographic verses.json into a DataFrame of verse rows.
# `chapter_id` is left empty for now; it is not resolved in this cell.

# JSON files are read as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
with open("dist/theographic/json/verses.json", "r", encoding="utf-8") as file:
    verses = json.load(file)

with open("dist/theographic/json/books.json", "r", encoding="utf-8") as file:
    books = json.load(file)

# Loaded but not used yet in this cell.
with open("dist/theographic/json/chapters.json", "r", encoding="utf-8") as file:
    chapters = json.load(file)

parsed_verses = []

books_uuids = list(book["id"] for book in books)

# Record id -> 1-based position in books.json. A dict lookup replaces a linear
# list.index() scan per verse; setdefault keeps the first position if an id
# were ever repeated, matching list.index().
# NOTE(review): assumes books.json is stored in book order - confirm.
book_id_by_uuid = {}
for position, book_uuid in enumerate(books_uuids, start=1):
    book_id_by_uuid.setdefault(book_uuid, position)

for i, verse in enumerate(verses, start=1):
    fields = verse["fields"]
    # Characters 2-4 of verseID are parsed as the chapter number and the
    # remainder as the verse number.
    verse_code = fields["verseID"]

    parsed_verses.append({
        'id': i,
        'book_id': book_id_by_uuid[fields["book"][0]],
        'chapter_id': None,
        'theographic_id': verse["id"],
        'chapter_num': int(verse_code[2:5]),
        'verse_num': int(verse_code[5:]),
        # yearNum is optional; verses without it get None.
        'year': fields.get("yearNum"),
        'status': fields["status"],
    })

df = pd.DataFrame(parsed_verses)

print(df)


          id  book_id chapter_id     theographic_id  chapter_num  verse_num  \
0          1        1       None  rec7mkRLwey2ntUG9            1          1   
1          2        1       None  rec7DRaTJV5ANnvEj            1          2   
2          3        1       None  recxtLDOVbJSsI8iR            1          3   
3          4        1       None  recw6dtvOn3H9jJe6            1          4   
4          5        1       None  recB8C8aeTDYkHjKZ            1          5   
...      ...      ...        ...                ...          ...        ...   
31097  31098       66       None  recVNA6ojuOF4V66a           22         17   
31098  31099       66       None  rec8zqyVi1XnnkcYN           22         18   
31099  31100       66       None  recb6AU07t1PvNroS           22         19   
31100  31101       66       None  recuxhlLPzgp2xNXh           22         20   
31101  31102       66       None  recogsGhS7vA94UZh           22         21   

         year   status  
0     -4004.0  publish  
1

#### TODO: rest of the structure