# 0. Introduction
Overview of the methods implemented in the DRHWrapper for Python. For most use cases the answerset endpoint will be the most important one; it is detailed in section 4.

In [1]:
from drhwrapper import DRHWrapper
from datetime import datetime
from itertools import islice
import pandas as pd 
# Single wrapper instance; every cell below calls the API through `drh`.
drh = DRHWrapper()

# 1. List endpoints
`list` methods provide less information than `find` methods. They are useful to get an overview because multiple (or all) records can be obtained through 1 call to the API. There are list endpoints for the following data: 
 
- entries (`list_entries`)
- entry tags (`list_entry_tags`)
- regions (`list_regions`)
- region tags (`list_region_tags`)

`list` methods share some common arguments: 
- `limit`: maximum number of records (entries, regions, etc.) to return
- `offset`: offset from first match
- `start_date`, `end_date`: published after start_date and before end_date
- `ordering`: feature to order by.

Other arguments are specific to endpoints, e.g.: 
- `expert`, `created_by`, `approved`, `poll`, `region`, etc.

By default, `list` endpoints return a `pd.DataFrame`. This behavior can be overridden by setting `to_dataframe` to `False` (returning a `dict`).

## 1.1. List entries
`list_entries` provides metadata (e.g., date-range, region, expert, etc.) for entries in the DRH. This allows for quick and easy access to the metadata of all entries in the DRH.

In [2]:
# default behavior: no filters applied
listed_entries = drh.list_entries()  # returned as a DataFrame unless to_dataframe=False
listed_entries.head(5)

Unnamed: 0,entry_id,entry_name,expert_id,expert_name,poll_id,poll_name,date_created,year_from,year_to,region_id,region_name,tags
0,15,Northern Song,73,Sarah Primmer,8,Polity,2014-04-17T05:00:00Z,960,1127,113,Northern Song,"[{'id': 2, 'name': 'Asia', 'approved': True}, ..."
1,23,Late Shang Religion,22,Clayton Ashton,35,Religious Group (v5),2014-04-17T05:00:00Z,-1250,-1045,7,Middle and Lower Yellow River Valley,"[{'id': 8, 'name': 'Religious Group', 'approve..."
2,173,Johannine Christianity,155,Rikard Roitto,35,Religious Group (v5),2014-11-27T22:05:55Z,90,120,483,Ephesus,"[{'id': 8, 'name': 'Religious Group', 'approve..."
3,174,Matthew-James-Didache Movement,156,Vojtech Kase,35,Religious Group (v5),2014-11-27T22:11:25Z,50,160,352,Syria,"[{'id': 8, 'name': 'Religious Group', 'approve..."
4,176,Qumran Movement,164,Jutta Jokiranta,35,Religious Group (v5),2014-11-27T22:22:29Z,-150,68,486,Jerusalem and environs,"[{'id': 8, 'name': 'Religious Group', 'approve..."


In [3]:
# Same endpoint with every argument set, returned as the raw dict response.
listed_entries = drh.list_entries(
    to_dataframe=False,
    limit=20,  # at most 20 entries
    start_date=datetime(2018, 1, 1),  # published from 2018-01-01
    end_date="2019-01-01",  # to 2019-01-01 (a date string is accepted as well)
    expert=697,  # by the expert with id 697
    poll=[35, 43],  # in the polls with id 35 or 43
    region=[805, 841],  # in the regions with id 805 or 841
    ordering="date_created",  # ordered by creation date
    offset=0,  # start from the first match (default)
)

# Spot-check the first two results against the filters above.
for i in range(2):
    record = listed_entries["results"][i]
    expert, region_record = record["expert"], record["region"]

    entry_name = record["name"]["name"]
    poll_name = record["poll"]["name"]
    date_created = record["date_created"]
    year_from, year_to = record["year_from"], record["year_to"]

    expert_id = expert["id"]
    expert_first_name, expert_last_name = expert["first_name"], expert["last_name"]
    expert_name = f"{expert_first_name} {expert_last_name}"

    region, region_id = region_record["name"], region_record["id"]

    print(
        f"entry: {entry_name} ({year_from}-{year_to}) published as a {poll_name} poll"
    )
    print(f"by: {expert_name} (id: {expert_id}) on {date_created}")
    print(f"region: {region} (id: {region_id})")
    print("\n")

entry: Khalka Mongols (1895-1930) published as a Religious Group (v5) poll
by: Emily Pitek (id: 697) on 2018-05-10T16:28:34.382442Z
region: Mongolia ca. 1920 (id: 805)


entry: Kurd (1926-1955) published as a Religious Group (v6) poll
by: Emily Pitek (id: 697) on 2018-08-07T14:21:11.619759Z
region: Kurdistan (id: 841)




## 1.2. list entry tags

In [4]:
# default: no filters, result comes back as a DataFrame
listed_entry_tags = drh.list_entry_tags()
listed_entry_tags.head(5)

Unnamed: 0,entry_tag_id,entry_tag_name,approved,parent_tag_id,created,created_by_id,created_by_username,created_by_name
0,2,Asia,True,7.0,2016-05-17T02:39:38.090109Z,1,root,
1,3,China,True,2.0,2016-05-17T02:39:38.090109Z,1,root,
2,4,Middle East,True,7.0,2016-05-17T02:39:38.090109Z,1,root,
3,5,South Pacific Ocean,True,7.0,2016-05-17T02:39:38.090109Z,1,root,
4,6,Japan,True,2.0,2016-05-17T02:39:38.090109Z,1,root,


In [5]:
# Entry tags with every argument set, returned as the raw dict response.
listed_entry_tags = drh.list_entry_tags(
    to_dataframe=False,
    limit=20,  # at most 20 entry tags
    start_date=datetime(2021, 1, 1),  # created from 2021-01-01
    end_date=datetime(2022, 1, 1),  # to 2022-01-01
    approved=True,  # approved entry tags only
    created_by=[969, 1000],  # created by the experts with these ids
    offset=0,  # start from the first match
    ordering="created",  # ordered by creation date
)

# Spot-check the first two results against the filters above.
for i in range(2):
    tag = listed_entry_tags["results"][i]
    creator = tag["created_by"]

    entry_tag_id, entry_tag_name = tag["id"], tag["name"]
    date_created, approved = tag["created"], tag["approved"]

    expert_id = creator["id"]
    expert_first_name, expert_last_name = creator["first_name"], creator["last_name"]
    expert_name = f"{expert_first_name} {expert_last_name}"

    print(f"entry tag: {entry_tag_name} (id: {entry_tag_id})")
    print(f"published by {expert_name} (id: {expert_id}) on {date_created}")
    print(f"has been approved: {approved}")
    print("\n")

entry tag: Temple of Confucius (Kong Temple) (id: 1077)
published by James Flath (id: 1000) on 2021-01-13T21:13:53.347422Z
has been approved: True


entry tag: Memorial arch (id: 1078)
published by James Flath (id: 1000) on 2021-01-13T21:14:45.423346Z
has been approved: True




## 1.3. List regions

In [6]:
# default: no filters, result comes back as a DataFrame
listed_regions = drh.list_regions()
listed_regions.head(5)

Unnamed: 0,region_id,region_name,description,created_by_id,created_by_name,geom,tags
0,6,Ancient Mediterranean,"Ancient Mediterranean tagged with ""Western Eur...",140,Andreas Bendlin,"[[[[55.520348384170575, -20.949366318200987], ...","[{'id': 3, 'name': 'Europe'}, {'id': 296, 'nam..."
1,7,Middle and Lower Yellow River Valley,Middle and Lower Yellow River Valley + Areas o...,22,Clayton Ashton,"[[[[108.66096496582, 34.34326171875], [116.184...","[{'id': 1, 'name': 'Asia'}, {'id': 37, 'name':..."
2,10,Qin Dynasty boundaries,Qin Dynasty boundaries,3,Carson Logan,"[[[[120.41260643779995, 27.15313777327598], [1...","[{'id': 1, 'name': 'Asia'}, {'id': 37, 'name':..."
3,13,Byzantine Empire (395-632),Byzantine Empire (395-632),3,Carson Logan,"[[[[18.543087244033988, 30.499881505965988], [...","[{'id': 3, 'name': 'Europe'}, {'id': 5, 'name'..."
4,14,Rotuma,"Note: Rotuma is a very small island, near Fiji...",1,,"[[[[177.06265640329, -12.485961914063], [177.1...","[{'id': 13, 'name': 'Oceania'}, {'id': 309, 'n..."


In [7]:
# Regions with every argument set, returned as the raw dict response.
listed_regions = drh.list_regions(
    to_dataframe=False,
    limit=20,  # at most 20 regions
    start_date=datetime(2015, 6, 1),  # created from 2015-06-01
    end_date=datetime(2016, 6, 1),  # to 2016-06-01
    created_by=1,  # created by the user with id 1
    offset=2,  # skip the first two matches, i.e. start from the 3rd
    ordering="created",  # ordered by creation date
)

# Spot-check the first two results against the filters above.
# They were created by "root" (id: 1), for which no first/last name is stored,
# so the name part of the printed line is blank.
for i in range(2):
    region_record = listed_regions["results"][i]
    creator = region_record["created_by"]

    region_id, region_name = region_record["id"], region_record["name"]
    date_created = region_record["date_created"]

    expert_id = creator["id"]
    expert_first_name, expert_last_name = creator["first_name"], creator["last_name"]
    expert_name = f"{expert_first_name} {expert_last_name}"

    print(f"region: {region_name} (id: {region_id})")
    print(f"published by {expert_name} (id: {expert_id}) on {date_created}")
    print("\n")

region: Metro Vancouver (id: 589)
published by   (id: 1) on 2016-03-04T04:24:45.080472Z


region: Central Canada & USA (id: 590)
published by   (id: 1) on 2016-03-10T01:36:13.810016Z




## 1.4. List region tags

In [8]:
# default: no filters, result comes back as a DataFrame
listed_region_tags = drh.list_region_tags()
listed_region_tags.head(5)

Unnamed: 0,region_tag_id,region_tag_name,approved,parent_tag_id,created,created_by_id,created_by_username,created_by_name
0,1,Asia,True,,2015-09-23T15:50:23.128769Z,1,root,
1,3,Europe,True,,2015-09-23T15:50:23.128769Z,1,root,
2,5,Middle East,True,,2015-09-23T15:50:23.128769Z,1,root,
3,13,Oceania,True,,2015-09-23T15:50:23.128769Z,1,root,
4,14,Africa,True,,2015-09-23T15:50:23.128769Z,1,root,


In [9]:
# Region tags with every argument set, returned as the raw dict response.
listed_region_tags = drh.list_region_tags(
    to_dataframe=False,
    limit=10,  # at most 10 region tags
    start_date=datetime(2019, 1, 1),  # created from 2019-01-01
    end_date=datetime(2022, 1, 1),  # to 2022-01-01
    approved=False,  # non-approved region tags only
    created_by=[745, 782],  # created by the experts with these ids
    offset=0,  # start from the first match
    ordering="created",  # ordered by creation date
)

# Spot-check the first two results against the filters above.
for i in range(2):
    tag = listed_region_tags["results"][i]
    creator = tag["created_by"]

    region_tag_id, region_tag_name = tag["id"], tag["name"]
    date_created, approved = tag["created"], tag["approved"]

    expert_id = creator["id"]
    expert_first_name, expert_last_name = creator["first_name"], creator["last_name"]
    expert_name = f"{expert_first_name} {expert_last_name}"

    print(f"region tag: {region_tag_name} (id: {region_tag_id})")
    print(f"published by {expert_name} (id: {expert_id}) on {date_created}")
    print(f"has been approved: {approved}")
    print("\n")


region tag: England (id: 379)
published by Hazel J. Hunter Blair (id: 745) on 2019-04-17T15:40:26.710649Z
has been approved: False


region tag: Eastern Himalayas (id: 386)
published by Dagmar Schwerk (id: 782) on 2019-11-29T00:20:22.688568Z
has been approved: False




# 2. Find endpoints

`find` endpoints always look up a record by its ID:
- For entries: entry id
- For entry tags: entry tag id. 
- For regions: region id
- For region tags: region tag id

A `find` call always returns exactly one result.

## 2.1. Find entry (by entry ID)

In [10]:
# look up a single entry by its entry id; the record is handled as a dict below
entry_match = drh.find_entry(23)
# the full record is long, so display only its first 10 key/value pairs
dict(islice(entry_match.items(), 10))

{'id': 23,
 'name': {'id': 47, 'name': 'Late Shang Religion'},
 'alternative_names': [],
 'description': "This entry was completed by Clayton Ashton, a Ph.D ABD student in early Chinese studies at UBC, utilizing Keightley's relevant published sources. The entry was then printed out and mailed to Keightley for his perusal and editing. Finally, Edward Slingerland visited Keightley at his home in Berkeley on August 13, 2014, discussed the entries for both Shang religion and Shang polity, and noted the edits or additions that Keightley recommended.",
 'external_url': '',
 'date_created': '2014-04-17T05:00:00Z',
 'date_updated': '2019-07-09T17:55:04.185850Z',
 'year_from': -1250,
 'year_to': -1045,
 'region': {'id': 7,
  'name': 'Middle and Lower Yellow River Valley',
  'geojson': {'type': 'MultiPolygon',
   'coordinates': [[[[108.66096496582, 34.34326171875],
      [116.18407845497, 33.867191076279],
      [118.98811340332, 37.68310546875],
      [118.76838684082, 38.16650390625],
      [1

## 2.2. find entry tag (by tag ID)

In [11]:
# look up a single entry tag by its id
entry_tag_match = drh.find_entry_tag("8")  # the id may also be passed as a string
entry_tag_match

{'id': 8,
 'name': 'Religious Group',
 'approved': True,
 'parent_tag_id': None,
 'created': '2016-05-17T02:39:38.090109Z',
 'created_by': {'id': 1,
  'username': 'root',
  'first_name': '',
  'last_name': ''}}

## 2.3. find region (by region ID)

In [12]:
# look up a single region by its region id
region_match = drh.find_region(805)
# display only the first 10 key/value pairs (the geometry alone is very long)
dict(islice(region_match.items(), 10))

{'id': 805,
 'name': 'Mongolia ca. 1920',
 'date_created': '2018-05-10T16:28:34.362386Z',
 'date_modified': '2018-05-10T16:28:34.362537Z',
 'description': 'This entry focuses specifically on the Khalkha Mongols of the Narobanchin Temple Territory, in what was historically western Outer Mongolia (now Mongolia). (ca. 1920)',
 'additional_info': None,
 'created_by': {'id': 697,
  'username': 'Emily_Pitek_1525199247',
  'first_name': 'Emily',
  'last_name': 'Pitek'},
 'geom': {'type': 'MultiPolygon',
  'coordinates': [[[[96.82245703881323, 47.16352274793011],
     [97.21796485131323, 47.16725746445123],
     [97.64093848412571, 47.07381077312135],
     [97.90461035912574, 46.8864249110966],
     [97.53656836693824, 46.57391397896603],
     [97.38275977318821, 46.50212059609866],
     [96.31159278100075, 46.49455787589096],
     [95.92157813256323, 46.60788824114182],
     [95.87213965600074, 46.84511187744965],
     [96.57526465600073, 47.14858125607304],
     [96.82245703881323, 47.163522

## 2.4. find region tag (by tag ID)

In [13]:
# Region tag ids are not region ids: 805 is the region "Mongolia ca. 1920"
# in find_region, but the region tag "Kition" here.
region_tag_match = drh.find_region_tag(805)
region_tag_match

{'id': 805,
 'name': 'Kition',
 'approved': True,
 'parent_tag_id': 735,
 'created': '2022-07-28T08:27:01.152580Z',
 'created_by': {'id': 1304,
  'username': 'Marina_Toumpouri_1651640835',
  'first_name': 'Marina',
  'last_name': 'Toumpouri'}}

# 3. Related Questions
In the Database of Religious History (DRH) questions can be related to other questions. This typically connects questions across different polls, but can also connect questions within the same poll. These can be extracted through the `get_related_questions()` method. 

In [14]:
# default: simplified question relations as a DataFrame
questionrelations = drh.get_related_questions()
questionrelations.head(5)

Unnamed: 0,question_id,related_question_id
0,2231,2231
1,4693,2231
2,2232,2232
3,4669,2232
4,6894,2232


By default the method returns a dataframe of `question_id` (primary key) and `related_question_id`, where `related_question_id` is the lowest Question ID within the group of related questions. 

In [15]:
# NOTE(review): this cell was labelled "raw data (as dataframe)", but it passes
# simplify=True and its recorded output is identical to the default call above.
# Presumably simplify=False was intended -- confirm against the wrapper and re-run.
questionrelations = drh.get_related_questions(simplify=True)
questionrelations.head(5)

Unnamed: 0,question_id,related_question_id
0,2231,2231
1,4693,2231
2,2232,2232
3,4669,2232
4,6894,2232


This behavior can be switched off by setting `simplify=False` to obtain the question relations as they are stored in the DRH.

In [16]:
# raw data (as a list of dictionaries, no preprocessing by the wrapper)
questionrelations = drh.get_related_questions(to_dataframe=False)
questionrelations[0:5]  # first five relations

[{'id': 18842, 'first_question_id': 6335, 'second_question_id': 4652},
 {'id': 18843, 'first_question_id': 6335, 'second_question_id': 5658},
 {'id': 18844, 'first_question_id': 6335, 'second_question_id': 6335},
 {'id': 18845, 'first_question_id': 6335, 'second_question_id': 5231},
 {'id': 18846, 'first_question_id': 6335, 'second_question_id': 3475}]

Finally, the data can be obtained as a list of dictionaries with no preprocessing done by the wrapper.

# 4. Answersets
To obtain full answersets use the `get_answerset()` method. This method requires a Question Name and fetches the full answerset for all entries that have this exact question. Question names can be found by browsing https://religiondatabase.org/

Note that some questions might be worded differently across different polls (questionnaires). For instance, there is a question called "Is a spirit-body distinction present:" (for religious group polls) and a question called "Is a spirit-body distinction present in the text?" (for religious text polls). In this case, if you want to use answers from both the Text poll and the Group poll you will need to fetch these answers in two separate calls. 

In [17]:
# The same question is worded differently in the group and the text polls,
# so each wording has to be fetched with its own call.
question_names = [
    "Is a spirit-body distinction present:",
    "Is a spirit-body distinction present in the text?",
]

# get answersets (this will take a minute); every frame is tagged with the
# question wording it was fetched for, so the source stays visible after merging
answerset_list = [
    drh.get_answerset(question_name=question_name, to_dataframe=True).assign(
        question_name=question_name
    )
    for question_name in question_names
]

# concatenate the answersets; ignore_index=True gives the combined frame a fresh
# 0..n-1 index -- without it the row labels of the individual frames are kept
# as-is and can repeat, which makes label-based lookups (.loc) ambiguous
answerset_df = pd.concat(answerset_list, ignore_index=True)

In [18]:
# here we can see that we have successfully matched group and text poll entries:
# in the counts below each question wording only occurs in its own poll type
answerset_df.groupby(['question_name', 'poll_name']).size()

question_name                                      poll_name            
Is a spirit-body distinction present in the text?  Religious Text (v0.1)      1
                                                   Religious Text (v1.0)    322
Is a spirit-body distinction present:              Religious Group (v5)     218
                                                   Religious Group (v6)     681
dtype: int64

In [19]:
# check dtypes for this endpoint as it is important
# (note in the output that date_created has dtype object, not a datetime dtype)
answerset_df.dtypes

entry_id                int64
entry_name             object
poll_id                 int64
poll_name              object
question_id             int64
answer_name            object
answer_value            int64
year_from               int64
year_to                 int64
region_id               int64
status_participants    object
expert_id               int64
expert_name            object
date_created           object
question_name          object
dtype: object

This endpoint provides the key information that is tied to answers. Note that while entries have an overall (default) `date-range`, `region`, and `status of participants`, this information can also be provided specifically for answers to individual questions. While the overall (default) information for entries can be obtained through the `list_entries()` method, the `get_answerset()` method provides this at question-level.

# 5. Utility 
Construct dataframe with all essential information on an entry in the DRH: 
- answers to questions
- metadata (e.g., expert, editor, etc.)
- temporal span and geographical information (incl. geom)

Everything below depends on the endpoint `find_entry()`, which does not scale well with many calls. We recommend using the endpoint `get_answerset()` described above instead.

## 5.1. Entries

### 5.1.1. get entry information from list
`drh.dataframe_from_entry_id_list()` gathers all information from the `drh.find_entry()` method for all entry IDs provided and returns as a dataframe. 

In [20]:
# take 10 listed entries (offset 500 from the first match) and collect their ids
listed_entries = drh.list_entries(limit=10, offset=500)
entry_id_list = listed_entries['entry_id'].unique()
# fetch the full record for every id; slow -- the run recorded below took about 4 s per entry
df_entries = drh.dataframe_from_entry_id_list(entry_id_list)

100%|██████████| 10/10 [00:39<00:00,  3.98s/it]


In [21]:
# Preview a handful of the important columns for the first three entries.
preview_columns = [
    "entry_id",
    "entry_name",
    "description",
    "year_from",
    "year_to",
    "region",
    "poll",
    "tags",
    "categories",
]
df_entries[preview_columns].head(3)

Unnamed: 0,entry_id,entry_name,description,year_from,year_to,region,poll,tags,categories
0,939,Goodenough and Fergusson Islanders,The Goodenough and Fergussoon Islands are in t...,1875,1900,"{'id': 1085, 'name': 'Goodenough Island - Nort...","{'id': 43, 'name': 'Religious Group (v6)', 'de...","[{'id': 8, 'name': 'Religious Group'}, {'id': ...","[{'id': 77, 'name': 'Sources', 'groups': [], '..."
1,940,Julio-Claudian Imperial Cult,The term ‘imperial cult’ refers to the worship...,-31,68,"{'id': 132, 'name': 'Roman Empire (Greatest Ex...","{'id': 43, 'name': 'Religious Group (v6)', 'de...","[{'id': 8, 'name': 'Religious Group'}, {'id': ...","[{'id': 77, 'name': 'Sources', 'groups': [], '..."
2,941,Chan Buddhists in early Qing period,Chan Buddhists refer to the religious practiti...,1600,1700,"{'id': 1087, 'name': 'China in the seventeenth...","{'id': 43, 'name': 'Religious Group (v6)', 'de...","[{'id': 8, 'name': 'Religious Group'}, {'id': ...","[{'id': 77, 'name': 'Sources', 'groups': [], '..."


### 5.1.2. extract key entry (metadata) information
Utility function to clean the dataframe and return key columns.

In [22]:
# reduce the full entry frame to its key metadata columns
df_entries_clean = drh.extract_entry_information(df_entries)
df_entries_clean.head(2)

Unnamed: 0,entry_id,entry_name,description,date_created,year_from,year_to,region_id,region_name,expert_id,expert_name,poll_id,poll_name
0,939,Goodenough and Fergusson Islanders,The Goodenough and Fergussoon Islands are in t...,2020-08-04T03:44:24.479188Z,1875,1900,1085,Goodenough Island - Northern D'Entrecasteaux,710,Thomas White,43,Religious Group (v6)
1,940,Julio-Claudian Imperial Cult,The term ‘imperial cult’ refers to the worship...,2020-08-04T16:30:00.660649Z,-31,68,132,Roman Empire (Greatest Extent),892,Treasa Bell,43,Religious Group (v6)


### 5.1.3. Finding answers (categories)
It is possible to manually extract the answerset through the `categories` column (or key, value in dictionary). 
However, this is difficult given the organization and it is recommended to use the implemented method for this. 

In [23]:
# The raw df_entries is required here; df_entries_clean will not do because
# the full answerset is omitted in the cleaned metadata.
answerset = drh.extract_answer_information(df_entries)

# Show three example rows, restricted to the answer-level columns.
answer_columns = [
    "entry_id",
    "entry_name",
    "question_id",
    "question_name",
    "answer_set_year_from",
    "answer_set_year_to",
    "answer_set_region_id",
    "answer_name",
    "answer_value",
]
answerset[answer_columns].iloc[100:103]

Unnamed: 0,entry_id,entry_name,question_id,question_name,answer_set_year_from,answer_set_year_to,answer_set_region_id,answer_name,answer_value
100,939,Goodenough and Fergusson Islanders,5132,Does membership in this religious group requir...,1875,1900,1085,No,0
101,939,Goodenough and Fergusson Islanders,5137,Does membership in this religious group requir...,1875,1900,1085,No,0
102,939,Goodenough and Fergusson Islanders,5142,Does membership in this religious group requir...,1875,1900,1085,No,0


## 5.2. Extract regions 

In [24]:
# region id, name, geometry and description for the entries in df_entries
regions = drh.extract_region_information(df_entries)
regions.head(5)

Unnamed: 0,entry_id,entry_name,region_id,region_name,region_geom,region_description
0,939,Goodenough and Fergusson Islanders,1085,Goodenough Island - Northern D'Entrecasteaux,"[[[[150.1518790210183, -9.176727171355626], [1...",An island in the North-West of the D'Entrecast...
1,940,Julio-Claudian Imperial Cult,132,Roman Empire (Greatest Extent),"[[[[-9.964599609375, 30.473929840535313], [-9....","Most of Western Europe and the Mediterranean, ..."
2,941,Chan Buddhists in early Qing period,1087,China in the seventeenth century,"[[[[121.77169738544389, 38.978728863436245], [...",Here the territory mainly refers to China prop...
3,942,African Methodist Episcopal Church,1095,"North America, South America, Caribbean, Asia,...","[[[[-117.521637150959, 32.55075325113483], [-1...",While the denomination originated in the North...
4,943,Moravian Missionaries in Nunatsiavut,1091,Nunatsiavut and Labrador,"[[[[-65.6836080551147, 61.31245157483824], [-6...","This region consists of Nunatsiavut, the Inuit..."


## 5.3. Extract entry tags

In [25]:
# one row per (entry, tag) pair; in the output below the index label repeats
# for all tags that belong to the same entry
entry_tags = drh.extract_entry_tags(df_entries)
entry_tags.head(5)

Unnamed: 0,entry_id,entry_name,entry_tag_id,entry_tag_name
0,939,Goodenough and Fergusson Islanders,8,Religious Group
0,939,Goodenough and Fergusson Islanders,40,Oceanic Religions
0,939,Goodenough and Fergusson Islanders,311,Melanesia
0,939,Goodenough and Fergusson Islanders,798,Kastom
1,940,Julio-Claudian Imperial Cult,8,Religious Group
