In [184]:
import pandas as pd
import requests
import sddk

import gspread
from gspread_dataframe import get_as_dataframe, set_with_dataframe
from google.oauth2 import service_account # based on google-auth library

In [237]:
# Optional: connection used for exporting results to Google Sheets / sciencedata.dk
# (feel free to skip this cell if you do not need the export)

s = sddk.cloudSession("sciencedata.dk")

# read the service-account key from sciencedata.dk
# (or load it from a local storage: json.load(open("../../ServiceAccountsKey.json", "r")))
file_data = s.read_file("https://sciencedata.dk/files/ServiceAccountsKey.json", "dict")

# establish the connection with Google Sheets using the two scopes listed below
scopes = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
credentials = service_account.Credentials.from_service_account_info(file_data)
scoped_credentials = credentials.with_scopes(scopes)
gc = gspread.Client(auth=scoped_credentials)

# spreadsheet that receives the exported tables
results_url = "https://docs.google.com/spreadsheets/d/1h4M-gK9TPIfeTV528tUuPBfZF1wtcNCA10yIlJYqGTE/edit?usp=sharing"
paul_results = gc.open_by_url(results_url)

In [3]:
# find local version of the input dataset
# (IPython shell capture: `local_paths` holds the lines printed by `find`)
# NOTE(review): the searched file is named "LIRE_v1-0.json", but the load cell below reads it
# into `LAGT` and also writes the downloaded LAGT data under this same name - the name looks
# like a leftover from another project; confirm before renaming.
local_paths = !find ~/Projects -name "LIRE_v1-0.json"
print(local_paths)

['/Users/kasev/Projects/paul/data/large_files/LIRE_v1-0.json']


In [62]:
# LAGT (v1.0.1) dataset: use the locally cached copy if there is one,
# otherwise download it directly from Zenodo and cache it.
# NOTE(review): the cache file is called "LIRE_v1-0.json" although it stores LAGT data;
# the name is kept because the lookup cell above searches for exactly this filename.
try:
    LAGT = pd.read_json(local_paths[0])
except (IndexError, ValueError, OSError):
    # IndexError: `find` returned no path; ValueError / OSError: the path could not be
    # opened or parsed. Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
    resp = requests.get("https://zenodo.org/record/4971946/files/LAGT_v1-0.json?download=1")
    # fail loudly on an HTTP error instead of trying to parse an error page as JSON
    resp.raise_for_status()
    LAGT = pd.DataFrame(resp.json())
    # save it for next time
    LAGT.to_json("../data/large_files/LIRE_v1-0.json")

In [63]:
# list the columns available in the loaded dataset
LAGT.columns

Index(['filename', 'author', 'title', 'wordcount', 'author_id', 'doc_id',
       'raw_date', 'date_avr', 'date_probs', 'date_manual', 'provenience',
       'tlg_epithet', 'clean_string', 'n_sentences', 'lemmatized_sentences'],
      dtype='object')

In [64]:
# number of documents labelled "christian" before the metadata update
int((LAGT["provenience"] == "christian").sum())

147

In [158]:
# load the manually updated metadata
LAGT_metadata = pd.read_csv("../data/LAGT_metadata.csv")
metadata_filenames = LAGT_metadata["filename"]

# Each lookup maps filename -> corrected value. Using the dict's __getitem__ keeps the
# lookup strict: a filename missing from the metadata file raises KeyError.

# update provenience
provenience_dict = dict(zip(metadata_filenames, LAGT_metadata["provenience"]))
LAGT["provenience"] = LAGT["filename"].map(provenience_dict.__getitem__)

# update author
author_dict = dict(zip(metadata_filenames, LAGT_metadata["author"]))
LAGT["author"] = LAGT["filename"].map(author_dict.__getitem__)

# update title
title_dict = dict(zip(metadata_filenames, LAGT_metadata["title"]))
LAGT["title"] = LAGT["filename"].map(title_dict.__getitem__)

In [159]:
# number of documents labelled "christian" after the metadata update
int((LAGT["provenience"] == "christian").sum())

148

In [160]:
# Christian texts that have no date_avr value
christian_mask = LAGT["provenience"] == "christian"
undated_mask = LAGT["date_avr"].isna()
LAGT[christian_mask & undated_mask]

Unnamed: 0,filename,author,title,wordcount,author_id,doc_id,raw_date,date_avr,date_probs,date_manual,provenience,tlg_epithet,clean_string,n_sentences,lemmatized_sentences,sentences_paul,sentences_paul_N


In [161]:
# number of documents (any provenience) with date_avr strictly below 5
LAGT["date_avr"].lt(5).sum()

1277

In [162]:
# for each date_avr cutoff: the cutoff, the number of Christian texts up to it,
# and their total word count
is_christian = LAGT["provenience"] == "christian"
for cutoff in (2, 2.5, 3, 3.5, 4, 4.5, 5):
    selected = is_christian & (LAGT["date_avr"] <= cutoff)
    print(cutoff, selected.sum(), LAGT.loc[selected, "wordcount"].sum())

2 103 1642004
2.5 104 1711991
3 105 1714782
3.5 137 3059696
4 143 3361426
4.5 148 3566823
5 148 3566823


In [218]:
# keep only Christian texts with date_avr <= 5
# (presumably "up to the end of the 5th century CE" - the original note was cut off; confirm the unit)
# .copy() turns the selection into an independent frame, so the column assignments
# made in later cells do not trigger pandas' SettingWithCopyWarning.
LAGT = LAGT[(LAGT["date_avr"] <= 5) & (LAGT["provenience"]=="christian")].copy()

In [219]:
# all documents whose author_id starts with "tlg0031"
tlg0031_rows = LAGT["author_id"].str.startswith("tlg0031")
LAGT.loc[tlg0031_rows]

Unnamed: 0,filename,author,title,wordcount,author_id,doc_id,raw_date,date_avr,date_probs,date_manual,provenience,tlg_epithet,clean_string,n_sentences,lemmatized_sentences,sentences_paul,sentences_paul_N,sentences_apostle,sentences_apostle_N
385,tlg0031.tlg001.perseus-grc2.xml,Gospel of Matthew,Gospel of Matthew,18288,tlg0031a,tlg0031.tlg001,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΒΙΒΛΟΣ γενέσεως Ἰησοῦ Χριστοῦ υἱοῦ Δαυεὶδ υἱο...,1276,"[[βίβλος, γένεσις, Ἰησοῦς, Χριστός, υἱός, Δαυί...",[],0,"[[ἀπόστολος, ὄνομα, εἰμί]]",1
386,tlg0031.tlg002.perseus-grc2.xml,Gospel of Mark,Gospel of Mark,11274,tlg0031b,tlg0031.tlg002,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΑΡΧΗ τοῦ εὐαγγελίου Ἰησοῦ Χριστοῦ . Καθὼς γέγ...,790,"[[ἀρχή, εὐαγγέλιον, Ἰησοῦς, Χριστός], [γράφω, ...",[],0,"[[ποιέω, ἀπόστολος, ὀνομάζω, ἀποστέλλω, κηρύσσ...",2
387,tlg0031.tlg003.perseus-grc2.xml,Luke-Acts,Gospel of Luke,19458,tlg0031luke,tlg0031.tlg003,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΕΠΕΙΔΗΠΕΡ ΠΟΛΛΟΙ ἐπεχείρησαν ἀνατάξασθαι διήγ...,1274,"[[πολύς, ἐπιχειρέω, ἀνατάσσομαι, διήγησις, πλη...",[],0,"[[γίγνομαι, ἡμέρα, προσφωνέω, μαθητής, ἐκλεξάμ...",6
388,tlg0031.tlg004.perseus-grc2.xml,Johnannine literature (New Testament),Gospel of John,15590,tlg0031john,tlg0031.tlg004,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],"ΕΝ ΑΡΧΗ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θ...",1164,"[[εἰμί, ἀρχή, εἰμί, λόγος, λόγος, εἰμί, θεός, ...",[],0,"[[λέγω, εἰμί, δοῦλος, μέγας, κύριος, ἀπόστολος...",1
389,tlg0031.tlg005.perseus-grc2.xml,Luke-Acts,Acts,18406,tlg0031luke,tlg0031.tlg005,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],"τὸν μὲν πρῶτον λόγον ἐποιησάμην περὶ πάντων, ...",960,"[[πρῶτος, λόγος, ποιέω, πᾶς, Θεόφιλος, ἄρχω, Ἰ...","[[διέρχομαι, ὅλος, νῆσος, Πάφος, εὑρίσκω, ἀνήρ...",125,"[[πρῶτος, λόγος, ποιέω, πᾶς, Θεόφιλος, ἄρχω, Ἰ...",26
390,tlg0031.tlg006.perseus-grc2.xml,Paul of Tarsus,Romans,7107,tlg0031paul,tlg0031.tlg006,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],"ΠΑΥΛΟΣ δοῦλος Ἰησοῦ Χριστοῦ, κλητὸς ἀπόστολος...",490,"[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...","[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...",1,"[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...",3
391,tlg0031.tlg007.perseus-grc2.xml,Paul of Tarsus,1 Corinthians,6814,tlg0031paul,tlg0031.tlg007,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΠΑΥΛΟΣ κλητὸς ἀπόστολος Ἰησοῦ Χριστοῦ διὰ θελ...,544,"[[Παῦλος, κλητός, ἀπόστολος, Ἰησοῦς, Χριστός, ...","[[Παῦλος, κλητός, ἀπόστολος, Ἰησοῦς, Χριστός, ...",7,"[[Παῦλος, κλητός, ἀπόστολος, Ἰησοῦς, Χριστός, ...",9
392,tlg0031.tlg008.perseus-grc2.xml,Paul of Tarsus,2 Corinthians,4470,tlg0031paul,tlg0031.tlg008,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΠΑΥΛΟΣ ἀπόστολος Χριστοῦ Ἰησοῦ διὰ θελήματος ...,274,"[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...","[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...",2,"[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...",6
393,tlg0031.tlg009.perseus-grc2.xml,Paul of Tarsus,Galatians,2235,tlg0031paul,tlg0031.tlg009,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],"ΠΑΥΛΟΣ ἀπόστολος, οὐκ ἀπʼ ἀνθρώπων οὐδὲ διʼ ἀ...",159,"[[Παῦλος, ἀπόστολος, ἄνθρωπος, ἄνθρωπος, Ἰησοῦ...","[[Παῦλος, ἀπόστολος, ἄνθρωπος, ἄνθρωπος, Ἰησοῦ...",2,"[[Παῦλος, ἀπόστολος, ἄνθρωπος, ἄνθρωπος, Ἰησοῦ...",3
394,tlg0031.tlg010.perseus-grc2.xml,Pauline literature,Ephesians,2421,tlg0031pspa,tlg0031.tlg010,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΠΑΥΛΟΣ ἀπόστολος Χριστοῦ Ἰησοῦ διὰ θελήματος ...,80,"[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...","[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...",2,"[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...",4


In [220]:
# the document(s) whose doc_id starts with "tlg0031.tlg006"
tlg006_rows = LAGT["doc_id"].str.startswith("tlg0031.tlg006")
LAGT.loc[tlg006_rows]

Unnamed: 0,filename,author,title,wordcount,author_id,doc_id,raw_date,date_avr,date_probs,date_manual,provenience,tlg_epithet,clean_string,n_sentences,lemmatized_sentences,sentences_paul,sentences_paul_N,sentences_apostle,sentences_apostle_N
390,tlg0031.tlg006.perseus-grc2.xml,Paul of Tarsus,Romans,7107,tlg0031paul,tlg0031.tlg006,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],"ΠΑΥΛΟΣ δοῦλος Ἰησοῦ Χριστοῦ, κλητὸς ἀπόστολος...",490,"[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...","[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...",1,"[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...",3


In [221]:
# all documents whose author_id starts with "tlg0031paul"
paul_rows = LAGT["author_id"].str.startswith("tlg0031paul")
LAGT.loc[paul_rows]

Unnamed: 0,filename,author,title,wordcount,author_id,doc_id,raw_date,date_avr,date_probs,date_manual,provenience,tlg_epithet,clean_string,n_sentences,lemmatized_sentences,sentences_paul,sentences_paul_N,sentences_apostle,sentences_apostle_N
390,tlg0031.tlg006.perseus-grc2.xml,Paul of Tarsus,Romans,7107,tlg0031paul,tlg0031.tlg006,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],"ΠΑΥΛΟΣ δοῦλος Ἰησοῦ Χριστοῦ, κλητὸς ἀπόστολος...",490,"[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...","[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...",1,"[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...",3
391,tlg0031.tlg007.perseus-grc2.xml,Paul of Tarsus,1 Corinthians,6814,tlg0031paul,tlg0031.tlg007,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΠΑΥΛΟΣ κλητὸς ἀπόστολος Ἰησοῦ Χριστοῦ διὰ θελ...,544,"[[Παῦλος, κλητός, ἀπόστολος, Ἰησοῦς, Χριστός, ...","[[Παῦλος, κλητός, ἀπόστολος, Ἰησοῦς, Χριστός, ...",7,"[[Παῦλος, κλητός, ἀπόστολος, Ἰησοῦς, Χριστός, ...",9
392,tlg0031.tlg008.perseus-grc2.xml,Paul of Tarsus,2 Corinthians,4470,tlg0031paul,tlg0031.tlg008,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΠΑΥΛΟΣ ἀπόστολος Χριστοῦ Ἰησοῦ διὰ θελήματος ...,274,"[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...","[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...",2,"[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...",6
393,tlg0031.tlg009.perseus-grc2.xml,Paul of Tarsus,Galatians,2235,tlg0031paul,tlg0031.tlg009,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],"ΠΑΥΛΟΣ ἀπόστολος, οὐκ ἀπʼ ἀνθρώπων οὐδὲ διʼ ἀ...",159,"[[Παῦλος, ἀπόστολος, ἄνθρωπος, ἄνθρωπος, Ἰησοῦ...","[[Παῦλος, ἀπόστολος, ἄνθρωπος, ἄνθρωπος, Ἰησοῦ...",2,"[[Παῦλος, ἀπόστολος, ἄνθρωπος, ἄνθρωπος, Ἰησοῦ...",3
395,tlg0031.tlg011.perseus-grc2.xml,Paul of Tarsus,Philippians,1631,tlg0031paul,tlg0031.tlg011,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΠΑΥΛΟΣ ΚΑΙ ΤΙΜΟΘΕΟΣ δοῦλοι Χριστοῦ Ἰησοῦ πᾶσι...,83,"[[Παῦλος, Τιμόθεος, δοῦλος, Χριστός, Ἰησοῦς, ἅ...","[[Παῦλος, Τιμόθεος, δοῦλος, Χριστός, Ἰησοῦς, ἅ...",1,"[[ἀναγκαῖος, ἡγέομαι, Ἐπαφρόδιτος, ἀδελφός, συ...",1
397,tlg0031.tlg013.perseus-grc2.xml,Paul of Tarsus,1 Thessalonians,1475,tlg0031paul,tlg0031.tlg013,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΠΑΥΛΟΣ ΚΑΙ ΣΙΛΟΥΑΝΟΣ ΚΑΙ ΤΙΜΟΘΕΟΣ τῇ ἐκκλησίᾳ...,65,"[[Παῦλος, σιλουανος, Τιμόθεος, ἐκκλησία, Θεσσα...","[[Παῦλος, σιλουανος, Τιμόθεος, ἐκκλησία, Θεσσα...",2,"[[λόγος, κολακεία, γίγνομαι, οἶδα, πρόφασις, π...",1
402,tlg0031.tlg018.perseus-grc2.xml,Paul of Tarsus,Philemon,334,tlg0031paul,tlg0031.tlg018,A.D. 1,0.5,{'0.5': 1},0.5,christian,[],ΠΑΥΛΟΣ δέσμιος Χριστοῦ Ἰησοῦ καὶ Τιμόθεος ὁ ἀ...,17,"[[Παῦλος, δέσμιος, Χριστός, Ἰησοῦς, Τιμόθεος, ...","[[Παῦλος, δέσμιος, Χριστός, Ἰησοῦς, Τιμόθεος, ...",3,[],0


In [222]:
def get_word_sentences(sentences_list, word):
    """Return the sentences from ``sentences_list`` that contain ``word``.

    Args:
        sentences_list: iterable of sentences; each sentence only needs to
            support the ``in`` operator (in this notebook: lists of lemmata).
        word: the item to look for in every sentence.

    Returns:
        A new list holding the matching sentences (the same objects, not
        copies) in their original order; empty when nothing matches.
    """
    matches = []
    for tokens in sentences_list:
        if word in tokens:
            matches.append(tokens)
    return matches

In [223]:
# per document: every lemmatized sentence that contains the lemma "Παῦλος"
word = "Παῦλος"
LAGT["sentences_paul"] = LAGT["lemmatized_sentences"].apply(get_word_sentences, args=(word,))

In [224]:
# number of "Παῦλος" sentences per document
LAGT["sentences_paul_N"] = LAGT["sentences_paul"].apply(len)

In [225]:

# per document: every lemmatized sentence that contains the lemma "ἀπόστολος"
word = "ἀπόστολος"
LAGT["sentences_apostle"] = LAGT["lemmatized_sentences"].apply(get_word_sentences, args=(word,))

In [226]:
# number of "ἀπόστολος" sentences per document
LAGT["sentences_apostle_N"] = LAGT["sentences_apostle"].apply(len)


In [227]:
# how many documents contain the word "Παῦλος" at least once
int(LAGT["sentences_paul_N"].gt(0).sum())

82

In [228]:
# how many documents contain the word "ἀπόστολος" at least once
int(LAGT["sentences_apostle_N"].gt(0).sum())

104

In [229]:
# total number of sentences containing "Παῦλος" across all documents currently in LAGT
LAGT["sentences_paul_N"].sum()

1045

"Acts of Paul and Thecla"
- http://www.patrologia-lib.ru/apocryph/novum/a_paul.htm

In [231]:
# "Παῦλος" sentences in Christian texts whose author_id is not "tlg0031paul"
non_pauline_christian = (LAGT["provenience"] == "christian") & (LAGT["author_id"] != "tlg0031paul")
LAGT.loc[non_pauline_christian, "sentences_paul_N"].sum()

1027

In [232]:
# save the current LAGT frame (with the sentence columns added in the cells above) for future usage
LAGT.to_json("../data/large_files/LAGT_paul.json")

In [234]:
# every document except those with author_id "tlg0031paul"
df = LAGT.loc[LAGT["author_id"].ne("tlg0031paul")]

In [213]:
# Per-author overview of the "Παῦλος" / "ἀπόστολος" sentence counts in `df`.
count_columns = ["wordcount", "n_sentences", "sentences_paul_N", "sentences_apostle_N"]

# Select the count columns BEFORE summing: a bare groupby(...).sum() also tries to
# aggregate the text / list / dict columns, which is wasteful and can raise a TypeError
# on pandas >= 2.0, where non-numeric columns are no longer silently dropped.
df_authors = (
    df.groupby("author_id")[count_columns]
    .sum()
    .sort_values("sentences_paul_N", ascending=False)
    .reset_index()
)

# author_id -> author name / average date, taken from the manually curated metadata
author_dict = dict(zip(LAGT_metadata["author_id"],  LAGT_metadata["author"]))
date_dict = dict(zip(LAGT_metadata["author_id"],  LAGT_metadata["date_avr"]))


def get_author(x):
    """Return the author name for author_id ``x``, or None if it is not in ``author_dict``."""
    return author_dict.get(x)


def get_list_of_works(author_id):
    """Return the titles of all rows in ``LAGT`` whose author_id equals ``author_id``."""
    works = LAGT[LAGT["author_id"]==author_id]["title"].tolist()
    return works


df_authors["author"] = df_authors["author_id"].apply(get_author)
df_authors["works"] = df_authors["author_id"].apply(get_list_of_works)
df_authors["works_N"] = df_authors["works"].apply(len)
# strict lookup kept as in the original: an author_id without a metadata date raises KeyError
df_authors["date_avr"] = df_authors["author_id"].apply(lambda x: date_dict[x])
# set_index + reset_index moves "author" to the first column (no in-place mutation)
df_authors = df_authors.set_index("author").reset_index()

# share of an author's sentences that mention "Παῦλος"
df_authors["paul_proportion"] = df_authors["sentences_paul_N"] / df_authors["n_sentences"]

df_authors

Unnamed: 0,author,author_id,wordcount,n_sentences,sentences_paul_N,sentences_apostle_N,works,works_N,date_avr,paul_proportion
0,Origen,tlg2042,931431,110127,459,526,"[Contra Celsum, Commentarii in Evangelium Joan...",44,2.0,0.004168
1,Luke-Acts,tlg0031luke,37864,2234,125,32,"[Gospel of Luke, Acts]",2,0.5,0.055953
2,Epiphanius,tlg2021,325038,33079,102,357,"[Ancoratus, Panarion (Adversus haereses), Anac...",3,3.5,0.003084
3,Socrates of Constantinople,tlg2057,103901,8743,76,64,[Historia Ecclesiastica],1,4.0,0.008693
4,Sozomen,tlg2048,112966,7150,63,30,[Historia Ecclesiastica],1,4.5,0.008811
5,Athanasius of Alexandria,tlg2035,94010,10789,49,90,[De decretis Nicaenae synodi (Chapters 41 and ...,5,3.5,0.004542
6,Theodoret,tlg4089,153333,16094,40,36,"[Historia ecclesiastica , Historia Religiosa]",2,4.0,0.002485
7,Basil of Caesarea,tlg2040,144429,11257,23,36,"[To Young Men, On How They Might Dervice Profi...",2,3.5,0.002043
8,Clement of Alexandria,tlg0555,235754,14753,18,101,"[Protrepticus, Paedagogus, Stromata (Books I-V...",7,2.0,0.00122
9,Eusebius,tlg2018,620546,35220,17,199,"[Praeperatio Evangelica, Historia ecclesiastic...",10,3.5,0.000483


In [214]:
# list the columns of the author overview (used to pick the export order below)
df_authors.columns

Index(['author', 'author_id', 'wordcount', 'n_sentences', 'sentences_paul_N',
       'sentences_apostle_N', 'works', 'works_N', 'date_avr',
       'paul_proportion'],
      dtype='object')

In [216]:
# column order used when the author overview is exported
columns = [
    "author",
    "author_id",
    "works_N",
    "works",
    "wordcount",
    "n_sentences",
    "date_avr",
    "sentences_apostle_N",
    "sentences_paul_N",
    "paul_proportion",
]

In [217]:
# add a new worksheet to the results spreadsheet ((1, 1) are the initial size arguments
# passed to add_worksheet) and write the author overview into it
overview_sheet = paul_results.add_worksheet("christian_authors_overview", 1, 1)
set_with_dataframe(overview_sheet, df_authors[columns])