In [1]:
### PREREQUISTIES
### (many used only in one notebook...)

import os
from importlib import reload
import pandas as pd
import numpy as np
import pickle
import math
import random
import sys
import csv
import unicodedata
import requests
import re
import networkx as nx
from scipy import stats
import plotly
import kaleido

import nltk

import matplotlib.pyplot as plt
import seaborn as sns

# gensim parts
from gensim import corpora
from gensim import models

### scikit-learn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.decomposition import TruncatedSVD 
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances

import plotly.graph_objects as go

import sddk

import gspread
from gspread_dataframe import get_as_dataframe, set_with_dataframe
from google.oauth2 import service_account # based on google-auth library

In [2]:
import textnet

In [3]:
reload(textnet)

<module 'textnet' from '/home/kasev/ECCE_AGT/scripts/textnet.py'>

In [4]:
### not neccessary for reading the data, just for exporting them to sciencedata.dk
conf = sddk.configure("SDAM_root", "648597@au.dk")

sciencedata.dk username (format '123456@au.dk'): 648597@au.dk
sciencedata.dk password: ········
connection with shared folder established with you as its owner
endpoint variable has been configured to: https://sciencedata.dk/files/SDAM_root/


In [5]:
# to access gsheet, you need Google Service Account key json file
# I have mine located in my personal space on sciencedata.dk, so I read it from there:

# (1) read the file and parse its content
file_data = conf[0].get("https://sciencedata.dk/files/ServiceAccountsKey.json").json()
# (2) transform the content into crendentials object
credentials = service_account.Credentials.from_service_account_info(file_data)
# (3) specify your usage of the credentials
scoped_credentials = credentials.with_scopes(['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive'])
# (4) use the constrained credentials for authentication of gspread package
gc = gspread.Client(auth=scoped_credentials)

ECCE_AGT_overview = gc.open_by_url("https://docs.google.com/spreadsheets/d/1KPpPaeX215HR_fVrakvJp8aB6oZDhHFTcBw0MKLw6as/edit?usp=sharing")

In [7]:
AGT = sddk.read_file("SDAM_data/AGT/AGT_lemmatized_20201211.json", "df", conf)
AGT.head(5)

Unnamed: 0,filename,author,title,wordcount,author_id,doc_id,raw_date,date_avr,date_probs,date_manual,provenience,tlg_epithet,clean_string,n_sentences,lemmatized_sentences
0,tlg0001.tlg001.perseus-grc2.xml,Apollonius Rhodius,Argonautica,38822,tlg0001,tlg0001.tlg001,3 B.C.,-2.5,{'-2.5': 1},-2.5,pagan,Epici/-ae,"ἀρχόμενος σέο, Φοῖβε, παλαιγενέων κλέα φωτῶν μ...",3252,"[[ἄρχω, Φοῖβος, παλαιγενής, κλέος, φώς, μιμνήσ..."
1,tlg0003.tlg001.perseus-grc2.xml,Thucydides,The Peloponnesian War,150118,tlg0003,tlg0003.tlg001,5 B.C.,-4.5,{'-4.5': 1},-4.5,pagan,Historici/-ae,Θουκυδίδης Ἀθηναῖος ξυνέγραψε τὸν πόλεμον τῶν...,6068,"[[θουκυδίδης, Ἀθηναῖος, συγγράφω, πόλεμος, Πελ..."
2,tlg0004.tlg001.perseus-grc1.xml,Diogenes Laertius,Lives of Eminent Philosophers,110763,tlg0004,tlg0004.tlg001,A.D. 3,2.5,{'2.5': 1},,,Biographi,Τὸ τῆς φιλοσοφίας ἔργον ἔνιοί φασιν ἀπὸ βαρβάρ...,10245,"[[φιλοσοφία, ἔργον, ἔνιοι, φημί, βάρβαρος, ἄρχ..."
3,tlg0005.tlg001.perseus-grc1.xml,Theocritus,Idylls,19200,tlg0005,tlg0005.tlg001,4-3 B.C.,-3.0,"{'-3.5': 0.5, '-2.5': 0.5}",,,Bucolici,"̔Αδύ τι τὸ ψιθύρισμα καὶ ἁ πίτυς αἰπόλε τήνα,...",1982,"[[αδύ, ψιθύρισμα, πίτυς, αἰπόλος, τῆνος, πηγή,..."
4,tlg0005.tlg002.perseus-grc1.xml,Theocritus,Epigrams,1734,tlg0005,tlg0005.tlg002,4-3 B.C.,-3.0,"{'-3.5': 0.5, '-2.5': 0.5}",,,Bucolici,τὰ ῥόδα τὰ δροσόεντα καὶ ἁ κατάπυκνος ἐκείνα ἕ...,152,"[[ῥόδον, δροσόεντα, κατάπυκνος, ἐκεῖνος, ἕρπυλ..."


In [8]:
def flat_lemmata(sentences):
    lemmata = [word for sent in sentences for word in sent]
    return lemmata
AGT["lemmata"] = AGT["lemmatized_sentences"].apply(flat_lemmata)

AGT["lemmata_wordcount"] = AGT.apply(lambda row: len(row["lemmata"]), axis=1)
AGT["lemmata_wordcount"].sum() # previously we had 13925726, then 13713183, 14373580, 14342245

14383627

In [18]:
sub_df = AGT[AGT["author_id"]=="tlg0031john"]
sub_df["date_probs"]

388    {'0.5': 1}
407    {'0.5': 1}
408    {'0.5': 1}
409    {'0.5': 1}
Name: date_probs, dtype: object

In [13]:
probs_dict = {}
for d in sub_df["date_probs"]:
    probs_dict.update(d)
probs_dict

{'0.5': 1}

# Comparing All Authors

In [14]:
def grouping_gnt(author_id):
    if author_id == "tlg0031a":
        author_id = "tlg0031matt"
    if author_id == "tlg0031b":
        author_id = "tlg0031mark"
    if re.match("tlg0031\w?$", author_id):
        return "tlg0031rest"
    else:
        return author_id
new_author_ids = AGT["author_id"].apply(grouping_gnt)

AGT["author_id"] = new_author_ids

In [16]:
def get_flat_sentences(series_sentences):
    try: return [sent for work in series_sentences.tolist() for sent in work]
    except: return []

In [21]:
john_sentences = get_flat_sentences(sub_df["lemmatized_sentences"])
len(john_sentences)

1339

In [22]:
authors_dicts = []
for author_id in AGT["author_id"].unique():
    sub_df = AGT[AGT["author_id"]==author_id]
    authors_dict = {}
    authors_dict["author_id"] = author_id
    authors_dict["author"] = sub_df["author"][0]
    authors_dict["n_titles"] = len(sub_df)
    authors_dict["titles"] = sub_df["title"].tolist()
    authors_dict["filenames"] = sub_df["filename"].tolist()
    authors_dict["wordcount"] = sub_df["wordcount"].sum()
    authors_dict["lemmata_wordcount"] = sub_df["lemmata_wordcount"].sum()
    authors_dict["n_sentences"] = sub_df["n_sentences"].sum()
    probs_dict = {}
    for d in sub_df["date_probs"]:
        probs_dict.update(d)
    authors_dict["date_probs"] = probs_dict
    authors_dict["date_avr"] = sub_df["date_avr"].tolist()[0]
    authors_dict["provenience"] = sub_df["provenience"].tolist()[0]
    authors_dict["lemmata_lists"] = sub_df["lemmata"].tolist()
    authors_dict["lemmatized_sentences"] = get_flat_sentences(sub_df["lemmatized_sentences"])
    authors_dicts.append(authors_dict)
    
authors_df = pd.DataFrame(authors_dicts)
authors_df.head(5)

Unnamed: 0,author_id,author,n_titles,titles,filenames,wordcount,lemmata_wordcount,n_sentences,date_probs,date_avr,provenience,lemmata_lists,lemmatized_sentences
0,tlg0001,Apollonius Rhodius,1,[Argonautica],[tlg0001.tlg001.perseus-grc2.xml],38822,23777,3252,{'-2.5': 1},-2.5,pagan,"[[ἄρχω, Φοῖβος, παλαιγενής, κλέος, φώς, μιμνήσ...","[[ἄρχω, Φοῖβος, παλαιγενής, κλέος, φώς, μιμνήσ..."
1,tlg0003,Thucydides,1,[The Peloponnesian War],[tlg0003.tlg001.perseus-grc2.xml],150118,71863,6068,{'-4.5': 1},-4.5,pagan,"[[θουκυδίδης, Ἀθηναῖος, συγγράφω, πόλεμος, Πελ...","[[θουκυδίδης, Ἀθηναῖος, συγγράφω, πόλεμος, Πελ..."
2,tlg0004,Diogenes Laertius,1,[Lives of Eminent Philosophers],[tlg0004.tlg001.perseus-grc1.xml],110763,56872,10245,{'2.5': 1},2.5,,"[[φιλοσοφία, ἔργον, ἔνιοι, φημί, βάρβαρος, ἄρχ...","[[φιλοσοφία, ἔργον, ἔνιοι, φημί, βάρβαρος, ἄρχ..."
3,tlg0005,Theocritus,3,"[Idylls , Epigrams , Syrinx]","[tlg0005.tlg001.perseus-grc1.xml, tlg0005.tlg0...",21011,13016,2137,"{'-3.5': 0.5, '-2.5': 0.5}",-3.0,,"[[αδύ, ψιθύρισμα, πίτυς, αἰπόλος, τῆνος, πηγή,...","[[αδύ, ψιθύρισμα, πίτυς, αἰπόλος, τῆνος, πηγή,..."
4,tlg0006,Euripides,17,"[Cyclops, Ἡρακλεῖδαι, Ἱππόλυτος, Ἀνδρομάχη, Ἑκ...","[tlg0006.tlg001.perseus-grc2.xml, tlg0006.tlg0...",134129,81439,14482,{'-4.5': 1},-4.5,pagan,"[[Βρόμιος, ἔχω, πόνος, χὥτʼ, ἥβη, ἐμός, εὐσθεν...","[[Βρόμιος, ἔχω, πόνος, χὥτʼ, ἥβη, ἐμός, εὐσθεν..."


In [65]:
authors_df[authors_df["provenience"]=="jewish"]

Unnamed: 0,author_id,author,n_titles,titles,filenames,wordcount,lemmata_wordcount,n_sentences,date_probs,date_avr,provenience,lemmata_lists,lemmatized_sentences,dikaiosyne_N,dikaiosyne_TF,dik_sents_flat,dik_sents_TF,dik_sents_TFIDF,dik_sents_TFIDF_10
84,tlg0527,Septuaginta,55,"[Genesis, Exodus, Leviticus, Numeri, Deuterono...","[tlg0527.tlg001.opp-grc2.xml, tlg0527.tlg002.o...",799083,375155,57856,{},,jewish,"[[εἰμί, ποιέω, θεός, οὐρανός, γῆ, γῆ, εἰμί, ἀό...","[[εἰμί, ποιέω, θεός, οὐρανός, γῆ], [γῆ, εἰμί, ...",341,0.000427,"[πιστεύω, ἀβρὰμ, θεός, λογίζομαι, οἶδα, σύνταξ...","[(κύριος, 0.04013), (ποιέω, 0.02946), (θεός, 0...","[(κρίμα, 0.08576), (ποιέω, 0.08547), (κύριος, ...","κρίμα, ποιέω, κύριος, ἀλήθεια, θεός, ἀδικία, ὁ..."
21,tlg0018,Philo Judaeus,31,"[De opificio mundi, Legum allegoriarum libri i...","[tlg0018.tlg001.opp-grc1.xml, tlg0018.tlg002.o...",643011,304615,82966,"{'-0.5': 0.5, '0.5': 0.5}",0.0,jewish,"[[ἄλλος, νομοθέτης, ἀκαλλώπιστος, γυμνός, νομι...","[[ἄλλος, νομοθέτης, ἀκαλλώπιστος, γυμνός, νομι...",157,0.000244,"[ἐκεῖνος, ἀγνοητέον, πρῶτος, ἀριθμός, τέσσαρες...","[(ἀρετή, 0.02197), (φρόνησις, 0.01862), (εἰμί,...","[(φρόνησις, 0.11227), (ἀρετή, 0.09582), (σωφρο...","φρόνησις, ἀρετή, σωφροσύνη, ἀνδρεία, ἰσότης, ἀ..."
83,tlg0526,Flavius Josephus,4,"[Antiquitates Judaicae, Josephi vita, Contra A...","[tlg0526.tlg001.perseus-grc1.xml, tlg0526.tlg0...",469580,240661,19707,{'0.5': 1},0.5,jewish,"[[ἱστορία, συγγράφω, βούλομαι, αὐτός, ὁράω, σπ...","[[ἱστορία, συγγράφω, βούλομαι, αὐτός, ὁράω, σπ...",39,8.3e-05,"[ἄβελος, νέος, ἐπιμελέομαι, πράσσω, πάρειμι, θ...","[(εἰμί, 0.02242), (θεός, 0.02093), (εὐσέβεια, ...","[(εὐσέβεια, 0.10229), (θεός, 0.05143), (εἰμί, ...","εὐσέβεια, θεός, εἰμί, ἀρετή, δόξα, πολύς, ἀμάν..."


In [23]:
clean_titles = authors_df["titles"].apply(lambda cell: [title.rpartition(" - ")[2] for title in cell]) 
authors_df["titles"] = clean_titles

In [24]:
def fill_author(author, titles):
    if author == None:
        return titles[0]
    else:
        return author.partition(",")[0]
 
clean_authors = authors_df.apply(lambda row: fill_author(row["author"], row["titles"]), axis=1)
authors_df["author"] = clean_authors

In [25]:
authors_df.loc[authors_df["author_id"]=="tlg0031rest", "author"] = "GNT rest" #, ["author"]] = 


In [26]:
authors_df.loc[authors_df["author_id"]=="tlg1484", "author"] = "Martyrium Polycarpi"
authors_df.loc[authors_df["author_id"]=="tlg1311", "author"] = "Didache" #, ["author"]] = #, ["author"]] = 

In [27]:
authors_df

Unnamed: 0,author_id,author,n_titles,titles,filenames,wordcount,lemmata_wordcount,n_sentences,date_probs,date_avr,provenience,lemmata_lists,lemmatized_sentences
0,tlg0001,Apollonius Rhodius,1,[Argonautica],[tlg0001.tlg001.perseus-grc2.xml],38822,23777,3252,{'-2.5': 1},-2.5,pagan,"[[ἄρχω, Φοῖβος, παλαιγενής, κλέος, φώς, μιμνήσ...","[[ἄρχω, Φοῖβος, παλαιγενής, κλέος, φώς, μιμνήσ..."
1,tlg0003,Thucydides,1,[The Peloponnesian War],[tlg0003.tlg001.perseus-grc2.xml],150118,71863,6068,{'-4.5': 1},-4.5,pagan,"[[θουκυδίδης, Ἀθηναῖος, συγγράφω, πόλεμος, Πελ...","[[θουκυδίδης, Ἀθηναῖος, συγγράφω, πόλεμος, Πελ..."
2,tlg0004,Diogenes Laertius,1,[Lives of Eminent Philosophers],[tlg0004.tlg001.perseus-grc1.xml],110763,56872,10245,{'2.5': 1},2.5,,"[[φιλοσοφία, ἔργον, ἔνιοι, φημί, βάρβαρος, ἄρχ...","[[φιλοσοφία, ἔργον, ἔνιοι, φημί, βάρβαρος, ἄρχ..."
3,tlg0005,Theocritus,3,"[Idylls , Epigrams , Syrinx]","[tlg0005.tlg001.perseus-grc1.xml, tlg0005.tlg0...",21011,13016,2137,"{'-3.5': 0.5, '-2.5': 0.5}",-3.0,,"[[αδύ, ψιθύρισμα, πίτυς, αἰπόλος, τῆνος, πηγή,...","[[αδύ, ψιθύρισμα, πίτυς, αἰπόλος, τῆνος, πηγή,..."
4,tlg0006,Euripides,17,"[Cyclops, Ἡρακλεῖδαι, Ἱππόλυτος, Ἀνδρομάχη, Ἑκ...","[tlg0006.tlg001.perseus-grc2.xml, tlg0006.tlg0...",134129,81439,14482,{'-4.5': 1},-4.5,pagan,"[[Βρόμιος, ἔχω, πόνος, χὥτʼ, ἥβη, ἐμός, εὐσθεν...","[[Βρόμιος, ἔχω, πόνος, χὥτʼ, ἥβη, ἐμός, εὐσθεν..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,tlg7000,"Greek Anthology, Volume V",1,"[Greek Anthology, Volume V]",[tlg7000.tlg001.perseus-grc5.xml],20222,13460,1971,{},,,"[[ἰίαίρβ, θεά, Πάφος, σός, δύναμις, κάλλος, ἀθ...","[[ἰίαίρβ, θεά, Πάφος], [σός, δύναμις, κάλλος, ..."
265,tlg9004,Anonymi In Aristotelis Librum Alterum Analytic...,1,[Anonymi in analyticorum posteriorum librum al...,[tlg9004.tlg001.opp-grc1.xml],21649,8495,2414,{},,,"[[ζητούμενά, ἴσος, ἀριθμός, ὅσος, ἐπίσταμαι, φ...","[[], [ζητούμενά, ἴσος, ἀριθμός, ὅσος, ἐπίσταμα..."
266,tlg9006,Libanius,1,[Libanius Opera],[tlg9006.tlg011.opp-grc1.xml],6745,3175,392,{'12.5': 1},12.5,,"[[ἀνήρ, Λακεδαιμόνιος, ἡγέομαι, κοριν, θίους, ...","[[ἀνήρ, Λακεδαιμόνιος, ἡγέομαι, κοριν, θίους, ..."
267,tlg9010,Suda,1,[Suidae lexicon],[tlg9010.tlg001.1st1K-grc1.xml],589422,292441,139897,{'9.5': 1},9.5,,"[[πάρειμι, βιβλίον, σοῦδα, συνταξάμενοι, ἀνήρ,...","[[πάρειμι, βιβλίον, σοῦδα, συνταξάμενοι, ἀνήρ,..."


In [28]:
len(authors_df)

269

In [29]:
# COUNT δικαιοσύνη IN SENTENCES
def count_term_in_sents(lemmatized_sentences):
    return sum([sent.count("δικαιοσύνη") for sent in lemmatized_sentences])

authors_df["dikaiosyne_N"]  = authors_df["lemmatized_sentences"].apply(count_term_in_sents)

In [30]:
authors_df["dikaiosyne_TF"]  = authors_df.apply(lambda row: row["dikaiosyne_N"] / row["wordcount"], axis=1)
authors_df.sort_values("dikaiosyne_TF", ascending=False, inplace=True)

In [33]:
authors_df[authors_df["author"].str.contains("Sal")]

Unnamed: 0,author_id,author,n_titles,titles,filenames,wordcount,lemmata_wordcount,n_sentences,date_probs,date_avr,provenience,lemmata_lists,lemmatized_sentences,dikaiosyne_N,dikaiosyne_TF


In [34]:
authors_df[authors_df["dikaiosyne_N"]>0].sort_values("date_avr").head(30)

Unnamed: 0,author_id,author,n_titles,titles,filenames,wordcount,lemmata_wordcount,n_sentences,date_probs,date_avr,provenience,lemmata_lists,lemmatized_sentences,dikaiosyne_N,dikaiosyne_TF
70,tlg0096,Aesop,1,[Fabulae],[tlg0096.tlg002.First1K-grc1.xml],39492,21577,3333,{'-5.5': 1},-5.5,pagan,"[[ἀγαθός, πᾶς, κακός, διώκω, ἀσθενής, εἰμί, οὐ...","[[ἀγαθός, πᾶς, κακός, διώκω, ἀσθενής, εἰμί], [...",2,5.1e-05
1,tlg0003,Thucydides,1,[The Peloponnesian War],[tlg0003.tlg001.perseus-grc2.xml],150118,71863,6068,{'-4.5': 1},-4.5,pagan,"[[θουκυδίδης, Ἀθηναῖος, συγγράφω, πόλεμος, Πελ...","[[θουκυδίδης, Ἀθηναῖος, συγγράφω, πόλεμος, Πελ...",1,7e-06
19,tlg0016,Herodotus,1,[The Histories],[tlg0016.tlg001.perseus-grc2.xml],184947,94799,10149,{'-4.5': 1},-4.5,pagan,"[[Ἡρόδοτος, Ἁλικαρνασσεύς, ἱστορία, ἀπόδειξις,...","[[Ἡρόδοτος, Ἁλικαρνασσεύς, ἱστορία, ἀπόδειξις,...",8,4.3e-05
89,tlg0540,Lysias,34,"[On the Murder of Eratosthenes, Funeral Oratio...","[tlg0540.tlg001.perseus-grc2.xml, tlg0540.tlg0...",58875,28254,4258,"{'-4.5': 0.5, '-3.5': 0.5}",-4.0,pagan,"[[πολύς, ποιέω, ἀνήρ, τοιοῦτος, δικαστής, οὗτο...","[[πολύς, ποιέω, ἀνήρ, τοιοῦτος, δικαστής, οὗτο...",2,3.4e-05
38,tlg0032,Xenophon,14,"[Hellenica, Memorabilia, Economics, Symposium,...","[tlg0032.tlg001.perseus-grc2.xml, tlg0032.tlg0...",312187,150357,18988,"{'-4.5': 0.5, '-3.5': 0.5}",-4.0,pagan,"[[οὗτος, πολύς, ἡμέρα, ὕστερος, ἔρχομαι, Ἀθῆνα...","[[οὗτος, πολύς, ἡμέρα, ὕστερος, ἔρχομαι, Ἀθῆνα...",35,0.000112
122,tlg0627,Hippocrates,52,"[On Ancient Medicine, De aere aquis et locis ,...","[tlg0627.tlg001.perseus-grc1.xml, tlg0627.tlg0...",333443,171332,24456,"{'-4.5': 0.5, '-3.5': 0.5}",-4.0,pagan,"[[ὁπόσος, ἐπιχειρέω, ἰητρικῆς, λέγω, γράφω, ὑπ...","[[ὁπόσος, ἐπιχειρέω, ἰητρικῆς, λέγω, γράφω, ὑπ...",2,6e-06
45,tlg0059,Plato,37,"[Euthyphro, Apology, Crito, Phaedo, Cratylus, ...","[tlg0059.tlg001.perseus-grc1.xml, tlg0059.tlg0...",574294,272931,52384,"{'-4.5': 0.5, '-3.5': 0.5}",-4.0,pagan,"[[ευθ, νέος, Σωκράτης, γίγνομαι, λύκειον, κατα...","[[ευθ], [νέος, Σωκράτης, γίγνομαι, λύκειον, κα...",267,0.000465
7,tlg0010,Isocrates,30,"[Against Euthynus, Against Callimachus, Agains...","[tlg0010.tlg001.perseus-grc2.xml, tlg0010.tlg0...",119155,56497,4681,"{'-4.5': 0.5, '-3.5': 0.5}",-4.0,pagan,"[[πρόφασις, ἀπορέω, λέγω, νικίου, οὗτος, φίλος...","[[πρόφασις, ἀπορέω, λέγω, νικίου, οὗτος], [φίλ...",43,0.000361
17,tlg0014,Demosthenes,63,"[Ὀλυνθιακὸς α΄, Ὀλυνθιακὸς β΄, Ὀλυνθιακὸς γ΄, ...","[tlg0014.tlg001.perseus-grc2.xml, tlg0014.tlg0...",294362,143390,14564,{'-3.5': 1},-3.5,pagan,"[[πολύς, ἀνήρ, Ἀθηναῖος, χρῆμα, αἱρέω, νομίζω,...","[[πολύς, ἀνήρ, Ἀθηναῖος, χρῆμα, αἱρέω, νομίζω,...",11,3.7e-05
63,tlg0086,Aristotle,35,"[Aristotelis Analytica Priora et Posteriora, D...","[tlg0086.tlg001.1st1K-grc2.xml, tlg0086.tlg002...",840271,367848,60282,{'-3.5': 1},-3.5,pagan,"[[πρῶτος, λέγω, εἰμί, σκέψις, ἀπόδειξις, ἐπιστ...","[[πρῶτος, λέγω, εἰμί, σκέψις, ἀπόδειξις, ἐπιστ...",162,0.000193


# Networks for authors

In [251]:
reload(textnet)

<module 'textnet' from '/home/kasev/ECCE_AGT/scripts/textnet.py'>

In [199]:
%%time
nx_from_sentences = authors_df["lemmatized_sentences"].apply(textnet.network_from_sentences)
nx_from_lemmata = authors_df["lemmata_lists"].apply(textnet.network_from_lemmata_lists)

CPU times: user 23min 19s, sys: 1min 3s, total: 24min 22s
Wall time: 24min 23s


In [200]:
authors_df["G_sents"] = nx_from_sentences
authors_df["G_lems"] = nx_from_lemmata

In [238]:
def get_nn(source_network, source_node, per_level=5):
    return [(source_node, n[0]) for n in sorted(source_network[source_node].items(), key=lambda edge: edge[1]['weight'], reverse=True)][:per_level]

def construct_association_network(source_network, source_term, per_level=5):
    assoc_edges = [] 
    assoc_edges.extend(get_nn(source_network, source_term, per_level))
    neighbors = [e[1] for e in assoc_edges]
    for nn in neighbors:
        assoc_edges.extend(get_nn(source_network, nn))

    assoc_network = source_network.copy(as_view=False)
    edges_to_remove = []
    for edge in assoc_network.edges():
        if edge not in assoc_edges:
            if (edge[1],edge[0]) not in assoc_edges:
                edges_to_remove.append(edge)
    assoc_network.remove_edges_from(edges_to_remove)
    isolates = nx.isolates(assoc_network)
    assoc_network.remove_nodes_from([n for n in isolates])
    return assoc_network

In [240]:
construct_association_network(authors_df.iloc[0]["G_sents"], "δικαιοσύνη").nodes()

NodeView(('φιλαργυρία', 'κύριος', 'πᾶς', 'οἶδα', 'εἰμί', 'ετ', 'οὗτος', 'ἀγαπάω', 'ἐγείρω', 'δικαιοσύνη', 'ἁγνεία', 'ἐντολή', 'θεός', 'πίστις', 'κρίσις'))

In [252]:
def assoc_network(G):
    try:
        G_assoc = textnet.construct_association_network(G, "δικαιοσύνη")
    except:
        G_assoc = nx.Graph()
        G_assoc.clear()
    return G_assoc

In [253]:
authors_df["dikaiosyne_G_sents"] = authors_df["G_sents"].apply(assoc_network)
authors_df["dikaiosyne_G_lems"] = authors_df["G_lems"].apply(assoc_network)

In [260]:
list(authors_df.iloc[15]["dikaiosyne_G_lems"].nodes())

['θεός',
 'λέγω',
 'οὗτος',
 'γίγνομαι',
 'ὄνομα',
 'πατήρ',
 'ποιέω',
 'Ἰακώβ',
 'καλέω',
 'λαός',
 'χείρ',
 'κρίσις',
 'ἡμέρα',
 'οἶκος',
 'ἀπειθέω',
 'δικαιοσύνη',
 'εὐσέβεια',
 'φιλοσοφία',
 'κρατέω',
 'δικαιοπραξίας']

# Terms within sentences

In [35]:
def sentences_with_term(sentences, term, flatlist=False):
    sentences_with_term = [sen for sen in sentences if term in sen]
    if flatlist == True:
        flatlist = [word for sen in sentences_with_term for word in sen]
        flatlist = [word for word in flatlist if word != "δικαιοσύνη"]
        return flatlist
    else: 
        return sentences_with_term
authors_df["dik_sents_flat"] = authors_df["lemmatized_sentences"].apply(lambda cell: sentences_with_term(cell, "δικαιοσύνη", flatlist=True))

In [36]:
def get_tf(lemmata):
    fd_list = list(nltk.FreqDist(lemmata).most_common())
    tf_list = [(tup[0], np.round(tup[1] / len(lemmata), 5)) for tup in fd_list]
    return tf_list

authors_df["dik_sents_TF"] = authors_df["dik_sents_flat"].apply(get_tf)

In [37]:
def term_idf(term, all_sentences):
    idf_raw =  len(all_sentences) / len([sent for sent in all_sentences if term in sent])
    idf_log = math.log(idf_raw)
    return idf_log

def get_tfidf(key_term_lemmata, all_sentences):
    fd_list = list(nltk.FreqDist(key_term_lemmata).most_common())
    tfidf_list = [(tup[0], np.round((tup[1] / len(key_term_lemmata) * term_idf(tup[0], all_sentences)), 5)) for tup in fd_list]
    tfidf_list = sorted(tfidf_list, key=lambda tup: tup[1], reverse=True)
    return tfidf_list

authors_df["dik_sents_TFIDF"] = authors_df.apply(lambda x: get_tfidf(x["dik_sents_flat"], x["lemmatized_sentences"]), axis=1)

In [38]:
authors_df["dik_sents_TFIDF_10"] = authors_df["dik_sents_TFIDF"].apply(lambda cell: ", ".join([tup[0] for tup in cell[:10]]))

In [39]:
authors_df.head(5)

Unnamed: 0,author_id,author,n_titles,titles,filenames,wordcount,lemmata_wordcount,n_sentences,date_probs,date_avr,provenience,lemmata_lists,lemmatized_sentences,dikaiosyne_N,dikaiosyne_TF,dik_sents_flat,dik_sents_TF,dik_sents_TFIDF,dik_sents_TFIDF_10
175,tlg1622,Polycarp,1,[Epistula ad Philippenses],[tlg1622.tlg001.1st1K-grc1.xml],1804,951,212,"{'0.5': 0.5, '1.5': 0.5}",1.0,christian,"[[πολύκαρπος, πρέσβυς, ἐκκλησία, θεός, παροικο...","[[πολύκαρπος, πρέσβυς, ἐκκλησία, θεός, παροικο...",8,0.004435,"[μακάριος, πτωχός, διώκω, εἰμί, βασιλεία, θεάο...","[(εἰμί, 0.04651), (οὗτος, 0.04651), (ἐντολή, 0...","[(οὗτος, 0.19805), (ἐντολή, 0.18466), (πτωχός,...","οὗτος, ἐντολή, πτωχός, διώκω, θεάομαι, ἀδελφός..."
35,tlg0031paul,Paul of Tarsus,7,"[Romans, 1 Corinthians, 2 Corinthians, Galatia...","[tlg0031.tlg006.perseus-grc2.xml, tlg0031.tlg0...",24066,11089,1632,{'0.5': 1},0.5,christian,"[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...","[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...",49,0.002036,"[θεός, ἀποκαλύπτω, πίστις, πίστις, γράφω, δίκα...","[(θεός, 0.04675), (πίστις, 0.04156), (νόμος, 0...","[(πίστις, 0.12745), (ἁμαρτία, 0.11192), (νόμος...","πίστις, ἁμαρτία, νόμος, λογίζομαι, θεός, βασιλ..."
156,tlg1271,Clemens Romanus,2,"[Epistula I ad Corinthios, Epistula II ad Cori...","[tlg1271.tlg001.1st1K-grc1.xml, tlg1271.tlg002...",13936,6290,1410,{'0.5': 1},0.5,christian,"[[ἐκκλησία, θεός, παροικοῦσα, Ῥώμη, ἐκκλησία, ...","[[ἐκκλησία, θεός, παροικοῦσα, Ῥώμη, ἐκκλησία, ...",21,0.001507,"[οὗτος, ἄπειμι, εἰρήνη, ἀπολιμπάνω, ἕκαστος, φ...","[(θεός, 0.02871), (πίστις, 0.02392), (γίγνομαι...","[(καυχάομαι, 0.10409), (πίστις, 0.10181), (πύλ...","καυχάομαι, πίστις, πύλη, θεός, δύσις, ἀγάπη, π..."
130,tlg0646,Pseudo-Justinus Martyr,1,[Epistula ad Diognetum],[tlg0646.tlg004.1st1K-grc1.xml],2725,1364,240,"{'2.5': 0.5, '4.5': 0.5}",3.5,christian,"[[ὁράω, κράτιστος, διόγνητε, ὑπερεσπουδακότα, ...","[[ὁράω, κράτιστος, διόγνητε, ὑπερεσπουδακότα, ...",4,0.001468,"[ἐφηδόμενος, ἁμάρτημα, ἀνεχόμενος, ἀδικία, και...","[(ἐκεῖνος, 0.05556), (πολύς, 0.05556), (πῦρ, 0...","[(ἐκεῖνος, 0.24345), (πῦρ, 0.21507), (πολύς, 0...","ἐκεῖνος, πῦρ, πολύς, ἐφηδόμενος, ἁμάρτημα, ἀνε..."
152,tlg1216,Barnabas,1,[The Epistle of Barnabas],[tlg1216.tlg001.perseus-grc2.xml],7440,3450,875,"{'0.5': 0.5, '1.5': 0.5}",1.0,christian,"[[χαίρω, υἱός, θυγάτηρ, ὄνομα, κύριος, ἀγαπάω,...","[[χαίρω, υἱός, θυγάτηρ, ὄνομα, κύριος, ἀγαπάω,...",9,0.00121,"[πείθω, οὗτος, σύνοιδα, λαλέω, πολύς, ἐπίσταμα...","[(ὁδός, 0.0411), (κύριος, 0.0274), (θεός, 0.02...","[(ὁδός, 0.17298), (σκότος, 0.13651), (ἀγαθός, ...","ὁδός, σκότος, ἀγαθός, σύνοιδα, ἐπίσταμαι, συνώ..."


In [40]:
tfidf_words = list(set([word for wordlist in authors_df["dik_sents_TFIDF_10"].tolist() for word in wordlist]))
tfidf_words = [word for word in tfidf_words if not re.search("\W", word)]

In [139]:
#set_with_dataframe(ECCE_AGT_overview.add_worksheet("tfidf_words_20201205", 1, 1), pd.DataFrame(tfidf_words))

In [166]:
columns_to_omit = ["lemmatized_sentences", "lemmata_lists", "G_sents", "G_lems", "dikaiosyne_G_sents" , "dikaiosyne_G_lems"]
set_with_dataframe(ECCE_AGT_overview.add_worksheet("authors_df_minimal_20201211", 1,1), authors_df.drop(columns_to_omit, axis=1))

# Individual authors

In [41]:
authors_df[authors_df["author"].str.contains("Isocrates")]["dik_sents_TFIDF_10"].tolist()[0]

'σωφροσύνη, ἀρετή, εὐσέβεια, εἰμί, ἀδικία, σοφία, πολύς, ἔχω, μέγας, οὗτος'

In [42]:
authors_df[authors_df["author"].str.contains("Aristotle")]["dik_sents_TFIDF_10"].tolist()[0]

'δίκαιος, ἀρετή, ἀδικία, ἀνδρεία, σωφροσύνη, ἀγαθός, εἰμί, οἷος, ἄλλος, ἕτερος'

In [43]:
reload(textnet)

<module 'textnet' from '/home/kasev/ECCE_AGT/scripts/textnet.py'>

In [372]:
G = authors_df[authors_df["author"].str.contains("Paul of")]["dikaiosyne_G_sents"].tolist()[0]
fig = textnet.draw_2d_network(G)
fig.show()

In [364]:
#sddk.write_file("SDAM_data/ECCE_public/G_sents_paul.png",fig, conf)

In [373]:
reload(textnet)

<module 'textnet' from '/home/kasev/ECCE_AGT/scripts/textnet.py'>

In [375]:
G = authors_df[authors_df["author"].str.contains("Pauline ")]["dikaiosyne_G_sents"].tolist()[0]
fig = textnet.draw_2d_network(G)
fig.show()

In [188]:
sddk.write_file("SDAM_data/ECCE_public/G_sents_pauline.png",fig, conf)

Your <class 'plotly.graph_objs._figure.Figure'> object has been succefully written as "https://sciencedata.dk/files/SDAM_root/SDAM_data/ECCE_public/G_sents_pauline.png"


# N of shared terms within dik_sents_TFIDF_10

In [271]:
authors_df["dik_sents_TFIDF_10"].tolist()[:5]

['οὗτος, ἐντολή, πτωχός, διώκω, θεάομαι, ἀδελφός, ἐπιτρέπω, προεπεκαλέσασθέ, τὶς, πληρόω',
 'πίστις, ἁμαρτία, νόμος, λογίζομαι, θεός, βασιλεύω, δοῦλος, παρίστημι, πιστεύω, Χριστός',
 'καυχάομαι, πίστις, πύλη, θεός, δύσις, ἀγάπη, πατήρ, ὑπομονή, ἄνθρωπος, Ἀβραάμ',
 'ἐκεῖνος, πῦρ, πολύς, ἐφηδόμενος, ἁμάρτημα, ἀνεχόμενος, συνευδοκέω, ἐλεγχθέντες, ἀνάξιος, καλύπτω',
 'ὁδός, σκότος, ἀγαθός, σύνοιδα, ἐπίσταμαι, συνώδευσεν, ἀναγκάζω, ἀνατελεῖ, προπορεύσεται, περιστελεῖ']

In [44]:
philo_josephus = authors_df[authors_df["author_id"].isin(["tlg0526", "tlg0018"])]

In [80]:
authors_df_c.columns

Index(['author_id', 'author', 'n_titles', 'titles', 'filenames', 'wordcount',
       'lemmata_wordcount', 'n_sentences', 'date_probs', 'date_avr',
       'provenience', 'lemmata_lists', 'lemmatized_sentences', 'dikaiosyne_N',
       'dikaiosyne_TF', 'dik_sents_flat', 'dik_sents_TF', 'dik_sents_TFIDF',
       'dik_sents_TFIDF_10'],
      dtype='object')

In [81]:
authors_df_c = authors_df[(authors_df["provenience"] == "christian") & (authors_df["date_avr"] < 4) & (authors_df["dikaiosyne_N"] > 0)]

In [82]:
# christian + philo + josephus
authors_df_cj = pd.concat([authors_df_c, philo_josephus])

In [83]:
authors_df_cj

Unnamed: 0,author_id,author,n_titles,titles,filenames,wordcount,lemmata_wordcount,n_sentences,date_probs,date_avr,provenience,lemmata_lists,lemmatized_sentences,dikaiosyne_N,dikaiosyne_TF,dik_sents_flat,dik_sents_TF,dik_sents_TFIDF,dik_sents_TFIDF_10
175,tlg1622,Polycarp,1,[Epistula ad Philippenses],[tlg1622.tlg001.1st1K-grc1.xml],1804,951,212,"{'0.5': 0.5, '1.5': 0.5}",1.0,christian,"[[πολύκαρπος, πρέσβυς, ἐκκλησία, θεός, παροικο...","[[πολύκαρπος, πρέσβυς, ἐκκλησία, θεός, παροικο...",8,0.004435,"[μακάριος, πτωχός, διώκω, εἰμί, βασιλεία, θεάο...","[(εἰμί, 0.04651), (οὗτος, 0.04651), (ἐντολή, 0...","[(οὗτος, 0.19805), (ἐντολή, 0.18466), (πτωχός,...","οὗτος, ἐντολή, πτωχός, διώκω, θεάομαι, ἀδελφός..."
35,tlg0031paul,Paul of Tarsus,7,"[Romans, 1 Corinthians, 2 Corinthians, Galatia...","[tlg0031.tlg006.perseus-grc2.xml, tlg0031.tlg0...",24066,11089,1632,{'0.5': 1},0.5,christian,"[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...","[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...",49,0.002036,"[θεός, ἀποκαλύπτω, πίστις, πίστις, γράφω, δίκα...","[(θεός, 0.04675), (πίστις, 0.04156), (νόμος, 0...","[(πίστις, 0.12745), (ἁμαρτία, 0.11192), (νόμος...","πίστις, ἁμαρτία, νόμος, λογίζομαι, θεός, βασιλ..."
156,tlg1271,Clemens Romanus,2,"[Epistula I ad Corinthios, Epistula II ad Cori...","[tlg1271.tlg001.1st1K-grc1.xml, tlg1271.tlg002...",13936,6290,1410,{'0.5': 1},0.5,christian,"[[ἐκκλησία, θεός, παροικοῦσα, Ῥώμη, ἐκκλησία, ...","[[ἐκκλησία, θεός, παροικοῦσα, Ῥώμη, ἐκκλησία, ...",21,0.001507,"[οὗτος, ἄπειμι, εἰρήνη, ἀπολιμπάνω, ἕκαστος, φ...","[(θεός, 0.02871), (πίστις, 0.02392), (γίγνομαι...","[(καυχάομαι, 0.10409), (πίστις, 0.10181), (πύλ...","καυχάομαι, πίστις, πύλη, θεός, δύσις, ἀγάπη, π..."
130,tlg0646,Pseudo-Justinus Martyr,1,[Epistula ad Diognetum],[tlg0646.tlg004.1st1K-grc1.xml],2725,1364,240,"{'2.5': 0.5, '4.5': 0.5}",3.5,christian,"[[ὁράω, κράτιστος, διόγνητε, ὑπερεσπουδακότα, ...","[[ὁράω, κράτιστος, διόγνητε, ὑπερεσπουδακότα, ...",4,0.001468,"[ἐφηδόμενος, ἁμάρτημα, ἀνεχόμενος, ἀδικία, και...","[(ἐκεῖνος, 0.05556), (πολύς, 0.05556), (πῦρ, 0...","[(ἐκεῖνος, 0.24345), (πῦρ, 0.21507), (πολύς, 0...","ἐκεῖνος, πῦρ, πολύς, ἐφηδόμενος, ἁμάρτημα, ἀνε..."
152,tlg1216,Barnabas,1,[The Epistle of Barnabas],[tlg1216.tlg001.perseus-grc2.xml],7440,3450,875,"{'0.5': 0.5, '1.5': 0.5}",1.0,christian,"[[χαίρω, υἱός, θυγάτηρ, ὄνομα, κύριος, ἀγαπάω,...","[[χαίρω, υἱός, θυγάτηρ, ὄνομα, κύριος, ἀγαπάω,...",9,0.00121,"[πείθω, οὗτος, σύνοιδα, λαλέω, πολύς, ἐπίσταμα...","[(ὁδός, 0.0411), (κύριος, 0.0274), (θεός, 0.02...","[(ὁδός, 0.17298), (σκότος, 0.13651), (ἀγαθός, ...","ὁδός, σκότος, ἀγαθός, σύνοιδα, ἐπίσταμαι, συνώ..."
164,tlg1419,Hermas,1,[The Shepherd of Hermas],[tlg1419.tlg001.1st1K-grc1.xml],30282,14340,2959,{'1.5': 1},1.5,christian,"[[τρέφω, πέρνημι, ῥόδῃ, τὶς, Ῥώμη, πολύς, ἔτος...","[[τρέφω, πέρνημι, ῥόδῃ, τὶς, Ῥώμη], [πολύς, ἔτ...",33,0.00109,"[λέγω, προηγουμένοις, ἐκκλησία, κατορθώσωνται,...","[(ἄγγελος, 0.03333), (ἐργάζομαι, 0.03), (εἰμί,...","[(ἄγγελος, 0.12433), (ἐργάζομαι, 0.12302), (ἀρ...","ἄγγελος, ἐργάζομαι, ἀρετή, ἔργον, ἀλήθεια, πον..."
36,tlg0031pspa,Pauline literature,6,"[Ephesians, Colossians, 2 Thessalonians, 1 Tim...","[tlg0031.tlg010.perseus-grc2.xml, tlg0031.tlg0...",8324,4103,390,{'0.5': 1},0.5,christian,"[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...","[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...",8,0.000961,"[μανθάνω, χριστός, ἀκούω, διδάσκω, ἀλήθεια, Ἰη...","[(ἀλήθεια, 0.03252), (ἄνθρωπος, 0.02439), (θεό...","[(ἀλήθεια, 0.0935), (διώκω, 0.07915), (ἐνδύω, ...","ἀλήθεια, διώκω, ἐνδύω, φῶς, ἐπιθυμία, ἄνθρωπος..."
37,tlg0031rest,GNT rest,6,"[Hebrews, James, 1 Peter, 2 Peter, Jude, Revel...","[tlg0031.tlg019.perseus-grc2.xml, tlg0031.tlg0...",19782,9755,1037,{'0.5': 1},0.5,christian,"[[πολύτροπος, θεός, λαλέω, πατήρ, προφήτης, ἔσ...","[[πολύτροπος, θεός, λαλέω, πατήρ, προφήτης, ἔσ...",17,0.000859,"[ἀγαπάω, μισέω, ἀνομία, μετέχω, γάλα, ἄπειρος,...","[(θεός, 0.02703), (πίστις, 0.02317), (βασιλεύς...","[(βασιλεύς, 0.07352), (πίστις, 0.0668), (δίκαι...","βασιλεύς, πίστις, δίκαιος, Σαλήμ, βραδύς, φείδ..."
158,tlg1311,Didache,1,[Didache XII Apostolorum],[tlg1311.tlg001.1st1K-grc1.xml],2377,1049,284,{'1.5': 1},1.5,christian,"[[ὁδός, δύο, εἰμί, εἷς, ζωή, εἷς, θάνατος, δια...","[[ὁδός, δύο, εἰμί, εἷς, ζωή, εἷς, θάνατος, δια...",2,0.000841,"[διῶκται, ἀγαθός, μισέω, ἀλήθεια, ἀγαπῶντες, ψ...","[(κύριος, 0.14286), (διῶκται, 0.07143), (ἀγαθό...","[(διῶκται, 0.4035), (ψεῦδος, 0.4035), (κύριος,...","διῶκται, ψεῦδος, κύριος, ἀγαπῶντες, μισθός, πρ..."
129,tlg0645,Justinus Martyr,3,"[Apologia, Apology II, Dialogus cum Tryphone]","[tlg0645.tlg001.opp-grc1.xml, tlg0645.tlg002.p...",73449,34006,5829,{'1.5': 1},1.5,christian,"[[αὐτοκράτωρ, Τίτος, αἰλίῳ, ἀδριανῷ, ἀντωνίνῳ,...","[[αὐτοκράτωρ, Τίτος, αἰλίῳ, ἀδριανῷ, ἀντωνίνῳ,...",45,0.000613,"[λέγω, εὐσεβής, φιλόσοφος, φύλαξ, ἐραστής, παι...","[(θεός, 0.03846), (κρίσις, 0.01795), (λαός, 0....","[(κρίσις, 0.08306), (θεός, 0.08202), (εἰρήνη, ...","κρίσις, θεός, εἰρήνη, δίδωμι, εὐσέβεια, πεπεδη..."


In [84]:
len(authors_df_cj)

28

In [51]:
authors_df_cj.columns

Index(['author_id', 'author', 'n_titles', 'titles', 'filenames', 'wordcount',
       'lemmata_wordcount', 'n_sentences', 'date_probs', 'date_avr',
       'provenience', 'lemmata_lists', 'lemmatized_sentences', 'dikaiosyne_N',
       'dikaiosyne_TF', 'dik_sents_flat', 'dik_sents_TF', 'dik_sents_TFIDF',
       'dik_sents_TFIDF_10'],
      dtype='object')

In [307]:
set_with_dataframe(ECCE_AGT_overview.add_worksheet("authors_df_cj_TFIDF10", 1,1), authors_df_cj[["author", "dik_sents_TFIDF_10"]])

In [52]:
edges_tuples = []
for author1 in authors_df_cj["author"].tolist():
    author1_words = authors_df_cj.loc[authors_df_cj["author"]==author1, "dik_sents_TFIDF_10"].values[0].split()
    for author2 in authors_df_cj["author"].tolist():
        if author2 != author1:
            author2_words = authors_df_cj.loc[authors_df_cj["author"]==author2, "dik_sents_TFIDF_10"].values[0].split()
            shared = len(set(author1_words) & set(author2_words))
            if shared > 0:
                edges_tuples.append((author1, author2, shared))

In [53]:
G = nx.Graph()
G.clear()
G.add_weighted_edges_from(edges_tuples)
for (u, v, wt) in G.edges.data('weight'):
    G[u][v]["weight"] = int(wt)
total_weight = sum([int(n) for n in nx.get_edge_attributes(G, "weight").values()])
for (u, v) in G.edges:
    G[u][v]["norm_weight"] = round((G[u][v]["weight"] / total_weight), 5)
    G[u][v]["distance"] = round(1 / (G[u][v]["weight"]), 5)
    G[u][v]["norm_distance"] = round(1 / (G[u][v]["norm_weight"] ), 5)
G.remove_edges_from(nx.selfloop_edges(G))

In [54]:
nx.convert_matrix.to_pandas_adjacency(G)

Unnamed: 0,Polycarp,Pauline literature,Clement of Alexandria,Ignatius Antiochenus,Maximus of Tyre,Flavius Josephus,Paul of Tarsus,Clemens Romanus,Hermas,GNT rest,...,Basil,Luke (the evangelist),Gregorius Nazianzenus,Pseudo-Justinus Martyr,Barnabas,Didache,Athanasius of Alexandria,Hippolytus,Philo Judaeus,Matthew
Polycarp,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Pauline literature,1.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
Clement of Alexandria,1.0,1.0,0.0,0.0,1.0,4.0,1.0,2.0,2.0,0.0,...,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,2.0,0.0
Ignatius Antiochenus,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Maximus of Tyre,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
Flavius Josephus,1.0,0.0,4.0,0.0,1.0,0.0,1.0,1.0,2.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,3.0,0.0
Paul of Tarsus,0.0,1.0,1.0,0.0,0.0,1.0,0.0,2.0,1.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Clemens Romanus,0.0,2.0,2.0,0.0,0.0,1.0,2.0,0.0,1.0,1.0,...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Hermas,0.0,2.0,2.0,0.0,0.0,2.0,1.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0
GNT rest,0.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
reload(textnet)
fig = textnet.draw_2d_network(G)

In [296]:
#sddk.write_file("SDAM_data/ECCE_public/authors_network_10tfidf.png", fig, conf)

In [117]:
G_paul = textnet.construct_ego_network(G, "Paul of Tarsus", 10, reduced=True)

In [119]:
reload(textnet)

<module 'textnet' from '/home/kasev/ECCE_AGT/scripts/textnet.py'>

In [308]:
fig = textnet.draw_2d_network(G_paul)
fig.show()

In [756]:
fig.write_image("../figures/paul_egonet_10tfidf.png")

#  vectorization based on tfidf values

In [57]:
#tup_to_dict 
tfidfs_df_raw = pd.DataFrame([dict(tfidfs) for tfidfs in authors_df["dik_sents_TFIDF"].tolist()])
tfidfs_df_raw

Unnamed: 0,οὗτος,ἐντολή,πτωχός,διώκω,θεάομαι,ἀδελφός,ἐπιτρέπω,προεπεκαλέσασθέ,τὶς,πληρόω,...,ἀντιδίδωμι,ἐπικουρέειν,περιπέμπω,ἔκβασις,ʽτὸν,πάκορον,Σύριος,οὐεντίδιος,γιγνόμενα,τασϲόμενον
0,0.19805,0.18466,0.12457,0.12457,0.12457,0.12457,0.12457,0.12457,0.12457,0.12457,...,,,,,,,,,,
1,0.00701,,,0.03512,0.01021,,,,,0.01276,...,,,,,,,,,,
2,0.03087,,,0.02612,0.02143,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,0.04667,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,,,,,,,,,,,...,,,,,,,,,,
265,,,,,,,,,,,...,,,,,,,,,,
266,,,,,,,,,,,...,,,,,,,,,,
267,,,,,,,,,,,...,,,,,,,,,,


In [58]:
selected_columns = []
for column in tfidfs_df_raw.columns:
    if len(tfidfs_df_raw[tfidfs_df_raw[column].notnull()]) > 1:
        selected_columns.append(column)
len(selected_columns)

2765

In [59]:
tfidfs_df_filtered = tfidfs_df_raw[selected_columns]
tfidfs_df_filtered.fillna(0, inplace=True)

In [60]:
tfidfs_df_filtered

Unnamed: 0,οὗτος,ἐντολή,πτωχός,διώκω,θεάομαι,ἀδελφός,ἐπιτρέπω,τὶς,πληρόω,ἐκφέρω,...,μεταφέρω,σταθμός,ἐξευρίσκω,ὁπόσος,Μιλήσιος,δικαιόσυνος,σφοδρός,ἔχθρα,ἐπίτροπος,ἥμισυς
0,0.19805,0.18466,0.12457,0.12457,0.12457,0.12457,0.12457,0.12457,0.12457,0.12457,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.00701,0.00000,0.00000,0.03512,0.01021,0.00000,0.00000,0.00000,0.01276,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.03087,0.00000,0.00000,0.02612,0.02143,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.04667,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
265,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
266,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
267,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Tfidfs with cosine similarity

In [61]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD 
from sklearn.metrics.pairwise import cosine_similarity

In [62]:
tfidfs_sim_mat = cosine_similarity(tfidfs_df_filtered.to_numpy()).round(5)
tfidfs_dist_mat = 1 - cosine_similarity(tfidfs_df_filtered.to_numpy())
tfidfs_dist_mat = tfidfs_dist_mat.round(5)

In [63]:
authors = authors_df["author"].tolist()

In [64]:
tfidfs_sim_mat_df = pd.DataFrame(tfidfs_sim_mat, columns=authors, index=authors)
tfidfs_sim_mat_df

Unnamed: 0,Polycarp,Paul of Tarsus,Clemens Romanus,Pseudo-Justinus Martyr,Barnabas,Hermas,Testamentum Abrahamae,Pauline literature,Menander,GNT rest,...,Antisthenes,Gorgias of Leontini,Rufus Soph.,Tryphon I Grammaticus,Alcidamas,Babrius,Alciphron,Xenophon of Ephesus,Hermannus Diels,Philostratus Minor
Polycarp,1.00000,0.20562,0.21473,0.00675,0.07494,0.21202,0.08439,0.10462,0.05604,0.15242,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Paul of Tarsus,0.20562,1.00000,0.33376,0.19126,0.17925,0.24179,0.25314,0.19243,0.06339,0.31477,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Clemens Romanus,0.21473,0.33376,1.00000,0.07890,0.19072,0.28123,0.13309,0.26221,0.09193,0.25111,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Pseudo-Justinus Martyr,0.00675,0.19126,0.07890,1.00000,0.05795,0.12931,0.17878,0.10772,0.04258,0.15290,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Barnabas,0.07494,0.17925,0.19072,0.05795,1.00000,0.19311,0.08546,0.14578,0.07030,0.16445,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Babrius,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Alciphron,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Xenophon of Ephesus,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Hermannus Diels,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [71]:
prechristian = ["Isocrates", "Aristotle", "Septuaginta", "Flavius Josephus", "Philo Judaeus", "Paul of Tarsus"]
tfidfs_prechristian = tfidfs_sim_mat_df[prechristian].loc[prechristian]
tfidfs_prechristian

Unnamed: 0,Isocrates,Aristotle,Septuaginta,Flavius Josephus,Philo Judaeus,Paul of Tarsus
Isocrates,1.0,0.50845,0.25758,0.49441,0.57362,0.15754
Aristotle,0.50845,1.0,0.26054,0.36762,0.63929,0.15322
Septuaginta,0.25758,0.26054,1.0,0.32254,0.30049,0.32178
Flavius Josephus,0.49441,0.36762,0.32254,1.0,0.43474,0.23478
Philo Judaeus,0.57362,0.63929,0.30049,0.43474,1.0,0.17135
Paul of Tarsus,0.15754,0.15322,0.32178,0.23478,0.17135,1.0


In [72]:
set_with_dataframe(ECCE_AGT_overview.add_worksheet("tfidfs_sim_prechristian", 1, 1), tfidfs_prechristian, include_index=True)

In [76]:
tfidfs_sim_mat_df[prechristian].sort_values("Paul of Tarsus", ascending=False).head(20)

Unnamed: 0,Isocrates,Aristotle,Septuaginta,Flavius Josephus,Philo Judaeus,Paul of Tarsus
Paul of Tarsus,0.15754,0.15322,0.32178,0.23478,0.17135,1.0
Catenae (Novum Testamentum),0.36156,0.35676,0.54121,0.43365,0.38973,0.57725
Epiphanius,0.30683,0.32864,0.34203,0.32726,0.33874,0.48311
Origenes,0.45813,0.46541,0.64137,0.43765,0.49226,0.44051
Eusebius,0.46744,0.42852,0.63168,0.52857,0.51721,0.36996
Chronicon Paschale,0.19643,0.15354,0.36495,0.24479,0.1864,0.36756
Clement of Alexandria,0.38516,0.43333,0.50025,0.39746,0.46209,0.33977
Clemens Romanus,0.19808,0.12724,0.35355,0.18709,0.22253,0.33376
Septuaginta,0.25758,0.26054,1.0,0.32254,0.30049,0.32178
GNT rest,0.12176,0.14479,0.32968,0.17334,0.15549,0.31477


In [None]:
'Polycarp', 'Paul of Tarsus', 'Clemens Romanus', 'Barnabas', 'Hermas', 'Pauline literature',
       'GNT rest', 'Didache', 'Justinus Martyr', 'Athenagoras', 'Origenes',
       'Theophilus Antiochenus', 'Matthew', 'Clement of Alexandria',
       'Eusebius', 'Johnannine literature', 'Epiphanius',
       'Athanasius of Alexandria', 'Luke (the evangelist)',
       'Gregorius Nazianzenus', 'Ignatius Antiochenus', 'Basil', 'Hippolytus',
       'Maximus of Tyre', 'Irenaeus', 'Philo Judaeus', 'Flavius Josephus']

In [94]:
authors_df_c = authors_df[(authors_df["provenience"] == "christian") & (authors_df["date_avr"] < 4) & (authors_df["dikaiosyne_N"] > 0)]

In [99]:
authors_df_c_a = authors_df_c[authors_df_c["date_avr"] < 2]
authors_df_c_a

Unnamed: 0,author_id,author,n_titles,titles,filenames,wordcount,lemmata_wordcount,n_sentences,date_probs,date_avr,provenience,lemmata_lists,lemmatized_sentences,dikaiosyne_N,dikaiosyne_TF,dik_sents_flat,dik_sents_TF,dik_sents_TFIDF,dik_sents_TFIDF_10
175,tlg1622,Polycarp,1,[Epistula ad Philippenses],[tlg1622.tlg001.1st1K-grc1.xml],1804,951,212,"{'0.5': 0.5, '1.5': 0.5}",1.0,christian,"[[πολύκαρπος, πρέσβυς, ἐκκλησία, θεός, παροικο...","[[πολύκαρπος, πρέσβυς, ἐκκλησία, θεός, παροικο...",8,0.004435,"[μακάριος, πτωχός, διώκω, εἰμί, βασιλεία, θεάο...","[(εἰμί, 0.04651), (οὗτος, 0.04651), (ἐντολή, 0...","[(οὗτος, 0.19805), (ἐντολή, 0.18466), (πτωχός,...","οὗτος, ἐντολή, πτωχός, διώκω, θεάομαι, ἀδελφός..."
35,tlg0031paul,Paul of Tarsus,7,"[Romans, 1 Corinthians, 2 Corinthians, Galatia...","[tlg0031.tlg006.perseus-grc2.xml, tlg0031.tlg0...",24066,11089,1632,{'0.5': 1},0.5,christian,"[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...","[[Παῦλος, δοῦλος, Ἰησοῦς, Χριστός, κλητός, ἀπό...",49,0.002036,"[θεός, ἀποκαλύπτω, πίστις, πίστις, γράφω, δίκα...","[(θεός, 0.04675), (πίστις, 0.04156), (νόμος, 0...","[(πίστις, 0.12745), (ἁμαρτία, 0.11192), (νόμος...","πίστις, ἁμαρτία, νόμος, λογίζομαι, θεός, βασιλ..."
156,tlg1271,Clemens Romanus,2,"[Epistula I ad Corinthios, Epistula II ad Cori...","[tlg1271.tlg001.1st1K-grc1.xml, tlg1271.tlg002...",13936,6290,1410,{'0.5': 1},0.5,christian,"[[ἐκκλησία, θεός, παροικοῦσα, Ῥώμη, ἐκκλησία, ...","[[ἐκκλησία, θεός, παροικοῦσα, Ῥώμη, ἐκκλησία, ...",21,0.001507,"[οὗτος, ἄπειμι, εἰρήνη, ἀπολιμπάνω, ἕκαστος, φ...","[(θεός, 0.02871), (πίστις, 0.02392), (γίγνομαι...","[(καυχάομαι, 0.10409), (πίστις, 0.10181), (πύλ...","καυχάομαι, πίστις, πύλη, θεός, δύσις, ἀγάπη, π..."
152,tlg1216,Barnabas,1,[The Epistle of Barnabas],[tlg1216.tlg001.perseus-grc2.xml],7440,3450,875,"{'0.5': 0.5, '1.5': 0.5}",1.0,christian,"[[χαίρω, υἱός, θυγάτηρ, ὄνομα, κύριος, ἀγαπάω,...","[[χαίρω, υἱός, θυγάτηρ, ὄνομα, κύριος, ἀγαπάω,...",9,0.00121,"[πείθω, οὗτος, σύνοιδα, λαλέω, πολύς, ἐπίσταμα...","[(ὁδός, 0.0411), (κύριος, 0.0274), (θεός, 0.02...","[(ὁδός, 0.17298), (σκότος, 0.13651), (ἀγαθός, ...","ὁδός, σκότος, ἀγαθός, σύνοιδα, ἐπίσταμαι, συνώ..."
164,tlg1419,Hermas,1,[The Shepherd of Hermas],[tlg1419.tlg001.1st1K-grc1.xml],30282,14340,2959,{'1.5': 1},1.5,christian,"[[τρέφω, πέρνημι, ῥόδῃ, τὶς, Ῥώμη, πολύς, ἔτος...","[[τρέφω, πέρνημι, ῥόδῃ, τὶς, Ῥώμη], [πολύς, ἔτ...",33,0.00109,"[λέγω, προηγουμένοις, ἐκκλησία, κατορθώσωνται,...","[(ἄγγελος, 0.03333), (ἐργάζομαι, 0.03), (εἰμί,...","[(ἄγγελος, 0.12433), (ἐργάζομαι, 0.12302), (ἀρ...","ἄγγελος, ἐργάζομαι, ἀρετή, ἔργον, ἀλήθεια, πον..."
36,tlg0031pspa,Pauline literature,6,"[Ephesians, Colossians, 2 Thessalonians, 1 Tim...","[tlg0031.tlg010.perseus-grc2.xml, tlg0031.tlg0...",8324,4103,390,{'0.5': 1},0.5,christian,"[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...","[[Παῦλος, ἀπόστολος, Χριστός, Ἰησοῦς, θέλημα, ...",8,0.000961,"[μανθάνω, χριστός, ἀκούω, διδάσκω, ἀλήθεια, Ἰη...","[(ἀλήθεια, 0.03252), (ἄνθρωπος, 0.02439), (θεό...","[(ἀλήθεια, 0.0935), (διώκω, 0.07915), (ἐνδύω, ...","ἀλήθεια, διώκω, ἐνδύω, φῶς, ἐπιθυμία, ἄνθρωπος..."
37,tlg0031rest,GNT rest,6,"[Hebrews, James, 1 Peter, 2 Peter, Jude, Revel...","[tlg0031.tlg019.perseus-grc2.xml, tlg0031.tlg0...",19782,9755,1037,{'0.5': 1},0.5,christian,"[[πολύτροπος, θεός, λαλέω, πατήρ, προφήτης, ἔσ...","[[πολύτροπος, θεός, λαλέω, πατήρ, προφήτης, ἔσ...",17,0.000859,"[ἀγαπάω, μισέω, ἀνομία, μετέχω, γάλα, ἄπειρος,...","[(θεός, 0.02703), (πίστις, 0.02317), (βασιλεύς...","[(βασιλεύς, 0.07352), (πίστις, 0.0668), (δίκαι...","βασιλεύς, πίστις, δίκαιος, Σαλήμ, βραδύς, φείδ..."
158,tlg1311,Didache,1,[Didache XII Apostolorum],[tlg1311.tlg001.1st1K-grc1.xml],2377,1049,284,{'1.5': 1},1.5,christian,"[[ὁδός, δύο, εἰμί, εἷς, ζωή, εἷς, θάνατος, δια...","[[ὁδός, δύο, εἰμί, εἷς, ζωή, εἷς, θάνατος, δια...",2,0.000841,"[διῶκται, ἀγαθός, μισέω, ἀλήθεια, ἀγαπῶντες, ψ...","[(κύριος, 0.14286), (διῶκται, 0.07143), (ἀγαθό...","[(διῶκται, 0.4035), (ψεῦδος, 0.4035), (κύριος,...","διῶκται, ψεῦδος, κύριος, ἀγαπῶντες, μισθός, πρ..."
129,tlg0645,Justinus Martyr,3,"[Apologia, Apology II, Dialogus cum Tryphone]","[tlg0645.tlg001.opp-grc1.xml, tlg0645.tlg002.p...",73449,34006,5829,{'1.5': 1},1.5,christian,"[[αὐτοκράτωρ, Τίτος, αἰλίῳ, ἀδριανῷ, ἀντωνίνῳ,...","[[αὐτοκράτωρ, Τίτος, αἰλίῳ, ἀδριανῷ, ἀντωνίνῳ,...",45,0.000613,"[λέγω, εὐσεβής, φιλόσοφος, φύλαξ, ἐραστής, παι...","[(θεός, 0.03846), (κρίσις, 0.01795), (λαός, 0....","[(κρίσις, 0.08306), (θεός, 0.08202), (εἰρήνη, ...","κρίσις, θεός, εἰρήνη, δίδωμι, εὐσέβεια, πεπεδη..."
150,tlg1205,Athenagoras,2,"[Supplication pro Christianis, De resurrectione]","[tlg1205.tlg001.perseus-grc2.xml, tlg1205.tlg0...",20210,9819,808,{'1.5': 1},1.5,christian,"[[ὑμέτερος, μέγας, βασιλεύς, οἰκουμένη, ἄλλος,...","[[ὑμέτερος, μέγας, βασιλεύς, οἰκουμένη, ἄλλος,...",12,0.000594,"[ὑμέτερος, ἄλλος, αἰτία, λαμβάνω, ἀδίκημα, πρό...","[(ἄνθρωπος, 0.02888), (ἔχω, 0.02166), (ψυχή, 0...","[(δίδωμι, 0.07665), (νόμος, 0.06069), (ψυχή, 0...","δίδωμι, νόμος, ψυχή, ἄνθρωπος, ἔχω, ἀποχή, κρί..."


In [98]:
len(authors_df_c[authors_df_c["date_avr"] >= 2])

9

In [90]:
tfidfs_christian.columns

Index(['Polycarp', 'Paul of Tarsus', 'Clemens Romanus',
       'Pseudo-Justinus Martyr', 'Barnabas', 'Hermas', 'Pauline literature',
       'GNT rest', 'Didache', 'Justinus Martyr', 'Athenagoras', 'Origenes',
       'Theophilus Antiochenus', 'Matthew', 'Clement of Alexandria',
       'Eusebius', 'Johnannine literature', 'Epiphanius',
       'Athanasius of Alexandria', 'Luke (the evangelist)',
       'Gregorius Nazianzenus', 'Ignatius Antiochenus', 'Basil', 'Hippolytus',
       'Maximus of Tyre', 'Irenaeus', 'Philo Judaeus', 'Flavius Josephus'],
      dtype='object')

In [88]:
christian = authors_df_cj["author"].tolist()
tfidfs_christian = tfidfs_sim_mat_df[christian].loc[christian].round(2)
tfidfs_christian

Unnamed: 0,Polycarp,Paul of Tarsus,Clemens Romanus,Pseudo-Justinus Martyr,Barnabas,Hermas,Pauline literature,GNT rest,Didache,Justinus Martyr,...,Athanasius of Alexandria,Luke (the evangelist),Gregorius Nazianzenus,Ignatius Antiochenus,Basil,Hippolytus,Maximus of Tyre,Irenaeus,Philo Judaeus,Flavius Josephus
Polycarp,1.0,0.21,0.21,0.01,0.07,0.21,0.1,0.15,0.03,0.12,...,0.11,0.09,0.04,0.12,0.04,0.02,0.1,0.04,0.15,0.19
Paul of Tarsus,0.21,1.0,0.33,0.19,0.18,0.24,0.19,0.31,0.04,0.25,...,0.11,0.17,0.13,0.02,0.19,0.08,0.07,0.05,0.17,0.23
Clemens Romanus,0.21,0.33,1.0,0.08,0.19,0.28,0.26,0.25,0.06,0.28,...,0.14,0.22,0.18,0.0,0.17,0.07,0.06,0.1,0.22,0.19
Pseudo-Justinus Martyr,0.01,0.19,0.08,1.0,0.06,0.13,0.11,0.15,0.0,0.13,...,0.09,0.04,0.07,0.0,0.12,0.04,0.08,0.05,0.15,0.15
Barnabas,0.07,0.18,0.19,0.06,1.0,0.19,0.15,0.16,0.36,0.49,...,0.17,0.16,0.08,0.0,0.08,0.02,0.06,0.07,0.16,0.19
Hermas,0.21,0.24,0.28,0.13,0.19,1.0,0.24,0.25,0.14,0.23,...,0.18,0.2,0.07,0.0,0.19,0.09,0.09,0.13,0.29,0.28
Pauline literature,0.1,0.19,0.26,0.11,0.15,0.24,1.0,0.21,0.11,0.22,...,0.22,0.14,0.14,0.0,0.16,0.04,0.02,0.03,0.16,0.15
GNT rest,0.15,0.31,0.25,0.15,0.16,0.25,0.21,1.0,0.02,0.28,...,0.13,0.19,0.21,0.05,0.13,0.04,0.03,0.03,0.16,0.17
Didache,0.03,0.04,0.06,0.0,0.36,0.14,0.11,0.02,1.0,0.1,...,0.22,0.08,0.0,0.0,0.06,0.0,0.0,0.0,0.07,0.06
Justinus Martyr,0.12,0.25,0.28,0.13,0.49,0.23,0.22,0.28,0.1,1.0,...,0.24,0.23,0.16,0.03,0.21,0.09,0.08,0.08,0.3,0.35


In [89]:
set_with_dataframe(ECCE_AGT_overview.add_worksheet("tfidfs_sim_christian", 1, 1), tfidfs_christian, include_index=True)

In [101]:
authors = authors_df_cj["author"].tolist()

In [None]:
tfidfs_christian

In [109]:
G = nx.from_numpy_matrix(tfidfs_christian.to_numpy())

authors_dict = dict(zip(range(len(authors)), authors))
G = nx.relabel_nodes(G, authors_dict)

total_weight = sum([int(n) for n in nx.get_edge_attributes(G, "weight").values()])
for (u, v) in G.edges:
    G[u][v]["norm_weight"] = round((G[u][v]["weight"] / total_weight), 5)
    G[u][v]["distance"] = round(1 / (G[u][v]["weight"]), 5)
    G[u][v]["norm_distance"] = round(1 / (G[u][v]["norm_weight"] ), 5)

edges_to_remove = []
for edge in G.edges:
    if edge[0] == edge[1]:
        edges_to_remove.append(edge)
G.remove_edges_from(edges_to_remove)

sim_threshold = 0.37
edges_to_remove = []
for edge in G.edges:
    if G[edge[0]][edge[1]]["weight"] < sim_threshold:
        edges_to_remove.append(edge)
G.remove_edges_from(edges_to_remove)

G.remove_nodes_from(list(nx.isolates(G)))

In [110]:
reload(textnet)
fig = textnet.draw_2d_network(G)
fig.show()

In [111]:
fig.write_image("../figures/authors_network_cos-sim-tfidf.png")

ValueError: Failed to start Kaleido subprocess. Error stream:

[1230/172140.251435:WARNING:resource_bundle.cc(435)] locale_file_path.empty() for locale 
[1230/172140.270396:WARNING:resource_bundle.cc(435)] locale_file_path.empty() for locale 
[1230/172140.271267:WARNING:resource_bundle.cc(435)] locale_file_path.empty() for locale 
[1230/172140.307137:ERROR:platform_shared_memory_region_posix.cc(249)] Creating shared memory in /tmp/.org.chromium.Chromium.X0cJim failed: No such file or directory (2)
[1230/172140.307244:FATAL:memory.cc(38)] Out of memory. size=131072
#0 0x5593904b4f89 base::debug::CollectStackTrace()
#1 0x5593904238e3 base::debug::StackTrace::StackTrace()
#2 0x559390434005 logging::LogMessage::~LogMessage()
#3 0x55939044e199 base::internal::OnNoMemoryInternal()
#4 0x55939043b363 base::FieldTrialList::InstantiateFieldTrialAllocatorIfNeeded()
#5 0x55939043b4e9 base::FieldTrialList::CopyFieldTrialStateToFlags()
#6 0x55938f14f43c content::GpuProcessHost::LaunchGpuProcess()
#7 0x55938f14de7d content::GpuProcessHost::Init()
#8 0x55938f14dc32 content::GpuProcessHost::Get()
#9 0x55938f4aae59 base::internal::Invoker<>::RunOnce()
#10 0x55939046402b base::TaskAnnotator::RunTask()
#11 0x559390474b3e base::sequence_manager::internal::ThreadControllerWithMessagePumpImpl::DoWorkImpl()
#12 0x5593904748d0 base::sequence_manager::internal::ThreadControllerWithMessagePumpImpl::DoWork()
#13 0x5593904cfdc9 base::MessagePumpLibevent::Run()
#14 0x5593904750c5 base::sequence_manager::internal::ThreadControllerWithMessagePumpImpl::Run()
#15 0x5593904510ee base::RunLoop::Run()
#16 0x55938efbd8a4 content::BrowserProcessSubThread::IOThreadRun()
#17 0x55939048a997 base::Thread::ThreadMain()
#18 0x5593904c48fe base::(anonymous namespace)::ThreadFunc()
#19 0x7f24d54366db start_thread
#20 0x7f24d3fc5a3f clone
Task trace:
#0 0x55938f4a9aac content::VizProcessTransportFactory::ConnectHostFrameSinkManager()

Received signal 6
#0 0x5593904b4f89 base::debug::CollectStackTrace()
#1 0x5593904238e3 base::debug::StackTrace::StackTrace()
#2 0x5593904b4b25 base::debug::(anonymous namespace)::StackDumpSignalHandler()
#3 0x7f24d54418a0 (/lib/x86_64-linux-gnu/libpthread-2.27.so+0x1289f)
#4 0x7f24d3ee2f47 gsignal
#5 0x7f24d3ee48b1 abort
#6 0x5593904b3a85 base::debug::BreakDebugger()
#7 0x5593904344a2 logging::LogMessage::~LogMessage()
#8 0x55939044e199 base::internal::OnNoMemoryInternal()
#9 0x55939043b363 base::FieldTrialList::InstantiateFieldTrialAllocatorIfNeeded()
#10 0x55939043b4e9 base::FieldTrialList::CopyFieldTrialStateToFlags()
#11 0x55938f14f43c content::GpuProcessHost::LaunchGpuProcess()
#12 0x55938f14de7d content::GpuProcessHost::Init()
#13 0x55938f14dc32 content::GpuProcessHost::Get()
#14 0x55938f4aae59 base::internal::Invoker<>::RunOnce()
#15 0x55939046402b base::TaskAnnotator::RunTask()
#16 0x559390474b3e base::sequence_manager::internal::ThreadControllerWithMessagePumpImpl::DoWorkImpl()
#17 0x5593904748d0 base::sequence_manager::internal::ThreadControllerWithMessagePumpImpl::DoWork()
#18 0x5593904cfdc9 base::MessagePumpLibevent::Run()
#19 0x5593904750c5 base::sequence_manager::internal::ThreadControllerWithMessagePumpImpl::Run()
#20 0x5593904510ee base::RunLoop::Run()
#21 0x55938efbd8a4 content::BrowserProcessSubThread::IOThreadRun()
#22 0x55939048a997 base::Thread::ThreadMain()
#23 0x5593904c48fe base::(anonymous namespace)::ThreadFunc()
#24 0x7f24d54366db start_thread
#25 0x7f24d3fc5a3f clone
  r8: 0000000000000000  r9: 00007f24d1fb70d0 r10: 0000000000000008 r11: 0000000000000246
 r12: 00007f24d1fb8398 r13: 00007f24d1fb7370 r14: 00007f24d1fb83a0 r15: aaaaaaaaaaaaaaaa
  di: 0000000000000002  si: 00007f24d1fb70d0  bp: 00007f24d1fb7320  bx: 00007f24d1fb7394
  dx: 0000000000000000  ax: 0000000000000000  cx: 00007f24d3ee2f47  sp: 00007f24d1fb70d0
  ip: 00007f24d3ee2f47 efl: 0000000000000246 cgf: 002b000000000033 erf: 0000000000000000
 trp: 0000000000000000 msk: 0000000000000000 cr2: 0000000000000000
[end of stack trace]
Calling _exit(1). Core file will not be generated.


In [747]:
G_paul = textnet.construct_ego_network(G, "Paul of Tarsus", 10, reduced=True)
fig = textnet.draw_2d_network(G_paul)
fig.show()

In [749]:
fig.write_image("../figures/paul_egonet_cos-sim-tfidf.png")