### Imports

In [None]:
from collections import Counter
from itertools import combinations, chain

import igraph as ig
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
import spacy as sc
from scipy.stats import pearsonr, ks_2samp, spearmanr
from sklearn.feature_extraction.text import TfidfVectorizer
from statsmodels.stats.proportion import proportions_ztest
from stop_words import get_stop_words
from tqdm._tqdm_notebook import tqdm_notebook as tqdm

In [None]:
# Register tqdm with pandas so that `progress_apply` (used further down) is
# available on Series and DataFrames.
tqdm.pandas()

### Config

In [None]:
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*"
# and 3.8 removed the old names, so use whichever spelling this installation
# actually provides instead of hard-coding one of them.
for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# Font settings for the PS/PDF backends and a larger base font size.
# NOTE(review): fonttype 42 together with useafm/use14corefonts looks aimed at
# publication-ready vector output - confirm against the target venue's rules.
plt.rc('ps', fonttype=42)
plt.rc('pdf', fonttype=42)
plt.rcParams.update({'font.size': 16})
plt.rcParams['ps.useafm'] = True
plt.rcParams['pdf.use14corefonts'] = True
plt.rcParams['axes.unicode_minus'] = False

In [None]:
# Languages covered by the analysis; used to filter the corpus and as keys of
# the per-language lexica and result dictionaries.
languages = ["German", "French", "Italian", "Spanish"]

In [None]:
# Pickled DataFrame with the text passages (loaded below via pd.read_pickle).
dataframe_inptut_path = "data/processed/texts.p"
# Directory holding the positive/negative word lists, one pair per language.
sentiment_dir = "data/sentiment/"

### Load data

#### Texts

In [None]:
# NOTE(review): unpickling can execute arbitrary code - only load a file that
# comes from a trusted source.
texts_df = pd.read_pickle(dataframe_inptut_path)

#### Spacy NLP

In [None]:
# Load one spaCy pipeline per language and key it by the language label used
# in `texts_df["language"]`.
nlp_to_use = {
    language_name: sc.load(model_name)
    for language_name, model_name in (
        ("German", "de_core_news_sm"),
        ("French", "fr_core_news_sm"),
        ("Spanish", "es_core_news_sm"),
        ("Italian", "it_core_news_sm"),
    )
}

# Short aliases for the individual pipelines.
de_nlp = nlp_to_use["German"]
fr_nlp = nlp_to_use["French"]
es_nlp = nlp_to_use["Spanish"]
it_nlp = nlp_to_use["Italian"]

#### Sentiment dictionaries

In [None]:
# Read the negative and positive word lists of every language into
# sentiment_lexica[language]["neg" | "pos"], one list entry per file line.
sentiment_lexica = {}
for lang in languages:
    lexicon_paths = {
        "neg": "{}negative_words_{}.txt".format(sentiment_dir, lang.lower()),
        "pos": "{}positive_words_{}.txt".format(sentiment_dir, lang.lower()),
    }
    sentiment_lexica[lang] = {}
    for polarity, lexicon_path in lexicon_paths.items():
        # Decode explicitly: the word lists contain non-ASCII characters and the
        # platform default encoding is not UTF-8 everywhere.
        # NOTE(review): assumes the files are stored as UTF-8 - confirm.
        with open(lexicon_path, "r", encoding="utf-8") as lexicon_file:
            sentiment_lexica[lang][polarity] = lexicon_file.read().splitlines()

### Prepare data

#### Fix authors

In [None]:
# Spelling variants of author names mapped to the form used everywhere else.
author_fix = {
    "Bachiller D. P. Gatell": "Bachiller D. P. Gatell.",
    "Eliza Haywood": "Eliza Fowler Haywood",
}
canonical_authors = texts_df["author"].replace(to_replace=author_fix)
texts_df["author"] = canonical_authors

#### Fix language

In [None]:
# Collapse the alternative label "Spanish; Castilian" into plain "Spanish".
texts_df["language"] = texts_df["language"].replace("Spanish; Castilian", "Spanish")

#### Fix years

In [None]:
# Keep only the leading part of each date string by cutting at the first "-",
# then at the first " [", then at the first " bzw." (e.g. ranges or
# bracketed/alternative years collapse to the first year).
texts_df["date"] = texts_df["date"].apply(
    lambda raw_date: raw_date.split("-")[0].split(" [")[0].split(" bzw.")[0]
)

# Drop passages whose date is the placeholder "missing" or the stray value "4".
texts_df = texts_df[~texts_df["date"].isin(["missing", "4"])]

#### Reduce to defined languages

In [None]:
# Keep only passages written in one of the configured languages.
texts_df = texts_df[texts_df["language"].isin(languages)]

#### Dataset statistics

In [None]:
# Per-language corpus overview: authors, journals, passages, topics, year span.
for language in languages:
    print(language)
    lang_df = texts_df.loc[texts_df["language"] == language]
    journal_group = lang_df.groupby("filename")
    authors = lang_df["author"].unique()

    # "Anonym" and "missing" are placeholder labels, not real authors: each one
    # that occurs is taken off the author count, and the journals whose
    # passages all carry that label are counted separately.
    has_anonymous = "Anonym" in authors
    has_missing = "missing" in authors
    num_authors = len(authors) - int(has_anonymous) - int(has_missing)
    num_anonymus = (
        journal_group.apply(lambda x: int(all(x["author"] == "Anonym"))).sum()
        if has_anonymous
        else 0
    )
    num_missing = (
        journal_group.apply(lambda x: int(all(x["author"] == "missing"))).sum()
        if has_missing
        else 0
    )

    # Long-format topic table: one row per (passage index, topic).
    topic_columns = lang_df["topics"].apply(lambda x: pd.Series(list(x)))
    topic_rows = topic_columns.reset_index().melt(id_vars="index").dropna()
    topics = topic_rows[["index", "value"]].set_index("index")
    years = lang_df["date"].unique()

    summary = [
        ("num authors:", num_authors),
        ("num_anonymous:", num_anonymus),
        ("num_missing:", num_missing),
        ("num journals:", len(journal_group)),
        ("num text passages:", lang_df.shape[0]),
        ("num topics:", len(np.unique(topics))),
    ]
    for label, value in summary:
        print(label, value)
    print("years:", np.min(years), np.max(years))
    print()

#### Topics Statistics

In [None]:
# Long-format topic table: one row per (passage index, topic), indexed by the
# passage's index in texts_df.
topic_columns = texts_df["topics"].apply(lambda x: pd.Series(list(x)))
topic_rows = topic_columns.reset_index().melt(id_vars="index").dropna()
topics = topic_rows[["index", "value"]].set_index("index")

# Attach each passage's language to its topics via the shared index.
topics_language_df = pd.merge(
    topics,
    texts_df[["language"]],
    left_index=True,
    right_index=True,
)

In [None]:
# How often each topic occurs, counted separately per language.
vc = topics_language_df.groupby("language")["value"].value_counts()

#### Lemmatizer

In [None]:
def lemmatize(row):
    """Return the lemmas of ``row["text"]`` joined by single spaces.

    The spaCy pipeline is looked up in the module-level ``nlp_to_use`` by
    ``row["language"]``; rows in a language without a pipeline yield "".
    Every token of the parsed document contributes its lemma (no filtering
    by part of speech).
    """
    pipeline = nlp_to_use.get(row["language"])
    if pipeline is None:
        return ""
    return " ".join(token.lemma_ for token in pipeline(row["text"]))

# Lemmatise every passage row by row (with a progress bar) into a new
# "tokens" column holding the space-joined lemmas.
texts_df["tokens"] = texts_df.progress_apply(lemmatize, axis=1)

#### Analyze sentiment method

In [None]:
def analyze_sentiment(text, nl, pl):
    """Score the polarity of *text* by lexicon lookup.

    Args:
        text: Raw text; it is tokenised with ``nltk.word_tokenize``.
        nl: Iterable of negative lexicon words.
        pl: Iterable of positive lexicon words.

    Returns:
        ``(pos - neg) / (pos + neg)``, where ``pos``/``neg`` are the numbers of
        token occurrences equal to a lower-cased positive/negative lexicon
        entry, or the string ``"ohne"`` when no token matches either lexicon.
        Callers that aggregate the scores must handle that sentinel.
    """
    # Count every distinct token once so each lexicon word is an O(1) lookup
    # instead of a full scan of the token list. Duplicate lexicon entries are
    # still counted once per entry, exactly as with repeated list.count calls.
    # NOTE(review): lexicon words are lower-cased but tokens are not, so
    # capitalised tokens never match - confirm this is intended.
    token_counts = Counter(nltk.word_tokenize(text))
    num_negative = sum(token_counts[nw.lower()] for nw in nl)
    num_positive = sum(token_counts[pw.lower()] for pw in pl)

    num_matches = num_positive + num_negative
    if num_matches == 0:
        return "ohne"
    return (num_positive - num_negative) / num_matches

#### Calculate sentiment

In [None]:
def _score_passage(row):
    """Return the lexicon sentiment of one passage (0 if its language has no lexicon)."""
    lexicon = sentiment_lexica.get(row["language"])
    if lexicon is None:
        return 0
    return analyze_sentiment(row["text"], lexicon["neg"], lexicon["pos"])


# Assign the whole column in one step. The previous
# `texts_df["sentiment"].update(scores)` modified a temporary Series obtained
# by chained indexing: under pandas Copy-on-Write the DataFrame column stays
# at its initial 0, and it also relied on silently upcasting an int column.
texts_df["sentiment"] = texts_df.progress_apply(_score_passage, axis=1)

#### Create graphs per language

In [None]:
# Minimum co-occurrence count ("multiplicity") an edge needs in order to be
# kept; the graph analysis below is run once per threshold.
multiplicity_thresholds = [1]

In [None]:
def get_edges(text):
    """Return all unordered pairs of graph vertices that occur in *text*.

    The candidate words come from the module-level ``vertices`` collection.
    Pairs are built from the alphabetically sorted matches, so every pair is
    ordered the same way regardless of the iteration order of ``vertices``.

    NOTE(review): ``word in text`` is a substring test on the raw string, so a
    vertex also matches inside longer words - confirm this is intended.
    """
    present = sorted(word for word in vertices if word in text)
    return list(combinations(present, 2))

In [None]:
# Result containers, keyed by language and then by multiplicity threshold.
graphs = {}
connectivity_results = {}
degree_results = {}
path_results = {}
cc_results = {}
assortativity_results = {}
centrality_frames = []

# One document per journal file: all lemmatised passages of a file joined
# together. This does not depend on the language, so it is built only once.
text_by_filename_language_df = (
    texts_df.groupby(["filename", "language", "date"])["tokens"]
    .apply(lambda x: " ".join(x))
    .to_frame()
    .reset_index()
)

for language in languages:
    print(language)

    connectivity_results[language] = {}
    degree_results[language] = {}
    path_results[language] = {}
    cc_results[language] = {}
    assortativity_results[language] = {}

    language_df = text_by_filename_language_df[text_by_filename_language_df["language"] == language]

    # The vectorizer is only used to obtain the corpus vocabulary (lower-cased
    # terms of 3+ letters, stop words removed). `vocabulary_` is read instead
    # of `get_feature_names()`, which no longer exists in scikit-learn >= 1.2.
    stop_words = get_stop_words(language.lower())
    vectorizer = TfidfVectorizer(stop_words=stop_words, max_df=1.0, min_df=1, token_pattern=r"[^\d\W]{3,}")
    vectorizer.fit(language_df["tokens"])
    vocabulary = vectorizer.vocabulary_

    # Graph vertices are the vocabulary terms found in a sentiment lexicon.
    # Sets make each membership test O(1) instead of scanning the word lists.
    neg_entries = set(sentiment_lexica[language]["neg"])
    pos_entries = set(sentiment_lexica[language]["pos"])
    neg_words = {term for term in vocabulary if term in neg_entries}
    pos_words = {term for term in vocabulary if term in pos_entries}
    # `vertices` is read by get_edges() as a module-level name.
    vertices = neg_words.union(pos_words)

    # Vertices are added in sorted order so vertex ids are reproducible.
    # NOTE(review): a term listed in both lexica becomes two vertices with the
    # same name - confirm how edges should attach in that case.
    g = ig.Graph(directed=False)
    for word in sorted(neg_words):
        g.add_vertex(word, sent=1, color="red", sentiment="neg")
    for word in sorted(pos_words):
        g.add_vertex(word, sent=2, color="green", sentiment="pos")

    # An edge links two sentiment words occurring in the same journal; its
    # multiplicity is the number of journals in which the pair co-occurs.
    edges = list(chain.from_iterable(language_df["tokens"].apply(get_edges)))
    unique_edges, edge_counts = np.unique(edges, return_counts=True, axis=0)
    g.add_edges(unique_edges)
    g.es["multiplicity"] = edge_counts

    for mt in multiplicity_thresholds:
        degree_results[language][mt] = {}
        path_results[language][mt] = {}
        cc_results[language][mt] = {}
        assortativity_results[language][mt] = {}

        # Prune in place: drop edges below the threshold, then isolated
        # vertices. `g` is shared across thresholds, so pruning accumulates and
        # graphs[language] ends up holding the last threshold's graph.
        g.delete_edges(np.where(np.array(g.es["multiplicity"]) < mt)[0])
        g.delete_vertices(np.where(np.array(g.degree()) == 0)[0])
        graphs[language] = g
        print("multiplicity_threshold:", mt, ", num_vertices:", g.vcount(), ", num_edges:", g.ecount())

        # Vertex ids per polarity; valid until the graph is pruned again.
        vertex_sentiments = np.array(g.vs["sentiment"])
        neg_ids = np.where(vertex_sentiments == "neg")[0]
        pos_ids = np.where(vertex_sentiments == "pos")[0]

        # connectivity: number of components of the graph and of each k-core
        print("num_components:", len(g.clusters()))
        connectivity_results[language][mt] = [len(core.clusters()) for core in g.k_core()]

        # degree
        degree_results[language][mt]["all"] = g.degree()
        degree_results[language][mt]["neg"] = g.degree(neg_ids)
        degree_results[language][mt]["pos"] = g.degree(pos_ids)
        degree_results[language][mt]["mean_degree"] = np.mean(g.degree())
        degree_results[language][mt]["weighted_all"] = g.strength(weights="multiplicity")
        degree_results[language][mt]["weighted_neg"] = g.strength(neg_ids, weights="multiplicity")
        degree_results[language][mt]["weighted_pos"] = g.strength(pos_ids, weights="multiplicity")
        degree_results[language][mt]["mean_weighted_degree"] = np.mean(g.strength(weights="multiplicity"))

        # paths
        path_results[language][mt]["all"] = g.eccentricity()
        path_results[language][mt]["neg"] = g.eccentricity(neg_ids)
        path_results[language][mt]["pos"] = g.eccentricity(pos_ids)
        path_results[language][mt]["average_path_length"] = g.average_path_length(directed=False)

        # clustering coefficient
        cc_results[language][mt]["all"] = g.transitivity_local_undirected()
        cc_results[language][mt]["neg"] = g.transitivity_local_undirected(neg_ids)
        cc_results[language][mt]["pos"] = g.transitivity_local_undirected(pos_ids)
        cc_results[language][mt]["weighted_all"] = g.transitivity_local_undirected(weights="multiplicity")
        cc_results[language][mt]["weighted_neg"] = g.transitivity_local_undirected(neg_ids, weights="multiplicity")
        cc_results[language][mt]["weighted_pos"] = g.transitivity_local_undirected(pos_ids, weights="multiplicity")

        # centralities
        # Built column by column so the numeric columns keep a numeric dtype.
        # Stacking names and numbers in one NumPy array turns every value into
        # a string, which makes later sorting and ranking lexicographic.
        # NOTE(review): igraph treats `weights` as edge lengths for betweenness
        # and closeness - confirm that multiplicity as a distance is intended.
        centrality_df = pd.DataFrame({
            "word": g.vs["name"],
            "degree": g.degree(),
            "betweenness": g.betweenness(directed=False),
            "closeness": g.closeness(),
            "weighted_degree": g.strength(weights="multiplicity"),
            "weighted_betweenness": g.betweenness(directed=False, weights="multiplicity"),
            "weighted_closeness": g.closeness(weights="multiplicity"),
        })
        centrality_df["language"] = language
        centrality_df["multiplicity_threshold"] = mt
        centrality_frames.append(centrality_df)

        # assortativity
        assortativity_results[language][mt]["degree"] = g.assortativity_degree(directed=False)
        assortativity_results[language][mt]["sentiment"] = g.assortativity("sent", directed=False)

        print()

# DataFrame.append was removed in pandas 2.0; concatenate once at the end.
centrality_results_df = pd.concat(centrality_frames) if centrality_frames else pd.DataFrame()

### Analyze Results/Plots

#### Connectivity

In [None]:
# For every language and threshold, print the largest number of connected
# components found in any of the graph's k-cores.
for language in languages:
    print(language)
    components_by_threshold = connectivity_results[language]
    for mt in multiplicity_thresholds:
        print(max(components_by_threshold[mt]))

#### Degree

In [None]:
# Print the smallest and largest vertex eccentricity (all / negative /
# positive vertices) and the average path length per language and threshold.
for language in languages:
    print(language)
    for mt in multiplicity_thresholds:
        print(mt)
        threshold_paths = path_results[language][mt]
        for group in ("all", "neg", "pos"):
            print(group, min(threshold_paths[group]), max(threshold_paths[group]))
        print("avp", threshold_paths["average_path_length"])

In [None]:
# Empirical CCDF of the vertex degree, one panel per language, for all
# vertices (black), negative words (red) and positive words (green) at
# multiplicity threshold 1.
fig, axs = plt.subplots(1, len(languages), figsize=(15, 3), sharey=True, squeeze=False)
axs = axs[0]
for ax, language in zip(axs, languages):
    for group, color in (("all", "black"), ("neg", "red"), ("pos", "green")):
        sorted_degrees = np.sort(degree_results[language][1][group])
        # Evenly spaced cumulative probabilities for the sorted sample.
        ccdf = 1 - np.linspace(0, 1, len(sorted_degrees))
        # x/y are passed by keyword: seaborn >= 0.12 rejects them positionally.
        sns.lineplot(x=sorted_degrees, y=ccdf, color=color, ci=None, ax=ax)
    ax.set_xlabel("Degree")
    ax.set_title(language)
axs[0].set_ylabel("CCDF")
axs[0].set_yticks([0, 0.5, 1])
plt.tight_layout()
plt.show()
plt.close()

In [None]:
# Two-sample Kolmogorov-Smirnov test comparing negative and positive words,
# once on the plain degree and once on the weighted degree (threshold 1).
for language in languages:
    language_degrees = degree_results[language][1]
    print(language, "degree", ks_2samp(language_degrees["neg"], language_degrees["pos"]))
    print(
        language,
        "weighteddegree",
        ks_2samp(language_degrees["weighted_neg"], language_degrees["weighted_pos"]),
    )

#### CCDF Eccentricity

In [None]:
# Empirical CCDF of the vertex eccentricity, one panel per language, for all
# vertices (black), negative words (red) and positive words (green).
#
# Fixes compared with the previous version of this cell:
# - it created 3 axes although there are 4 languages,
# - it looped `mt` over the language names while indexing with a stale
#   `language` variable, so the path_results lookup failed,
# - the "all" curve used linspace(0, 0.1, ...) instead of (0, 1, ...).
threshold = 1
fig, axs = plt.subplots(1, len(languages), figsize=(15, 4), sharey=True, squeeze=False)
axs = axs[0]
for ax, language in zip(axs, languages):
    language_paths = path_results[language][threshold]
    print(language, threshold, language_paths["average_path_length"])
    for group, color in (("all", "black"), ("neg", "red"), ("pos", "green")):
        sorted_eccentricities = np.sort(language_paths[group])
        ccdf = 1 - np.linspace(0, 1, len(sorted_eccentricities))
        # x/y are passed by keyword: seaborn >= 0.12 rejects them positionally.
        sns.lineplot(x=sorted_eccentricities, y=ccdf, color=color, ci=None, ax=ax)
    ax.set_xlabel("Eccentricity")
    ax.set_title(language)
axs[0].set_ylabel("CCDF")
axs[0].set_yticks([0, 0.5, 1])
plt.tight_layout()
plt.show()
plt.close()

#### CCDF Clustering Coefficient

In [None]:
# Empirical CCDF of the weighted local clustering coefficient, one panel per
# language, for all vertices (black), negative words (red) and positive words
# (green) at multiplicity threshold 1.
fig, axs = plt.subplots(1, len(languages), figsize=(15, 3.3), sharey=True, squeeze=False)
axs = axs[0]
for ax, language in zip(axs, languages):
    for group, color in (("weighted_all", "black"), ("weighted_neg", "red"), ("weighted_pos", "green")):
        sorted_coefficients = np.sort(cc_results[language][1][group])
        ccdf = 1 - np.linspace(0, 1, len(sorted_coefficients))
        # x/y are passed by keyword: seaborn >= 0.12 rejects them positionally.
        sns.lineplot(x=sorted_coefficients, y=ccdf, color=color, ci=None, ax=ax)
    ax.set_xlabel("Weighted Local\nClustering Coefficient")
    ax.set_title(language)
axs[0].set_ylabel("CCDF")
axs[0].set_yticks([0, 0.5, 1])
plt.tight_layout()
plt.show()
plt.close()

In [None]:
# Two-sample Kolmogorov-Smirnov test comparing negative and positive words on
# the local clustering coefficient, unweighted and weighted (threshold 1).
for language in languages:
    language_cc = cc_results[language][1]
    print(language, "lcc", ks_2samp(language_cc["neg"], language_cc["pos"]))
    print(
        language,
        "weightedlcc",
        ks_2samp(language_cc["weighted_neg"], language_cc["weighted_pos"]),
    )

#### Centralities

In [None]:
def _print_sentiment_cell(word, lexicon, terminator, **print_kwargs):
    """Print one LaTeX table cell for *word*, shaded by its lexicon polarity.

    Positive words are shaded green, negative words red; a word found in
    neither list prints nothing. *terminator* is the LaTeX separator printed
    after the word ("&" between columns, a double backslash at the row end).
    """
    # Raw strings keep the backslash of \cellcolor without relying on an
    # invalid escape sequence.
    if word in lexicon["pos"]:
        print(r"\cellcolor{green!25}", word, terminator, **print_kwargs)
    elif word in lexicon["neg"]:
        print(r"\cellcolor{red!25}", word, terminator, **print_kwargs)


def _print_top_words_rows(language_df, columns, lexicon, top_n=10):
    """Print LaTeX rows with the *top_n* words of each centrality in *columns*.

    *columns* names three centrality columns (degree, betweenness, closeness
    flavour); row i holds the i-th ranked word of each of them.
    """
    rankings = [
        language_df.sort_values(column, ascending=False)["word"].head(top_n).tolist()
        for column in columns
    ]
    # zip stops at the shortest ranking, so graphs with fewer than top_n
    # vertices no longer raise an IndexError.
    for deg_word, bet_word, clo_word in zip(*rankings):
        _print_sentiment_cell(deg_word, lexicon, "&", end=" ")
        _print_sentiment_cell(bet_word, lexicon, "&", end=" ")
        _print_sentiment_cell(clo_word, lexicon, "\\\\")


# Per language: the ten most central words for the unweighted and the
# weighted centralities at threshold 1, formatted as LaTeX table rows.
for language in languages:
    print(language)
    language_df = centrality_results_df[(centrality_results_df["language"] == language) & (centrality_results_df["multiplicity_threshold"] == 1)]
    language_lexicon = sentiment_lexica[language]

    print("normal")
    _print_top_words_rows(language_df, ["degree", "betweenness", "closeness"], language_lexicon)

    print()
    print("weighted")
    _print_top_words_rows(
        language_df,
        ["weighted_degree", "weighted_betweenness", "weighted_closeness"],
        language_lexicon,
    )

    print()

In [None]:
# Display titles for the centrality columns: the capitalised name, with a
# "Weighted" line in front for the weighted variants.
centrality_names_mapping = {
    column_prefix + base_name: title_prefix + base_name.capitalize()
    for base_name in ("degree", "betweenness", "closeness")
    for column_prefix, title_prefix in (("", ""), ("weighted_", "Weighted\n"))
}

In [None]:
# Share of positive (green) and negative (red) words among the 100 most
# central words, one panel per centrality measure and one bar per language.
fig, axs = plt.subplots(1, 6, figsize=(15,4), sharey=True)
for axidx, centrality in enumerate(["degree", "betweenness", "closeness",  "weighted_degree", "weighted_betweenness", "weighted_closeness"]):
    combined_results = []
    for language in languages:
        language_df = centrality_results_df[(centrality_results_df["language"] == language) & (centrality_results_df["multiplicity_threshold"] == 1)]
        temp_centrality_df = language_df.sort_values(centrality, ascending=False).head(100)["word"].to_frame()

        positive_words = set(sentiment_lexica[language]["pos"])
        negative_words = set(sentiment_lexica[language]["neg"])

        def get_word_sentiment(word):
            """Label *word* as positive/negative; positive wins if it is in both lexica."""
            if word in positive_words:
                return "positive"
            elif word in negative_words:
                return "negative"

        temp_centrality_df["sentiment"] = temp_centrality_df["word"].apply(get_word_sentiment)
        temp_centrality_df["language"] = language
        combined_results.append(temp_centrality_df)
    combined_results_df = pd.concat(combined_results)
    # fill_value/reindex guarantee both columns exist and hold 0 rather than
    # NaN when a language has no positive or no negative word in its top list.
    props = (
        combined_results_df.groupby("language")["sentiment"]
        .value_counts(normalize=True)
        .unstack(fill_value=0)
        .reindex(columns=["negative", "positive"], fill_value=0)
    )
    props["total"] = props["negative"] + props["positive"]
    # Stacked effect: the full-height bar is drawn first, the positive share
    # is drawn on top of it, so the visible remainder is the negative share.
    sns.barplot(data=props.reset_index(), x="language", y="total", color="lightcoral", ax=axs[axidx], order=["German", "French", "Italian", "Spanish"])
    sns.barplot(data=props.reset_index(), x="language", y="positive", color="lightgreen", ax=axs[axidx], order=["German", "French", "Italian", "Spanish"])
    axs[axidx].tick_params(axis="x", labelrotation=90)
    axs[axidx].set_title(centrality_names_mapping[centrality])
    axs[axidx].set_xlabel(None)
    axs[axidx].set_ylabel(None)
axs[0].set_ylabel("Percentage")
axs[0].set_ylim(0, 1)
axs[0].set_yticks([0, 0.25, 0.5, 0.75, 1])
axs[0].set_yticklabels([0, 25, 50, 75, 100])
plt.subplots_adjust(bottom=0.27, top=0.82)
plt.show()

In [None]:
# For every centrality and language, test whether the share of negative words
# among the most central words differs from their share in the whole graph
# (one-sided proportions z-test in the observed direction), and tally the
# outcomes by direction and significance at the 5% level.
num_smaller_sig = 0
num_larger_sig = 0
num_smaller_non_sig = 0
num_larger_non_sig = 0
for centrality in ["degree", "betweenness", "closeness",  "weighted_degree", "weighted_betweenness", "weighted_closeness"]:
    for language in languages:
        language_df = centrality_results_df[(centrality_results_df["language"] == language) & (centrality_results_df["multiplicity_threshold"] == 1)]
        top_words = language_df.sort_values(centrality, ascending=False).head(100)["word"]

        # The sample size is the number of words actually available, which is
        # below 100 for graphs with fewer vertices.
        num_top_words = len(top_words)
        if num_top_words == 0:
            continue

        # A word counts as negative only if it is not also in the positive
        # lexicon. Counting directly avoids a KeyError when no top word is
        # negative.
        positive_words = set(sentiment_lexica[language]["pos"])
        negative_words = set(sentiment_lexica[language]["neg"])
        neg_count = sum(
            1 for word in top_words
            if word not in positive_words and word in negative_words
        )
        neg_net_ratio = neg_count / num_top_words

        graph = graphs[language]
        neg_graph_ratio = graph.vs["sentiment"].count("neg") / graph.vcount()

        alternative = "smaller" if neg_net_ratio < neg_graph_ratio else "larger"
        pvalue = proportions_ztest(count=neg_count, nobs=num_top_words, value=neg_graph_ratio, alternative=alternative)[1]

        significant = pvalue < 0.05
        if alternative == "smaller":
            if significant:
                num_smaller_sig += 1
            else:
                num_smaller_non_sig += 1
        else:
            if significant:
                num_larger_sig += 1
            else:
                num_larger_non_sig += 1

        print(centrality, language, neg_net_ratio, round(neg_graph_ratio, 2), alternative, pvalue)

print(num_smaller_sig)
print(num_larger_sig)
print(num_smaller_non_sig)
print(num_larger_non_sig)

In [None]:
# Report every pair of centrality measures whose Spearman rank correlation is
# NOT significant at the 5% level; nothing is printed for significant pairs.
for language in languages:
    language_df = centrality_results_df[(centrality_results_df["language"] == language) & (centrality_results_df["multiplicity_threshold"] == 1)].set_index("word")
    for prefix in ("", "weighted_"):
        measure_pairs = (
            ("degbet", prefix + "degree", prefix + "betweenness"),
            ("degclo", prefix + "degree", prefix + "closeness"),
            ("betclo", prefix + "betweenness", prefix + "closeness"),
        )
        for pair_label, first_column, second_column in measure_pairs:
            pvalue = spearmanr(language_df[first_column], language_df[second_column])[1]
            if pvalue > 0.05:
                print(language, prefix + pair_label)

#### Ambivalence

In [None]:
# Per language: mean passage sentiment, then degree assortativity and
# sentiment assortativity of the co-occurrence graph for every threshold.
for language in languages:
    print(language)
    language_df = texts_df[texts_df["language"] == language]
    # analyze_sentiment returns the string "ohne" for passages without any
    # lexicon word; coerce those to NaN so they are skipped by the mean
    # instead of making it fail on mixed str/float values.
    numeric_sentiment = pd.to_numeric(language_df["sentiment"], errors="coerce")
    print(round(numeric_sentiment.mean(), 3))
    print("++++")
    for mt in multiplicity_thresholds:
        print(round(assortativity_results[language][mt]["degree"], 3))
        print(round(assortativity_results[language][mt]["sentiment"], 3))
    print()

#### Sentiment

In [None]:
# One figure per language: mean passage sentiment per year.
for language in languages:
    language_df = texts_df[texts_df["language"] == language]
    # Passages without any lexicon word carry the string "ohne"; turn those
    # into NaN so the y column is numeric and they are left out of the mean.
    plot_df = language_df.assign(
        sentiment=pd.to_numeric(language_df["sentiment"], errors="coerce")
    )
    fig, ax = plt.subplots(figsize=(10,2.5))
    # Draw on the axes created above explicitly rather than on the implicit
    # current axes.
    sns.lineplot(data=plot_df, x="date", y="sentiment", ax=ax)
    ax.set_xlabel("Years")
    ax.set_ylim(-0.55, 0.55)
    ax.set_ylabel("Mean\nSentiment")
    # Rotating through tick_params does not need the labels to be rendered
    # first, so no intermediate draw call is required.
    ax.tick_params(axis="x", labelrotation=90)
    plt.subplots_adjust(bottom=0.42, top=0.96, right=0.97)
    plt.show()
    plt.close()

In [None]:
# Long-format topic table: one row per (passage index, topic), indexed by the
# passage's index in texts_df.
topic_columns = texts_df["topics"].apply(lambda x: pd.Series(list(x)))
topic_rows = topic_columns.reset_index().melt(id_vars="index").dropna()
topics = topic_rows[["index", "value"]].set_index("index")

# Attach each passage's sentiment score to its topics via the shared index.
t_s_df = pd.merge(
    topics,
    texts_df[["sentiment"]],
    left_index=True,
    right_index=True,
)

In [None]:
# Mean passage sentiment per topic.
fig, ax = plt.subplots(figsize=(15,6))
# Passages without any lexicon word carry the string "ohne"; coerce those to
# NaN so the y column is numeric and they are ignored by the bar heights.
topic_sentiment_df = t_s_df.assign(
    sentiment=pd.to_numeric(t_s_df["sentiment"], errors="coerce")
)
sns.barplot(data=topic_sentiment_df, x="value", y="sentiment", ax=ax)
ax.set_xlabel("Topics")
ax.set_ylabel("Mean\nSentiment")
ax.tick_params(axis="x", labelrotation=90)
plt.tight_layout()
plt.show()