In [9]:
import pickle
import pandas as pd
import os
import re

## Sentiment analysis

In [None]:
# Imported here because this cell runs before the only other cell that imports
# `pipeline`; without it a fresh "Restart & Run All" fails with a NameError.
from transformers import pipeline

# Load the pre-tokenized input; each element is passed to the classifier as-is.
with open("../results/tokenized_by_sentence.pkl", "rb") as f:
    tokenized_sentence = pickle.load(f)

# Hungarian sentiment model. `device=0` is forwarded to the pipeline unchanged
# (presumably the first GPU -- confirm against the transformers docs).
classification = pipeline(task="sentiment-analysis", model="NYTK/sentiment-ohb3-hubert-hungarian", device=0)
sentiment = [classification(line) for line in tokenized_sentence]

# Cache the raw predictions so the expensive inference does not have to be repeated.
with open("../results/sentiment.pkl", "wb") as f:
    pickle.dump(sentiment, f)

In [None]:
# Reload the cached raw predictions (one list of prediction dicts per input element).
with open("../results/sentiment.pkl", "rb") as f:
    sentiment = pickle.load(f)

# Only these three class labels are kept; any other prediction is dropped.
_valid_sentiment_labels = ('LABEL_0', 'LABEL_1', 'LABEL_2')

# Same nesting as `sentiment`, but each prediction is reduced to its label.
filtered_sentiment = [
    [{'label': item['label']} for item in sublist if item['label'] in _valid_sentiment_labels]
    for sublist in sentiment
]
# Bare label strings; `filtered_sentiment` is already restricted to the valid
# labels, so no second filtering pass is needed.
filtered_sentiment_2 = [[item['label'] for item in sublist] for sublist in filtered_sentiment]

with open("../results/filtered_sentiment.pkl", "wb") as f:
    pickle.dump(filtered_sentiment_2, f)

## Emotion analysis

In [None]:
from transformers import pipeline

# Hungarian emotion classifier.
classifier = pipeline("text-classification", model="visegradmedia-emotion/Emotion_RoBERTa_hungarian6")

# Documents to classify.
with open("../resources/docs.pkl", "rb") as f:
    docs = pickle.load(f)

# One classifier call per document, in document order.
emotion = list(map(classifier, docs))

# Keep only the predicted label of every prediction, preserving the nesting of `emotion`.
emotion_label = [
    [prediction['label'] for prediction in doc_predictions]
    for doc_predictions in emotion
]

In [None]:
# Readable names for the raw class ids found in `emotion_label`.
# NOTE(review): the id -> emotion assignment is taken from this notebook; confirm
# it against the model card.
_emotion_names = {
    "LABEL_0": "anger",
    "LABEL_1": "fear",
    "LABEL_2": "disgust",
    "LABEL_3": "sadness",
    "LABEL_4": "joy",
    "LABEL_5": "none_of_them",
}

# Rename in place; labels that are not in the table are left untouched.
for doc_labels in emotion_label:
    for j, raw_label in enumerate(doc_labels):
        doc_labels[j] = _emotion_names.get(raw_label, raw_label)

# Persist the renamed labels; the context manager guarantees the file is
# flushed and closed.
with open("../results/emotion_label.pkl", "wb") as f:
    pickle.dump(emotion_label, f)

## Linguistic analysis

In [12]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Document metadata; later cells invert it to look up a document's index by its name.
meta = _load_pickle("../resources/meta.pkl")
# Directory holding the transcript files, one file per dialogue.
ddir = "../data/combined_transcripts_freeConv"

lemmatized_clean = _load_pickle("../results/lemmatized_clean.pkl")  # without punctuation, hesitations, hums and laughter
lemmatized_raw = _load_pickle("../results/lemmatized_raw.pkl")  # with punctuation, hesitations, hums and laughter
pos = _load_pickle("../results/postags_clean.pkl")
emotion = _load_pickle("../results/emotion_label.pkl")
sentiment = _load_pickle("../results/filtered_sentiment.pkl")

In [13]:
# Sentiment class ids as they appear in `sentiment`, bound to readable names.
negative, neutral, positive = 'LABEL_0', 'LABEL_1', 'LABEL_2'

In [14]:
# Patterns searched for inside the POS/morphology tag strings stored in `pos`.

# Person and number markers.
pattern_1 = re.compile(r'1Sg')  # first person singular
pattern_2 = re.compile(r'2Sg')  # second person singular
pattern_3 = re.compile(r'3Sg')  # third person singular
pattern_4 = re.compile(r'1Pl')  # first person plural
pattern_5 = re.compile(r'2Pl')  # second person plural
pattern_6 = re.compile(r'3Pl')  # third person plural

# Tense markers.
pattern_7 = re.compile(r'Prs')  # present
pattern_8 = re.compile(r'Pst')  # past
pattern_9 = re.compile(r'Fut')  # future

# Mood marker.
pattern_10 = re.compile(r'Cond')  # conditional

# Content-word classes.
pattern_11 = re.compile(r'/V')  # verb
# The lookahead stops '/N' from also matching the numeral tag '/Num', which has
# its own pattern (pattern_14) and would otherwise inflate the noun count.
pattern_12 = re.compile(r'/N(?!um)')  # noun
pattern_13 = re.compile(r'/Adj')  # adjective
pattern_14 = re.compile(r'/Num')  # numeral

# Remaining word classes.
pattern_15 = re.compile(r'/Adv')  # adverb
pattern_16 = re.compile(r'Art')  # article
pattern_17 = re.compile(r'/Prev')  # preverb
pattern_18 = re.compile(r'/Post')  # reported as "postverb" in the result tables
pattern_19 = re.compile(r'Pro')  # pronoun
pattern_20 = re.compile(r'/Cnj')  # conjunction
pattern_21 = re.compile(r'/Inj-Utt')  # interjection
pattern_22 = re.compile(r'/Det')  # determiner

# Lemmas counted as negation.
negation = ["nem", "sem", "semmi", "senki", "soha", "nincs", "sehol", "semmilyen", "sehova"]

# Patterns whose matches are summed into the "funct_words" feature.
# NOTE(review): a tag matching several of these patterns (for instance both 'Art'
# and '/Det', if article tags contain both) is counted once per pattern -- confirm
# that this is intended.
funct_words = [pattern_10, pattern_15, pattern_16, pattern_17, pattern_18, pattern_19, pattern_20, pattern_21, pattern_22]
# LSM calculation: https://www.liwc.app/help/lsm

In [None]:
# (result column, POS pattern) pairs in the column order of the result tables.
_POS_FEATURES = [
    ("s_first_person", pattern_1),
    ("s_second_person", pattern_2),
    ("s_third_person", pattern_3),
    ("p_first_person", pattern_4),
    ("p_second_person", pattern_5),
    ("p_third_person", pattern_6),
    ("present", pattern_7),
    ("past", pattern_8),
    ("future", pattern_9),
    ("cond", pattern_10),
    ("verb", pattern_11),
    ("noun", pattern_12),
    ("adj", pattern_13),
    ("adv", pattern_15),
    ("num", pattern_14),
    ("article", pattern_16),
    ("preverb", pattern_17),
    ("postverb", pattern_18),
    ("pronoun", pattern_19),
    ("conjunction", pattern_20),
    ("interjection", pattern_21),
    ("det", pattern_22),
]

# (result column, label stored in `emotion`) pairs in the column order of the result tables.
_EMOTION_FEATURES = [
    ("happiness", "joy"),
    ("sadness", "sadness"),
    ("anger", "anger"),
    ("fear", "fear"),
    ("disgust", "disgust"),
    ("no_emotion", "none_of_them"),
]


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when ``denominator`` is zero."""
    return numerator / denominator if denominator else float("nan")


def _speaker_features(pair, lab_name, doc_idx, all_words):
    """Build the result row of one speaker in one dialogue.

    Args:
        pair: pair number parsed from the transcript file name (a string of digits).
        lab_name: value stored in the "LabName" column.
        doc_idx: indices of the speaker's documents, used to index
            ``lemmatized_clean``, ``lemmatized_raw``, ``pos``, ``emotion`` and
            ``sentiment``.
        all_words: number of cleaned tokens of both speakers of the dialogue.

    Returns:
        A dict mapping column name to value. Ratios whose denominator is zero
        (for example a speaker without documents) are NaN, so one empty speaker
        does not abort the whole run.
    """
    clean_docs = [lemmatized_clean[i] for i in doc_idx]
    clean_tokens = [token for doc in clean_docs for token in doc]
    word_count = len(clean_tokens)

    raw_docs = [lemmatized_raw[i] for i in doc_idx]
    raw_word_count = sum(len(doc) for doc in raw_docs)

    tags = [tag for i in doc_idx for tag in pos[i]]
    sentiment_labels = [label for i in doc_idx for label in sentiment[i]]

    def raw_marker_ratio(marker):
        # Occurrences of `marker` in the raw documents per raw token.
        return _safe_ratio(sum(doc.count(marker) for doc in raw_docs), raw_word_count)

    def tag_ratio(patterns):
        # Matches of the given patterns in the speaker's tags per cleaned token.
        hits = sum(len(pattern.findall(tag)) for pattern in patterns for tag in tags)
        return _safe_ratio(hits, word_count)

    row = {
        "PairNo": pair,
        "LabName": lab_name,
        "lexical_diversity": _safe_ratio(len(set(clean_tokens)), word_count),
        "word_avg": _safe_ratio(word_count, len(clean_docs)),
        # Share of the dialogue's cleaned tokens spoken by this speaker, in percent.
        "speech_ratio": _safe_ratio(word_count, all_words) * 100,
        "hes": raw_marker_ratio("hes"),
        "laugh": raw_marker_ratio("Laugh"),
        "question": raw_marker_ratio("?"),
        "hum": raw_marker_ratio("hum"),
    }
    for column, pattern in _POS_FEATURES:
        row[column] = tag_ratio([pattern])
    row["funct_words"] = tag_ratio(funct_words)
    row["negation"] = _safe_ratio(sum(negation.count(token) for token in clean_tokens), word_count)

    # Emotion labels are normalized by the number of documents, not by tokens.
    for column, label in _EMOTION_FEATURES:
        row[column] = _safe_ratio(sum(emotion[i].count(label) for i in doc_idx), len(doc_idx))

    # Sentiment shares are relative to the number of labeled items of the speaker.
    positive_count = sentiment_labels.count(positive)
    negative_count = sentiment_labels.count(negative)
    neutral_count = sentiment_labels.count(neutral)
    sentiment_sum = positive_count + negative_count + neutral_count
    row["positive"] = _safe_ratio(positive_count, sentiment_sum)
    row["negative"] = _safe_ratio(negative_count, sentiment_sum)
    row["neutral"] = _safe_ratio(neutral_count, sentiment_sum)
    return row


input_files = [f for f in os.listdir(ddir) if os.path.isfile(os.path.join(ddir, f))]
meta2idx = {v: k for k, v in meta.items()}  # document name -> document index

results_list_mordor = []
results_list_gondor = []

for input_file in input_files:
    # The transcript itself is not read: every feature comes from the
    # pre-computed per-document resources, selected through `meta`.
    pair_match = re.search(r'pair(\d+)_', input_file)
    if pair_match is None:
        # Stray files in the data directory carry no pair number; skip them
        # instead of failing on `None.group`.
        print(f"Skipping {input_file!r}: no pair number in the file name")
        continue
    pair = pair_match.group(1)

    dialogue_docs = [d for d in meta.values() if d.startswith(input_file)]  # documents belonging to this dialogue
    mordor_idx = [meta2idx[d] for d in dialogue_docs if "mordor" in d]  # indices of the Mordor speaker's documents
    gondor_idx = [meta2idx[d] for d in dialogue_docs if "gondor" in d]  # indices of the Gondor speaker's documents

    # Cleaned tokens of both speakers together; denominator of "speech_ratio".
    all_words = sum(len(lemmatized_clean[i]) for i in mordor_idx + gondor_idx)

    results_list_mordor.append(_speaker_features(pair, "Mordor", mordor_idx, all_words))
    results_list_gondor.append(_speaker_features(pair, "Gondor", gondor_idx, all_words))

In [None]:
df_results_mordor = pd.DataFrame(results_list_mordor)
df_results_gondor = pd.DataFrame(results_list_gondor)

## Linguistic analysis of the first third of the corpus

In [16]:
input_files = [f for f in os.listdir(ddir) if os.path.isfile(os.path.join(ddir, f))]
meta2idx = {v: k for k,v in meta.items()}

results_list_mordor_first_third = []
results_list_gondor_first_third = []

for input_file in input_files:
    with open(os.path.join(ddir, input_file)) as infile:
        txt = infile.read()
    pair = re.search(r'pair(\d+)_', input_file).group(1)
    dialogue_docs = [d for d in meta.values() if d.startswith(input_file)] # a dialógushoz tartozó dokumentumok listája
    dialogue_idx = [meta2idx[d] for d in dialogue_docs] # a dialógushoz tartozó dokumentumok indexeinek listája
    mordor = [d for d in dialogue_docs if "mordor" in d] # a Mordor nevű karakterhez tartozó dokumentumok listája
    mordor_idx = [meta2idx[d] for d in mordor] # a Mordor nevű karakterhez tartozó dokumentumok indexeinek listája
    mordor_third = len(mordor_idx) // 3
    mordor_idx_first_third = mordor_idx[:mordor_third]
    mordor_idx_second_third = mordor_idx[mordor_third:2*mordor_third]
    mordor_idx_third_third = mordor_idx[2*mordor_third:]
    mordor_words_corpus = [lemmatized_clean[i] for i in mordor_idx_first_third]
    mordor_words_flatten_corpus = [item for sublist in mordor_words_corpus for item in sublist]
    mordor_word_count_corpus = len(mordor_words_flatten_corpus) 
    mordor_unique_word_count_corpus = len(set(mordor_words_flatten_corpus))
    mordor_lexical_diversity_corpus = mordor_unique_word_count_corpus / mordor_word_count_corpus
    mordor_word_avg_corpus = mordor_word_count_corpus / len(mordor_words_corpus)

    mordor_words_raw = [lemmatized_raw[i] for i in mordor_idx_first_third]
    mordor_words_flatten_raw = [item for sublist in mordor_words_raw for item in sublist]
    mordor_word_count_raw = len(mordor_words_flatten_raw)
    mordor_hes = sum([lemmatized_raw[i].count("hes") for i in mordor_idx_first_third])
    mordor_laugh = sum([lemmatized_raw[i].count("Laugh") for i in mordor_idx_first_third])
    mordor_hum = sum([lemmatized_raw[i].count("hum") for i in mordor_idx_first_third])
    mordor_kerdojel = sum([lemmatized_raw[i].count("?") for i in mordor_idx_first_third])
    mordor_hes_ratio = mordor_hes / mordor_word_count_raw
    mordor_laugh_ratio = mordor_laugh / mordor_word_count_raw
    mordor_hum_ratio = mordor_hum / mordor_word_count_raw
    mordor_kerdojel_ratio = mordor_kerdojel / mordor_word_count_raw
    
    mordor_anger = sum([emotion[i].count("anger") for i in mordor_idx_first_third])
    mordor_disgust = sum([emotion[i].count("disgust") for i in mordor_idx_first_third])
    mordor_fear = sum([emotion[i].count("fear") for i in mordor_idx_first_third])
    mordor_happiness = sum([emotion[i].count("joy") for i in mordor_idx_first_third])
    mordor_sadness = sum([emotion[i].count("sadness") for i in mordor_idx_first_third])
    mordor_no_emotion = sum([emotion[i].count("none_of_them") for i in mordor_idx_first_third])
    
    mordor_anger_ratio = mordor_anger / len(mordor_idx_first_third)
    mordor_disgust_ratio = mordor_disgust / len(mordor_idx_first_third)
    mordor_fear_ratio = mordor_fear / len(mordor_idx_first_third)
    mordor_happiness_ratio = mordor_happiness / len(mordor_idx_first_third)
    mordor_sadness_ratio = mordor_sadness / len(mordor_idx_first_third)
    mordor_no_emotion_ratio = mordor_no_emotion / len(mordor_idx_first_third)
    
   
    mordor_positive = sum(positive.count(item) for i in mordor_idx_first_third for item in sentiment[i])
    mordor_negative = sum(negative.count(item) for i in mordor_idx_first_third for item in sentiment[i])
    mordor_neutral = sum(neutral.count(item) for i in mordor_idx_first_third for item in sentiment[i])
    mordor_sentiment_sum = mordor_positive + mordor_negative + mordor_neutral
    mordor_positive_ratio = mordor_positive / mordor_sentiment_sum
    mordor_negative_ratio = mordor_negative / mordor_sentiment_sum
    mordor_neutral_ratio = mordor_neutral / mordor_sentiment_sum

    mordor_s_first_person = sum(len(pattern_1.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_s_second_person = sum(len(pattern_2.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_s_third_person = sum(len(pattern_3.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_p_first_person = sum(len(pattern_4.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_p_second_person = sum(len(pattern_5.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_p_third_person = sum(len(pattern_6.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_present = sum(len(pattern_7.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_past = sum(len(pattern_8.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_future = sum(len(pattern_9.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_cond = sum(len(pattern_10.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_verb = sum(len(pattern_11.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_noun = sum(len(pattern_12.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_adj = sum(len(pattern_13.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_num = sum(len(pattern_14.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_adv = sum(len(pattern_15.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_art = sum(len(pattern_16.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_prev = sum(len(pattern_17.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_post = sum(len(pattern_18.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_pro = sum(len(pattern_19.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_conj = sum(len(pattern_20.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_inj = sum(len(pattern_21.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_det = sum(len(pattern_22.findall(item)) for i in mordor_idx_first_third for item in pos[i])
    mordor_negation = sum([negation.count(word) for i in mordor_idx_first_third for word in lemmatized_clean[i]])
    
    mordor_funct_words = sum(len(funct_word.findall(item)) for funct_word in funct_words for i in mordor_idx_first_third for item in pos[i])

    mordor_s_first_person_ratio = (mordor_s_first_person / mordor_word_count_corpus) 
    mordor_s_second_person_ratio = (mordor_s_second_person / mordor_word_count_corpus) 
    mordor_s_third_person_ratio = (mordor_s_third_person / mordor_word_count_corpus) 
    mordor_p_first_person_ratio = (mordor_p_first_person / mordor_word_count_corpus) 
    mordor_p_second_person_ratio = (mordor_p_second_person / mordor_word_count_corpus) 
    mordor_p_third_person_ratio = (mordor_p_third_person / mordor_word_count_corpus) 
    mordor_present_ratio = (mordor_present / mordor_word_count_corpus) 
    mordor_past_ratio = (mordor_past / mordor_word_count_corpus) 
    mordor_future_ratio = (mordor_future / mordor_word_count_corpus) 
    mordor_cond_ratio = (mordor_cond / mordor_word_count_corpus) 
    mordor_verb_ratio = (mordor_verb / mordor_word_count_corpus)
    mordor_noun_ratio = (mordor_noun / mordor_word_count_corpus) 
    mordor_adj_ratio = (mordor_adj / mordor_word_count_corpus) 
    mordor_adv_ratio = (mordor_adv / mordor_word_count_corpus) 
    mordor_num_ratio = (mordor_num / mordor_word_count_corpus) 
    mordor_art_ratio = (mordor_art / mordor_word_count_corpus) 
    mordor_prev_ratio = (mordor_prev / mordor_word_count_corpus) 
    mordor_post_ratio = (mordor_post / mordor_word_count_corpus) 
    mordor_pro_ratio = (mordor_pro / mordor_word_count_corpus) 
    mordor_conj_ratio = (mordor_conj / mordor_word_count_corpus) 
    mordor_inj_ratio = (mordor_inj / mordor_word_count_corpus) 
    mordor_det_ratio = (mordor_det / mordor_word_count_corpus)
    mordor_negation_ratio = (mordor_negation / mordor_word_count_corpus) 
    mordor_funct_words_ratio = (mordor_funct_words / mordor_word_count_corpus) 
    

    gondor = [d for d in dialogue_docs if "gondor" in d] # a Gondor nevű karakterhez tartozó dokumentumok listája
    gondor_idx = [meta2idx[d] for d in gondor] # a Gondor nevű karakterhez tartozó dokumentumok indexeinek listája
    gondor_third = len(gondor_idx) // 3
    gondor_idx_first_third = gondor_idx[:gondor_third]
    gondor_idx_second_third = gondor_idx[gondor_third:2*gondor_third]
    gondor_idx_third_third = gondor_idx[2*gondor_third:]

    gondor_words_corpus = [lemmatized_clean[i] for i in gondor_idx_first_third]
    gondor_words_flatten_corpus = [item for sublist in gondor_words_corpus for item in sublist]
    gondor_word_count_corpus = len(gondor_words_flatten_corpus) 
    gondor_unique_word_count_corpus = len(set(gondor_words_flatten_corpus))
    gondor_lexical_diversity_corpus = gondor_unique_word_count_corpus / gondor_word_count_corpus
    gondor_word_avg_corpus = gondor_word_count_corpus / len(gondor_words_corpus)

    gondor_words_raw = [lemmatized_raw[i] for i in gondor_idx_first_third]
    gondor_words_flatten_raw = [item for sublist in gondor_words_raw for item in sublist]
    gondor_word_count_raw = len(gondor_words_flatten_raw)
    gondor_hes = sum([lemmatized_raw[i].count("hes") for i in gondor_idx_first_third])
    gondor_laugh = sum([lemmatized_raw[i].count("Laugh") for i in gondor_idx_first_third])
    gondor_hum = sum([lemmatized_raw[i].count("hum") for i in gondor_idx_first_third])
    gondor_kerdojel = sum([lemmatized_raw[i].count("?") for i in gondor_idx_first_third])
    gondor_hes_ratio = gondor_hes / gondor_word_count_raw
    gondor_laugh_ratio = gondor_laugh / gondor_word_count_raw
    gondor_hum_ratio = gondor_hum / gondor_word_count_raw
    gondor_kerdojel_ratio = gondor_kerdojel / gondor_word_count_raw
    
    gondor_anger = sum([emotion[i].count("anger") for i in gondor_idx_first_third])
    gondor_disgust = sum([emotion[i].count("disgust") for i in gondor_idx_first_third])
    gondor_fear = sum([emotion[i].count("fear") for i in gondor_idx_first_third])
    gondor_happiness = sum([emotion[i].count("joy") for i in gondor_idx_first_third])
    gondor_sadness = sum([emotion[i].count("sadness") for i in gondor_idx_first_third])
    gondor_no_emotion = sum([emotion[i].count("none_of_them") for i in gondor_idx_first_third])
    gondor_anger_ratio = (gondor_anger / len(gondor_idx_first_third))
    gondor_disgust_ratio = (gondor_disgust / len(gondor_idx_first_third))
    gondor_fear_ratio = (gondor_fear / len(gondor_idx_first_third)) 
    gondor_happiness_ratio = gondor_happiness / len(gondor_idx_first_third)
    gondor_sadness_ratio = gondor_sadness / len(gondor_idx_first_third)
    gondor_no_emotion_ratio = gondor_no_emotion / len(gondor_idx_first_third)
    
    gondor_positive = sum(positive.count(item) for i in gondor_idx_first_third for item in sentiment[i])
    gondor_negative = sum(negative.count(item) for i in gondor_idx_first_third for item in sentiment[i])
    gondor_neutral = sum(neutral.count(item) for i in gondor_idx_first_third for item in sentiment[i])
    gondor_sentiment_sum = gondor_positive + gondor_negative + gondor_neutral
    gondor_positive_ratio = (gondor_positive / gondor_sentiment_sum)
    gondor_negative_ratio = (gondor_negative / gondor_sentiment_sum)
    gondor_neutral_ratio = (gondor_neutral / gondor_sentiment_sum) 

    gondor_s_first_person = sum(len(pattern_1.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_s_second_person = sum(len(pattern_2.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_s_third_person = sum(len(pattern_3.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_p_first_person = sum(len(pattern_4.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_p_second_person = sum(len(pattern_5.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_p_third_person = sum(len(pattern_6.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_present = sum(len(pattern_7.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_past = sum(len(pattern_8.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_future = sum(len(pattern_9.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_cond = sum(len(pattern_10.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_verb = sum(len(pattern_11.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_noun = sum(len(pattern_12.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_adj = sum(len(pattern_13.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_num = sum(len(pattern_14.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_adv = sum(len(pattern_15.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_art = sum(len(pattern_16.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_prev = sum(len(pattern_17.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_post = sum(len(pattern_18.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_pro = sum(len(pattern_19.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_conj = sum(len(pattern_20.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_inj = sum(len(pattern_21.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_det = sum(len(pattern_22.findall(item)) for i in gondor_idx_first_third for item in pos[i])
    gondor_negation = sum([negation.count(word) for i in gondor_idx_first_third for word in lemmatized_clean[i]])
    gondor_funct_words = sum(len(funct_word.findall(item)) for funct_word in funct_words for i in gondor_idx_first_third for item in pos[i])
    

    gondor_s_first_person_ratio = (gondor_s_first_person / gondor_word_count_corpus) 
    gondor_s_second_person_ratio = (gondor_s_second_person / gondor_word_count_corpus) 
    gondor_s_third_person_ratio = (gondor_s_third_person / gondor_word_count_corpus) 
    gondor_p_first_person_ratio = (gondor_p_first_person / gondor_word_count_corpus) 
    gondor_p_second_person_ratio = (gondor_p_second_person / gondor_word_count_corpus) 
    gondor_p_third_person_ratio = (gondor_p_third_person / gondor_word_count_corpus) 
    gondor_present_ratio = (gondor_present / gondor_word_count_corpus) 
    gondor_past_ratio = (gondor_past / gondor_word_count_corpus) 
    gondor_future_ratio = (gondor_future / gondor_word_count_corpus) 
    gondor_cond_ratio = (gondor_cond / gondor_word_count_corpus) 
    gondor_verb_ratio = (gondor_verb / gondor_word_count_corpus) 
    gondor_noun_ratio = (gondor_noun / gondor_word_count_corpus) 
    gondor_adj_ratio = (gondor_adj / gondor_word_count_corpus) 
    gondor_adv_ratio = (gondor_adv / gondor_word_count_corpus) 
    gondor_num_ratio = (gondor_num / gondor_word_count_corpus) 
    gondor_art_ratio = (gondor_art / gondor_word_count_corpus) 
    gondor_prev_ratio = (gondor_prev / gondor_word_count_corpus) 
    gondor_post_ratio = (gondor_post / gondor_word_count_corpus) 
    gondor_pro_ratio = (gondor_pro / gondor_word_count_corpus) 
    gondor_conj_ratio = (gondor_conj / gondor_word_count_corpus) 
    gondor_inj_ratio = (gondor_inj / gondor_word_count_corpus) 
    gondor_det_ratio = (gondor_det / gondor_word_count_corpus)
    gondor_negation_ratio = (gondor_negation / gondor_word_count_corpus) 
    gondor_funct_words_ratio = (gondor_funct_words / gondor_word_count_corpus) 

    all_words = gondor_word_count_corpus + mordor_word_count_corpus
    mordor_speech_ratio_corpus = (mordor_word_count_corpus / all_words) * 100
    gondor_speech_ratio_corpus = (gondor_word_count_corpus / all_words) * 100
    
    results_mordor = {
        "PairNo": pair,
        "LabName": "Mordor",
        "lexical_diversity": mordor_lexical_diversity_corpus,
        "word_avg": mordor_word_avg_corpus,
        "speech_ratio": mordor_speech_ratio_corpus,
        "hes": mordor_hes_ratio,
        "laugh": mordor_laugh_ratio,
        "question": mordor_kerdojel_ratio,
        "hum": mordor_hum_ratio,
        "s_first_person": mordor_s_first_person_ratio,
        "s_second_person": mordor_s_second_person_ratio,
        "s_third_person": mordor_s_third_person_ratio,
        "p_first_person": mordor_p_first_person_ratio,
        "p_second_person": mordor_p_second_person_ratio,
        "p_third_person": mordor_p_third_person_ratio,
        "present": mordor_present_ratio,
        "past": mordor_past_ratio,
        "future": mordor_future_ratio,
        "cond": mordor_cond_ratio,
        "verb": mordor_verb_ratio,
        "noun": mordor_noun_ratio,
        "adj": mordor_adj_ratio,
        "adv": mordor_adv_ratio,
        "num": mordor_num_ratio,
        "article": mordor_art_ratio,
        "preverb": mordor_prev_ratio,
        "postverb": mordor_post_ratio,
        "pronoun": mordor_pro_ratio,
        "conjunction": mordor_conj_ratio,
        "interjection": mordor_inj_ratio,
        "det": mordor_det_ratio,
        "funct_words": mordor_funct_words_ratio,
        "negation": mordor_negation_ratio,
        "happiness": mordor_happiness_ratio,
        "sadness": mordor_sadness_ratio,
        "anger": mordor_anger_ratio,
        "fear": mordor_fear_ratio,
        "disgust": mordor_disgust_ratio,
        "no_emotion": mordor_no_emotion_ratio,
        "positive": mordor_positive_ratio,
        "negative": mordor_negative_ratio,
        "neutral": mordor_neutral_ratio,
    }

    results_gondor = {
        "PairNo": pair,
        "LabName": "Gondor",
        "lexical_diversity": gondor_lexical_diversity_corpus,
        "word_avg": gondor_word_avg_corpus,
        "speech_ratio": gondor_speech_ratio_corpus,
        "hes": gondor_hes_ratio,
        "laugh": gondor_laugh_ratio,
        "question": gondor_kerdojel_ratio,
        "hum": gondor_hum_ratio,
        "s_first_person": gondor_s_first_person_ratio,
        "s_second_person": gondor_s_second_person_ratio,
        "s_third_person": gondor_s_third_person_ratio,
        "p_first_person": gondor_p_first_person_ratio,
        "p_second_person": gondor_p_second_person_ratio,
        "p_third_person": gondor_p_third_person_ratio,
        "present": gondor_present_ratio,
        "past": gondor_past_ratio,
        "future": gondor_future_ratio,
        "cond": gondor_cond_ratio,
        "verb": gondor_verb_ratio,
        "noun": gondor_noun_ratio,
        "adj": gondor_adj_ratio,
        "adv": gondor_adv_ratio,
        "num": gondor_num_ratio,
        "article": gondor_art_ratio,
        "preverb": gondor_prev_ratio,
        "postverb": gondor_post_ratio,
        "pronoun": gondor_pro_ratio,
        "conjunction": gondor_conj_ratio,
        "interjection": gondor_inj_ratio,
        "det": gondor_det_ratio,
        "funct_words": gondor_funct_words_ratio,
        "negation": gondor_negation_ratio,
        "happiness": gondor_happiness_ratio,
        "sadness": gondor_sadness_ratio,
        "anger": gondor_anger_ratio,
        "fear": gondor_fear_ratio,
        "disgust": gondor_disgust_ratio,
        "no_emotion": gondor_no_emotion_ratio,
        "positive": gondor_positive_ratio,
        "negative": gondor_negative_ratio,
        "neutral": gondor_neutral_ratio,  
    }
    results_list_mordor_first_third.append(results_mordor)
    results_list_gondor_first_third.append(results_gondor)

In [17]:
# Turn the per-dialogue result dicts collected by the loop above into one
# DataFrame per speaker (one row per appended dict, one column per key).
df_results_mordor_first_third, df_results_gondor_first_third = (
    pd.DataFrame(rows)
    for rows in (results_list_mordor_first_third, results_list_gondor_first_third)
)

## Linguistic analysis of the second third of the corpus

In [23]:
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero.

    A speaker with fewer than three documents has an empty middle third
    (``len // 3 == 0``), and a slice can also contain no words or no recognised
    sentiment labels. NaN marks those cells as missing instead of aborting the
    whole loop with ZeroDivisionError.
    """
    return numerator / denominator if denominator else float("nan")


def _middle_third(indices):
    """Return the second of three equal slices of ``indices`` (remainder goes to the last slice)."""
    third = len(indices) // 3
    return indices[third:2 * third]


# Result column -> marker counted in the raw lemmatized documents.
_RAW_MARKERS = {"hes": "hes", "laugh": "Laugh", "question": "?", "hum": "hum"}

# Result column -> compiled pattern matched against the POS tags.
# Insertion order is the output column order ("adv" precedes "num" on purpose).
_POS_PATTERNS = {
    "s_first_person": pattern_1,
    "s_second_person": pattern_2,
    "s_third_person": pattern_3,
    "p_first_person": pattern_4,
    "p_second_person": pattern_5,
    "p_third_person": pattern_6,
    "present": pattern_7,
    "past": pattern_8,
    "future": pattern_9,
    "cond": pattern_10,
    "verb": pattern_11,
    "noun": pattern_12,
    "adj": pattern_13,
    "adv": pattern_15,
    "num": pattern_14,
    "article": pattern_16,
    "preverb": pattern_17,
    "postverb": pattern_18,
    "pronoun": pattern_19,
    "conjunction": pattern_20,
    "interjection": pattern_21,
    "det": pattern_22,
}

# Result column -> emotion label counted per document.
_EMOTION_LABELS = {
    "happiness": "joy",
    "sadness": "sadness",
    "anger": "anger",
    "fear": "fear",
    "disgust": "disgust",
    "no_emotion": "none_of_them",
}


def _linguistic_features(doc_indices):
    """Compute the feature ratios of one speaker for the documents in ``doc_indices``.

    Returns ``(clean_word_count, features)``. ``features`` holds every result
    column except ``PairNo`` and ``LabName``, already in output order.
    ``speech_ratio`` is only a NaN placeholder here, because it needs the word
    counts of both speakers; the caller fills it in.
    """
    clean_words = [word for i in doc_indices for word in lemmatized_clean[i]]
    clean_word_count = len(clean_words)
    raw_length = sum(len(lemmatized_raw[i]) for i in doc_indices)
    doc_count = len(doc_indices)
    pos_tags = [tag for i in doc_indices for tag in pos[i]]
    sentiment_labels = [label for i in doc_indices for label in sentiment[i]]

    features = {
        "lexical_diversity": _safe_ratio(len(set(clean_words)), clean_word_count),
        "word_avg": _safe_ratio(clean_word_count, doc_count),
        "speech_ratio": float("nan"),  # placeholder that keeps the column position
    }

    # Disfluency / laughter / question markers, relative to the raw document length.
    for column, marker in _RAW_MARKERS.items():
        hits = sum(lemmatized_raw[i].count(marker) for i in doc_indices)
        features[column] = _safe_ratio(hits, raw_length)

    # Morphological and part-of-speech categories, relative to the clean word count.
    for column, pattern in _POS_PATTERNS.items():
        hits = sum(len(pattern.findall(tag)) for tag in pos_tags)
        features[column] = _safe_ratio(hits, clean_word_count)

    funct_hits = sum(len(funct_word.findall(tag)) for funct_word in funct_words for tag in pos_tags)
    features["funct_words"] = _safe_ratio(funct_hits, clean_word_count)
    negation_hits = sum(negation.count(word) for word in clean_words)
    features["negation"] = _safe_ratio(negation_hits, clean_word_count)

    # Emotion labels, relative to the number of documents in the slice.
    # NOTE(review): this counts label names such as "anger" in `emotion[i]`, so it
    # assumes `emotion` holds label names per document at this point — confirm.
    for column, label in _EMOTION_LABELS.items():
        hits = sum(emotion[i].count(label) for i in doc_indices)
        features[column] = _safe_ratio(hits, doc_count)

    # Sentiment classes, relative to the number of recognised sentiment labels.
    sentiment_hits = {
        "positive": sum(positive.count(label) for label in sentiment_labels),
        "negative": sum(negative.count(label) for label in sentiment_labels),
        "neutral": sum(neutral.count(label) for label in sentiment_labels),
    }
    sentiment_total = sum(sentiment_hits.values())
    for column, hits in sentiment_hits.items():
        features[column] = _safe_ratio(hits, sentiment_total)

    return clean_word_count, features


input_files = [f for f in os.listdir(ddir) if os.path.isfile(os.path.join(ddir, f))]
meta2idx = {v: k for k, v in meta.items()}

results_list_mordor_second_third = []
results_list_gondor_second_third = []
_results_by_lab = {
    "Mordor": results_list_mordor_second_third,
    "Gondor": results_list_gondor_second_third,
}

for input_file in input_files:
    # Only the file name is needed; the file content was read but never used.
    pair = re.search(r'pair(\d+)_', input_file).group(1)
    # Documents that belong to this dialogue.
    dialogue_docs = [d for d in meta.values() if d.startswith(input_file)]

    rows = {}
    word_counts = {}
    for lab_name in _results_by_lab:
        # Document indices of this speaker; the speaker name appears in lowercase in the key.
        speaker_idx = [meta2idx[d] for d in dialogue_docs if lab_name.lower() in d]
        word_counts[lab_name], features = _linguistic_features(_middle_third(speaker_idx))
        rows[lab_name] = {"PairNo": pair, "LabName": lab_name, **features}

    # Share (in percent) of the slice's clean words spoken by each speaker.
    all_words = sum(word_counts.values())
    for lab_name, row in rows.items():
        row["speech_ratio"] = _safe_ratio(word_counts[lab_name], all_words) * 100
        _results_by_lab[lab_name].append(row)

In [24]:
# Turn the per-dialogue result dicts collected by the loop above into one
# DataFrame per speaker (one row per input file, one column per feature).
df_results_mordor_second_third, df_results_gondor_second_third = (
    pd.DataFrame(rows)
    for rows in (results_list_mordor_second_third, results_list_gondor_second_third)
)

## Linguistic analysis of the final third of the corpus

In [28]:
# For every dialogue file, compute one feature row per speaker (Mordor / Gondor)
# from the final third of that speaker's documents. Rows are collected in
# results_list_mordor_third_third / results_list_gondor_third_third.
input_files = [f for f in os.listdir(ddir) if os.path.isfile(os.path.join(ddir, f))]
meta2idx = {v: k for k, v in meta.items()}  # inverse of meta: document name -> index

results_list_mordor_third_third = []
results_list_gondor_third_third = []

# Output column -> compiled pattern applied to every item of pos[i].
# The list order is the column order of the resulting frames.
pos_feature_patterns = [
    ("s_first_person", pattern_1),
    ("s_second_person", pattern_2),
    ("s_third_person", pattern_3),
    ("p_first_person", pattern_4),
    ("p_second_person", pattern_5),
    ("p_third_person", pattern_6),
    ("present", pattern_7),
    ("past", pattern_8),
    ("future", pattern_9),
    ("cond", pattern_10),
    ("verb", pattern_11),
    ("noun", pattern_12),
    ("adj", pattern_13),
    ("adv", pattern_15),
    ("num", pattern_14),
    ("article", pattern_16),
    ("preverb", pattern_17),
    ("postverb", pattern_18),
    ("pronoun", pattern_19),
    ("conjunction", pattern_20),
    ("interjection", pattern_21),
    ("det", pattern_22),
]

# Output column -> label counted in emotion[i].
emotion_feature_labels = [
    ("happiness", "joy"),
    ("sadness", "sadness"),
    ("anger", "anger"),
    ("fear", "fear"),
    ("disgust", "disgust"),
    ("no_emotion", "none_of_them"),
]


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero.

    A speaker without documents (or without words) then yields NaN features
    instead of a ZeroDivisionError that aborts the whole loop.
    """
    return numerator / denominator if denominator else float("nan")


def _third_third_features(speaker_idx, pair, lab_name):
    """Compute the feature row for the final third of one speaker's documents.

    Parameters
    ----------
    speaker_idx : list
        Indices (keys of ``meta``) of all documents of one speaker in one dialogue.
    pair : str
        Pair number parsed from the file name; stored under ``"PairNo"``.
    lab_name : str
        Speaker label stored under ``"LabName"``.

    Returns
    -------
    tuple
        ``(row, word_count)``: the feature dict (``"speech_ratio"`` is a NaN
        placeholder the caller fills in, because it needs both speakers' counts)
        and the number of words in the cleaned lemma lists of the slice.
    """
    third = len(speaker_idx) // 3
    idx = speaker_idx[2 * third:]  # final third; also takes the remainder documents
    n_docs = len(idx)

    clean_words = [word for i in idx for word in lemmatized_clean[i]]
    n_clean = len(clean_words)
    n_raw = sum(len(lemmatized_raw[i]) for i in idx)
    pos_items = [item for i in idx for item in pos[i]]
    sentiment_items = [item for i in idx for item in sentiment[i]]

    def raw_count(token):
        # Occurrences of `token` in the raw lemma lists of the slice.
        return sum(lemmatized_raw[i].count(token) for i in idx)

    def pattern_count(pattern):
        # Total number of matches of `pattern` over all POS items of the slice.
        return sum(len(pattern.findall(item)) for item in pos_items)

    row = {
        "PairNo": pair,
        "LabName": lab_name,
        "lexical_diversity": _safe_ratio(len(set(clean_words)), n_clean),
        "word_avg": _safe_ratio(n_clean, n_docs),
        "speech_ratio": float("nan"),  # placeholder keeps the column position
        "hes": _safe_ratio(raw_count("hes"), n_raw),
        "laugh": _safe_ratio(raw_count("Laugh"), n_raw),
        "question": _safe_ratio(raw_count("?"), n_raw),
        "hum": _safe_ratio(raw_count("hum"), n_raw),
    }

    # Pattern-based features are normalised by the cleaned word count.
    for column, pattern in pos_feature_patterns:
        row[column] = _safe_ratio(pattern_count(pattern), n_clean)

    funct_total = sum(pattern_count(funct_word) for funct_word in funct_words)
    row["funct_words"] = _safe_ratio(funct_total, n_clean)

    negation_total = sum(negation.count(word) for word in clean_words)
    row["negation"] = _safe_ratio(negation_total, n_clean)

    # Emotion features are normalised by the number of documents in the slice.
    for column, label in emotion_feature_labels:
        label_total = sum(emotion[i].count(label) for i in idx)
        row[column] = _safe_ratio(label_total, n_docs)

    # Sentiment features are normalised by the number of classified items.
    positive_total = sum(positive.count(item) for item in sentiment_items)
    negative_total = sum(negative.count(item) for item in sentiment_items)
    neutral_total = sum(neutral.count(item) for item in sentiment_items)
    sentiment_total = positive_total + negative_total + neutral_total
    row["positive"] = _safe_ratio(positive_total, sentiment_total)
    row["negative"] = _safe_ratio(negative_total, sentiment_total)
    row["neutral"] = _safe_ratio(neutral_total, sentiment_total)

    return row, n_clean


for input_file in input_files:
    # Only the file name is needed here (pair number + prefix match against meta);
    # the file contents are not read.
    pair = re.search(r'pair(\d+)_', input_file).group(1)
    dialogue_docs = [d for d in meta.values() if d.startswith(input_file)]  # documents of this dialogue
    mordor_idx = [meta2idx[d] for d in dialogue_docs if "mordor" in d]  # indices of Mordor's documents
    gondor_idx = [meta2idx[d] for d in dialogue_docs if "gondor" in d]  # indices of Gondor's documents

    results_mordor, mordor_word_count_corpus = _third_third_features(mordor_idx, pair, "Mordor")
    results_gondor, gondor_word_count_corpus = _third_third_features(gondor_idx, pair, "Gondor")

    # Share of the dialogue's cleaned words produced by each speaker, in percent.
    all_words = gondor_word_count_corpus + mordor_word_count_corpus
    results_mordor["speech_ratio"] = _safe_ratio(mordor_word_count_corpus, all_words) * 100
    results_gondor["speech_ratio"] = _safe_ratio(gondor_word_count_corpus, all_words) * 100

    results_list_mordor_third_third.append(results_mordor)
    results_list_gondor_third_third.append(results_gondor)

In [29]:
# One results frame per speaker, built from the third-third records.
df_results_mordor_third_third, df_results_gondor_third_third = (
    pd.DataFrame(records)
    for records in (results_list_mordor_third_third, results_list_gondor_third_third)
)

## Add "condition" as a feature to the dataframes and save them as CSV files

In [18]:
# Load the per-participant demographics, align the key column names with the
# result frames, and discard the columns that are not joined onto the results.
demographics_df = (
    pd.read_csv('../materials/demog_vars_individual.csv')
    .rename(columns={'pair_no': 'PairNo', 'lab': 'LabName'})
    .drop(columns=['age', 'gender', 'handedness'])
)


In [None]:
# Cast the join key to str on every frame involved, then left-join the
# demographic columns onto each speaker's result frame.
for frame in (df_results_mordor, df_results_gondor, demographics_df):
    frame['PairNo'] = frame['PairNo'].astype(str)

linguistic_demog_features_mordor = df_results_mordor.merge(
    demographics_df, on=["PairNo", "LabName"], how="left"
)
linguistic_demog_features_gondor = df_results_gondor.merge(
    demographics_df, on=["PairNo", "LabName"], how="left"
)

In [19]:
# Cast the join key to str on every frame involved, then left-join the
# demographic columns onto each speaker's first-third result frame.
for frame in (df_results_mordor_first_third, df_results_gondor_first_third, demographics_df):
    frame['PairNo'] = frame['PairNo'].astype(str)

linguistic_demog_features_mordor_first_third = df_results_mordor_first_third.merge(
    demographics_df, on=["PairNo", "LabName"], how="left"
)
linguistic_demog_features_gondor_first_third = df_results_gondor_first_third.merge(
    demographics_df, on=["PairNo", "LabName"], how="left"
)

In [25]:
# Cast the join key to str on all three frames used below, then left-join the
# demographic columns onto each speaker's second-third result frame.
# The Gondor cast targets the second-third frame, i.e. the frame that is
# actually merged in this cell.
df_results_mordor_second_third['PairNo'] = df_results_mordor_second_third['PairNo'].astype(str)
df_results_gondor_second_third['PairNo'] = df_results_gondor_second_third['PairNo'].astype(str)
demographics_df['PairNo'] = demographics_df['PairNo'].astype(str)
linguistic_demog_features_mordor_second_third = pd.merge(df_results_mordor_second_third, demographics_df, on=["PairNo", "LabName"], how="left")
linguistic_demog_features_gondor_second_third = pd.merge(df_results_gondor_second_third, demographics_df, on=["PairNo", "LabName"], how="left")

In [30]:
# Cast the join key to str on every frame involved, then left-join the
# demographic columns onto each speaker's third-third result frame.
for frame in (df_results_mordor_third_third, df_results_gondor_third_third, demographics_df):
    frame['PairNo'] = frame['PairNo'].astype(str)

linguistic_demog_features_mordor_third_third = df_results_mordor_third_third.merge(
    demographics_df, on=["PairNo", "LabName"], how="left"
)
linguistic_demog_features_gondor_third_third = df_results_gondor_third_third.merge(
    demographics_df, on=["PairNo", "LabName"], how="left"
)

In [None]:
# Persist the per-speaker frames (results + demographics) without the index column.
for frame, csv_path in (
    (linguistic_demog_features_gondor, "../results/linguistic_analysis_condition_gondor.csv"),
    (linguistic_demog_features_mordor, "../results/linguistic_analysis_condition_mordor.csv"),
):
    frame.to_csv(csv_path, index=False)

In [None]:
# Wide frame: one row per (PairNo, condition) with both speakers' features side by side.
# `suffixes` applies to (left, right) in that order, so the Mordor frame must be
# the left operand for the '_mordor' columns to hold Mordor's values.
df_results_mordor_gondor_124 = pd.merge(
    linguistic_demog_features_mordor,
    linguistic_demog_features_gondor,
    on=["PairNo", "condition"],
    suffixes=('_mordor', '_gondor'),
)
# LabName is constant within each source frame, so its suffixed copies carry no information.
df_results_mordor_gondor_124 = df_results_mordor_gondor_124.drop(columns=['LabName_mordor', 'LabName_gondor'])
# Long frame: the two speakers' rows stacked, LabName kept as the speaker column.
df_results_mordor_gondor_248 = pd.concat([linguistic_demog_features_mordor, linguistic_demog_features_gondor], axis=0)

In [20]:
# Wide first-third frame: one row per (PairNo, condition) with both speakers' features.
# `suffixes` applies to (left, right) in that order, so the Mordor frame must be
# the left operand for the '_mordor' columns to hold Mordor's values.
df_results_mordor_gondor_124_first_third = pd.merge(
    linguistic_demog_features_mordor_first_third,
    linguistic_demog_features_gondor_first_third,
    on=["PairNo", "condition"],
    suffixes=('_mordor', '_gondor'),
)
# LabName is constant within each source frame, so its suffixed copies carry no information.
df_results_mordor_gondor_124_first_third = df_results_mordor_gondor_124_first_third.drop(columns=['LabName_mordor', 'LabName_gondor'])
# Long first-third frame: the two speakers' rows stacked.
df_results_mordor_gondor_248_first_third = pd.concat([linguistic_demog_features_mordor_first_third, linguistic_demog_features_gondor_first_third], axis=0)

In [26]:
# Wide second-third frame: one row per (PairNo, condition) with both speakers' features.
# `suffixes` applies to (left, right) in that order, so the Mordor frame must be
# the left operand for the '_mordor' columns to hold Mordor's values.
df_results_mordor_gondor_124_second_third = pd.merge(
    linguistic_demog_features_mordor_second_third,
    linguistic_demog_features_gondor_second_third,
    on=["PairNo", "condition"],
    suffixes=('_mordor', '_gondor'),
)
# LabName is constant within each source frame, so its suffixed copies carry no information.
df_results_mordor_gondor_124_second_third = df_results_mordor_gondor_124_second_third.drop(columns=['LabName_mordor', 'LabName_gondor'])
# Long second-third frame: the two speakers' rows stacked.
df_results_mordor_gondor_248_second_third = pd.concat([linguistic_demog_features_mordor_second_third, linguistic_demog_features_gondor_second_third], axis=0)

In [31]:
# Wide third-third frame: one row per (PairNo, condition) with both speakers' features.
# `suffixes` applies to (left, right) in that order, so the Mordor frame must be
# the left operand for the '_mordor' columns to hold Mordor's values.
df_results_mordor_gondor_124_third_third = pd.merge(
    linguistic_demog_features_mordor_third_third,
    linguistic_demog_features_gondor_third_third,
    on=["PairNo", "condition"],
    suffixes=('_mordor', '_gondor'),
)
# LabName is constant within each source frame, so its suffixed copies carry no information.
df_results_mordor_gondor_124_third_third = df_results_mordor_gondor_124_third_third.drop(columns=['LabName_mordor', 'LabName_gondor'])
# Long third-third frame: the two speakers' rows stacked.
df_results_mordor_gondor_248_third_third = pd.concat([linguistic_demog_features_mordor_third_third, linguistic_demog_features_gondor_third_third], axis=0)


In [None]:
# Write the wide (124) and long (248) combined frames without the index column.
for frame, csv_path in (
    (df_results_mordor_gondor_124, "../results/linguistic_analysis_condition_mordor_gondor_124.csv"),
    (df_results_mordor_gondor_248, "../results/linguistic_analysis_condition_mordor_gondor_248.csv"),
):
    frame.to_csv(csv_path, index=False)

In [21]:
# Write the wide (124) and long (248) first-third frames without the index column.
for frame, csv_path in (
    (df_results_mordor_gondor_124_first_third, "../results/linguistic_analysis_condition_mordor_gondor_first_third_124.csv"),
    (df_results_mordor_gondor_248_first_third, "../results/linguistic_analysis_condition_mordor_gondor_first_third_248.csv"),
):
    frame.to_csv(csv_path, index=False)

In [27]:
# Write the wide (124) and long (248) second-third frames without the index column.
for frame, csv_path in (
    (df_results_mordor_gondor_124_second_third, "../results/linguistic_analysis_condition_mordor_gondor_second_third_124.csv"),
    (df_results_mordor_gondor_248_second_third, "../results/linguistic_analysis_condition_mordor_gondor_second_third_248.csv"),
):
    frame.to_csv(csv_path, index=False)

In [32]:
# Write the wide (124) and long (248) third-third frames without the index column.
for frame, csv_path in (
    (df_results_mordor_gondor_124_third_third, "../results/linguistic_analysis_condition_mordor_gondor_third_third_124.csv"),
    (df_results_mordor_gondor_248_third_third, "../results/linguistic_analysis_condition_mordor_gondor_third_third_248.csv"),
):
    frame.to_csv(csv_path, index=False)

In [10]:
# Reload the long (248) results file and write one CSV per value of "condition".
whole_data_df = pd.read_csv('../results/linguistic_analysis_condition_mordor_gondor_248.csv')
grouped_df = whole_data_df.groupby('condition')
grouped_dict = {condition: rows for condition, rows in grouped_df}

# A missing condition raises KeyError here.
baseline_df, unimodal_df, unfamiliar_df = (
    grouped_dict[condition] for condition in ("baseline", "unimodal", "unfamiliar")
)

for frame, csv_path in (
    (unfamiliar_df, "../results/unfamiliar_linguistic_analysis_248.csv"),
    (baseline_df, "../results/baseline_linguistic_analysis_248.csv"),
    (unimodal_df, "../results/unimodal_linguistic_analysis_248.csv"),
):
    frame.to_csv(csv_path, index=False)

In [11]:
# Reload the wide (124) results file and write one CSV per value of "condition".
# Note: this reuses the variable names of the 248 split, overwriting them.
whole_data_df = pd.read_csv('../results/linguistic_analysis_condition_mordor_gondor_124.csv')
grouped_df = whole_data_df.groupby('condition')
grouped_dict = {condition: rows for condition, rows in grouped_df}

# A missing condition raises KeyError here.
baseline_df, unimodal_df, unfamiliar_df = (
    grouped_dict[condition] for condition in ("baseline", "unimodal", "unfamiliar")
)

for frame, csv_path in (
    (unfamiliar_df, "../results/unfamiliar_linguistic_analysis_124.csv"),
    (baseline_df, "../results/baseline_linguistic_analysis_124.csv"),
    (unimodal_df, "../results/unimodal_linguistic_analysis_124.csv"),
):
    frame.to_csv(csv_path, index=False)