In [2]:
import re

def read_texts():
    '''Prompt for texts until an empty entry is given and return them as a list.

    Prompts are numbered starting at 1. The empty entry that ends the loop is
    not included in the returned list.
    '''
    collected = []
    while True:
        # The next prompt number is always one more than the texts stored so far.
        entry = input(f"Enter text {len(collected) + 1} (press enter to exit):")
        if not entry:
            break
        collected.append(entry)
    return collected

def separate_sentences(text):
    '''Split a text into sentences at runs of '.', '!' or '?'.

    The terminators themselves are removed; whitespace around each sentence is
    kept. A trailing empty piece (produced when the text ends with a
    terminator, or when the text is empty) is dropped.
    '''
    pieces = re.split(r'[.!?]+', text)
    # re.split always yields at least one element, so pieces[-1] is safe.
    return pieces[:-1] if pieces[-1] == '' else pieces

def separate_phrases(sentence):
    '''Split a sentence into phrases at runs of ',', ':' or ';'.

    The delimiters are removed; whitespace around each phrase is kept and
    empty pieces at the edges are not filtered out.
    '''
    delimiter = re.compile(r'[,:;]+')
    return delimiter.split(sentence)

def separate_words(phrase):
    '''Split a phrase on whitespace and return the resulting list of words.

    Consecutive whitespace counts as a single separator, so the result never
    contains empty strings.
    '''
    words = phrase.split()
    return words

def count_unique_words(word_list):
    '''Return how many words occur exactly once in word_list.

    Words are compared case-insensitively (each word is lowercased first).
    '''
    occurrences = {}
    for word in word_list:
        key = word.lower()
        occurrences[key] = occurrences.get(key, 0) + 1

    # A word is "unique" when its final tally is exactly one.
    return sum(1 for total in occurrences.values() if total == 1)

def count_different_words(word_list):
    '''Return the number of distinct words in word_list.

    Words are compared case-insensitively (each word is lowercased first).
    '''
    distinct = {word.lower() for word in word_list}
    return len(distinct)

def compare_signatures(sig_a, sig_b):
    '''Return the mean absolute difference between two text signatures.

    Only the first six traits of each signature are compared. Identical
    signatures give 0.0; the larger the value, the less alike they are.
    '''
    total_gap = sum(abs(sig_a[trait] - sig_b[trait]) for trait in range(6))
    return total_gap / 6

def calculate_signature(text):
    '''Compute the six-trait signature of a text.

    The text is split into sentences, every sentence into phrases and every
    phrase into words (via separate_sentences, separate_phrases and
    separate_words).

    Returns a list [wal, ttr, hlr, sal, sac, pal] where:
      wal - characters in words / number of words
      ttr - number of different words / number of words
      hlr - number of words used exactly once / number of words
      sal - characters in sentences / number of sentences
      sac - number of phrases / number of sentences
      pal - characters in phrases / number of phrases

    A trait whose denominator is zero (empty text, or text made only of
    punctuation/whitespace) is reported as 0.0 instead of raising
    ZeroDivisionError.
    '''
    def ratio(numerator, denominator):
        # Guarded division: an empty collection yields a neutral 0.0 trait.
        return numerator / denominator if denominator else 0.0

    sentence_list = separate_sentences(text)
    phrase_list = [
        phrase
        for sentence in sentence_list
        for phrase in separate_phrases(sentence)
    ]
    word_list = [
        word
        for phrase in phrase_list
        for word in separate_words(phrase)
    ]

    char_count = sum(len(word) for word in word_list)
    sentence_char_count = sum(len(sentence) for sentence in sentence_list)
    phrase_char_count = sum(len(phrase) for phrase in phrase_list)

    wal = ratio(char_count, len(word_list))
    ttr = ratio(count_different_words(word_list), len(word_list))
    hlr = ratio(count_unique_words(word_list), len(word_list))
    sal = ratio(sentence_char_count, len(sentence_list))
    sac = ratio(len(phrase_list), len(sentence_list))
    pal = ratio(phrase_char_count, len(phrase_list))

    return [wal, ttr, hlr, sal, sac, pal]

def read_signature():
    '''Greet the user, read the original text and return its signature.

    The signature is whatever calculate_signature returns for the entered
    text; it is later used as the reference for comparing other texts.
    '''
    print("Welcome to the automatic COH-PIAH detector.")
    return calculate_signature(input("Enter the original text"))

def evaluate_texts(texts, ref_sig):
    '''Return the 1-based position of the text whose signature is closest to ref_sig.

    Each text's signature is computed with calculate_signature and scored
    against ref_sig with compare_signatures; the lowest score wins. When
    several texts share the lowest score, the last of them is returned.
    Returns 0 when texts is empty.
    '''
    best_position = 0
    best_score = None

    for position, text in enumerate(texts, start=1):
        # Score each text exactly once and keep a running minimum instead of
        # re-scanning every previous score on each iteration.
        score = compare_signatures(calculate_signature(text), ref_sig)

        # '<=' (not '<') so that a later text with an equal score replaces
        # the earlier one.
        if best_score is None or score <= best_score:
            best_score = score
            best_position = position

    return best_position

def main():
    '''Run the detector end to end and return the result message.

    Reads the reference signature, reads the texts to compare, picks the text
    closest to the reference and returns a sentence naming its number. The
    message is returned, not printed.
    '''
    ref_sig = read_signature()
    texts = read_texts()
    infected_text = evaluate_texts(texts, ref_sig)

    # Plain f-string: reusing the enclosing quote character inside replacement
    # fields is only valid syntax from Python 3.12 onwards.
    return f'The author of text {infected_text} is infected with COH-PIAH'

In [None]:
# Run the detector. main() returns the result message instead of printing it,
# so the message is only visible where the value of a trailing expression is
# displayed (presumably a notebook cell, given the 'In [..]:' markers — confirm).
'''Reading the code:''' 
main()