# FKGL and DCRS

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
pip install pandas nltk



## FKGL

In [None]:
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import cmudict

In [None]:
nltk.download('punkt')
nltk.download('cmudict')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package cmudict to /root/nltk_data...
[nltk_data]   Unzipping corpora/cmudict.zip.


True

In [None]:
d = cmudict.dict()

In [None]:
def count_syllables(word):
    return [len(list(y for y in x if y[-1].isdigit())) for x in d[word.lower()]][0] if word.lower() in d else 1

In [None]:
def flesch_kincaid_grade_level(text):
    sentences = sent_tokenize(text)
    words = word_tokenize(text)
    syllable_count = sum(count_syllables(word) for word in words)
    word_count = len(words)
    sentence_count = len(sentences)
    if word_count == 0 or sentence_count == 0:
        return 0
    return 0.39 * (word_count / sentence_count) + 11.8 * (syllable_count / word_count) - 15.59


### Pegasus

In [None]:
peg_df = pd.read_csv('/content/drive/MyDrive/DISSERTATION/Dataset/test_peg_v4.csv')

In [None]:
peg_df['generated_fk_grade'] = peg_df['generated_summary'].apply(flesch_kincaid_grade_level)

In [None]:
average_generated_fk_grade = peg_df['generated_fk_grade'].mean()
print("Average generated FK grade:", average_generated_fk_grade)

Average generated FK grade: 14.724769516742718


In [None]:
peg_df.to_csv('/content/drive/MyDrive/DISSERTATION/Results/Readability/Pegasus/peg_v4_fkgl.csv', index=False)

### BART

In [None]:
bart_df = pd.read_csv('/content/drive/MyDrive/DISSERTATION/Dataset/test_bart_v2.csv')

In [None]:
bart_df['generated_fk_grade'] = bart_df['generated_summary'].apply(flesch_kincaid_grade_level)

In [None]:
average_generated_fk_grade = bart_df['generated_fk_grade'].mean()
print("Average generated FK grade:", average_generated_fk_grade)

Average generated FK grade: 14.213430028721156


In [None]:
bart_df.to_csv('/content/drive/MyDrive/DISSERTATION/Results/Readability/BART/bart_v2_fkgl.csv', index=False)

## DCRS

In [None]:
import pandas as pd
import nltk
from nltk.corpus import words, cmudict
from nltk.tokenize import word_tokenize, sent_tokenize

# Ensure you have the necessary NLTK resources
nltk.download('punkt')
nltk.download('words')
nltk.download('cmudict')

# Load the Dale-Chall easy word list (4th-grade words)
easy_words = set(words.words())

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.
[nltk_data] Downloading package cmudict to /root/nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


In [None]:
def dale_chall_readability_score(text):
    sentences = sent_tokenize(text)
    words_list = word_tokenize(text)
    difficult_words = [word for word in words_list if word.lower() not in easy_words]
    difficult_words_count = len(difficult_words)
    word_count = len(words_list)
    sentence_count = len(sentences)

    if word_count == 0 or sentence_count == 0:
        return 0

    average_sentence_length = word_count / sentence_count
    percentage_difficult_words = (difficult_words_count / word_count) * 100

    raw_score = 0.1579 * percentage_difficult_words + 0.0496 * average_sentence_length
    if percentage_difficult_words > 5:
        raw_score += 3.6365

    return raw_score

### Pegasus

In [None]:
peg_df = pd.read_csv('/content/drive/MyDrive/DISSERTATION/Dataset/test_peg_v4.csv')

In [None]:
peg_df['generated_dale_chall'] = peg_df['generated_summary'].apply(dale_chall_readability_score)

In [None]:
average_generated_dale_chall = peg_df['generated_dale_chall'].mean()
print("Average generated Dale-Chall readability score:", average_generated_dale_chall)

Average generated Dale-Chall readability score: 10.61790521242583


In [None]:
peg_df.to_csv('/content/drive/MyDrive/DISSERTATION/Results/Readability/Pegasus/peg_v4_dcrs.csv', index=False)

### BART

In [None]:
bart_df = pd.read_csv('/content/drive/MyDrive/DISSERTATION/Dataset/test_bart_v2.csv')

In [None]:
bart_df['generated_dale_chall'] = bart_df['generated_summary'].apply(dale_chall_readability_score)

In [None]:
average_generated_dale_chall = bart_df['generated_dale_chall'].mean()
print("Average generated Dale-Chall readability score:", average_generated_dale_chall)

Average generated Dale-Chall readability score: 10.214336145545726


In [None]:
bart_df.to_csv('/content/drive/MyDrive/DISSERTATION/Results/Readability/BART/bart_v2_dcrs.csv', index=False)