# Language Generation Evaluation

### Analysis of the Europarl Dataset

In [None]:
import nltk
import numpy as np
import Levenshtein
from collections import Counter
from nltk.util import ngrams
from nltk.tokenize import word_tokenize
from nltk.stem import SnowballStemmer
nltk.download("punkt")
# Newer NLTK releases load the Punkt models from the "punkt_tab" package
# instead of "punkt". Fetch both so word_tokenize works in the tokenization
# cell that follows, regardless of the installed NLTK version.
nltk.download("punkt_tab")

In [None]:
# Load the dataset
def load_data(danish_file, english_file):
    """Read the two corpus files and return their lines as stripped strings.

    Args:
        danish_file: Path to the UTF-8 text file with the Danish sentences.
        english_file: Path to the UTF-8 text file with the English sentences.

    Returns:
        A ``(danish_sentences, english_sentences)`` tuple of lists, one entry
        per line of the corresponding file, with surrounding whitespace
        (including the line terminator) removed.
    """
    with open(danish_file, "r", encoding="utf-8") as da_handle, \
            open(english_file, "r", encoding="utf-8") as en_handle:
        # Iterating a text file yields its lines one by one.
        danish = list(map(str.strip, da_handle))
        english = list(map(str.strip, en_handle))
    return danish, english

danish_sentences, english_sentences = load_data("da-en/europarl-v7.da-en.da", "da-en/europarl-v7.da-en.en") # update the path to your files

# Tokenization (lower-cased so that word statistics are case-insensitive).
# word_tokenize defaults to the English Punkt model for its sentence-splitting
# step; select the Danish model explicitly for the Danish side so the Danish
# text is not segmented with English rules.
danish_tokens = [word_tokenize(sent.lower(), language="danish") for sent in danish_sentences]
english_tokens = [word_tokenize(sent.lower(), language="english") for sent in english_sentences]

# Flatten the per-sentence token lists into one token list per language
# for corpus-level word statistics.
danish_words = [word for sent in danish_tokens for word in sent]
english_words = [word for sent in english_tokens for word in sent]



In [None]:
# Corpus sizes: sentences, tokens, and distinct tokens per language.
num_sentences_da, num_sentences_en = len(danish_sentences), len(english_sentences)
num_words_da, num_words_en = len(danish_words), len(english_words)
unique_words_da, unique_words_en = len(set(danish_words)), len(set(english_words))

# Type-token ratio, expressed as a percentage.
ttr_da = unique_words_da / num_words_da * 100
ttr_en = unique_words_en / num_words_en * 100

# Tokens per sentence; built once and reused for both mean and std.
lengths_da = np.array([len(tokens) for tokens in danish_tokens])
lengths_en = np.array([len(tokens) for tokens in english_tokens])

avg_length_da, avg_length_en = lengths_da.mean(), lengths_en.mean()
std_length_da, std_length_en = lengths_da.std(), lengths_en.std()

sentence_length_ratio = avg_length_da / avg_length_en

report = [
    f"Total Sentences: Danish = {num_sentences_da}, English = {num_sentences_en}",
    f"Total Words: Danish = {num_words_da}, English = {num_words_en}",
    f"Unique Words: Danish = {unique_words_da}, English = {unique_words_en}",
    f"Type-Token Ratio: Danish = {ttr_da:.2f}%, English = {ttr_en:.2f}%",
    f"Avg. Sentence Length: Danish = {avg_length_da:.2f}, English = {avg_length_en:.2f}",
    f"Std Dev Sentence Length: Danish = {std_length_da:.2f}, English = {std_length_en:.2f}",
    f"Sentence Length Ratio (DA/EN): {sentence_length_ratio:.2f}",
]
print(*report, sep="\n")

In [None]:
# Out-of-Vocabulary (OOV) Rate
def oov_rate(words, vocabulary):
    """Return the percentage of entries in `words` that are not in `vocabulary`.

    Args:
        words: Sequence of tokens to check.
        vocabulary: Container supporting ``in`` membership tests.

    Returns:
        The out-of-vocabulary share as a float percentage, or the integer 0
        when `words` is empty.
    """
    if not words:
        return 0

    unknown = 0
    for token in words:
        if token not in vocabulary:
            unknown += 1
    return unknown / len(words) * 100

# The vocabulary for each language is the set of distinct tokens among the
# first VOCAB_SAMPLE_SIZE tokens of its corpus.
# NOTE(review): this is a prefix of the corpus, not the most frequent words,
# and the OOV rate below is measured over the whole corpus including that
# prefix -- confirm this is the intended definition.
VOCAB_SAMPLE_SIZE = 5000
vocab_da = set(danish_words[:VOCAB_SAMPLE_SIZE])
vocab_en = set(english_words[:VOCAB_SAMPLE_SIZE])

oov_danish, oov_english = (
    oov_rate(words, vocab)
    for words, vocab in ((danish_words, vocab_da), (english_words, vocab_en))
)

print(f"OOV Rate (Danish): {oov_danish:.2f}%")
print(f"OOV Rate (English): {oov_english:.2f}%")

### Load dependencies (see the README first)

In [None]:
from rouge import Rouge
from nltk.translate.bleu_score import sentence_bleu
from nltk import word_tokenize
from nltk.translate import meteor
import nltk
# Fetch the WordNet and punkt_tab NLTK data packages.
for resource in ("wordnet", "punkt_tab"):
    nltk.download(resource)

### Define functions

In [None]:
# Single ROUGE scorer instance, created once and reused by calculate_rouge.
rouge = Rouge()


def calculate_rouge(candidate, reference):
    """Score `candidate` against `reference` with the shared ROUGE scorer.

    Args:
        candidate: The text being evaluated.
        reference: The text it is compared against.

    Returns:
        Whatever ``rouge.get_scores(candidate, reference)`` returns, unchanged.
    """
    return rouge.get_scores(candidate, reference)

def calculate_bleu(candidate, reference):
    """Return the sentence-level BLEU score of `candidate` against `reference`.

    Both strings are tokenized with `word_tokenize`; the reference is wrapped
    in a one-element list because `sentence_bleu` takes a list of references.

    Args:
        candidate: The text being evaluated.
        reference: The single reference text.

    Returns:
        The value returned by `sentence_bleu`.
    """
    reference_tokens = word_tokenize(reference)
    hypothesis_tokens = word_tokenize(candidate)
    return sentence_bleu([reference_tokens], hypothesis_tokens)

def calculate_meteor(candidate, reference):
    """Return the METEOR score of `candidate` against `reference`.

    Both strings are tokenized with `word_tokenize` before scoring.

    Args:
        candidate: The text being evaluated (the hypothesis).
        reference: The single reference text.

    Returns:
        The METEOR score rounded to 4 decimal places.
    """
    reference_tokens = word_tokenize(reference)
    candidate_tokens = word_tokenize(candidate)
    # NLTK's meteor takes (references, hypothesis): a list of tokenized
    # references first, then the tokenized hypothesis. METEOR weights recall
    # and precision differently, so the order of the two arguments matters.
    return round(meteor([reference_tokens], candidate_tokens), 4)

### Load translation input

In [None]:
from itertools import islice

# Number of leading sentences (lines) compared by the metrics below.
NUM_SENTENCES = 27


def _read_first_lines(path, count):
    """Return the first `count` lines of the UTF-8 text file at `path` as one string.

    Raises:
        ValueError: If the file contains fewer than `count` lines.
    """
    with open(path, "r", encoding="utf-8") as file:
        lines = list(islice(file, count))
    if len(lines) < count:
        raise ValueError(
            f"{path} has only {len(lines)} lines; expected at least {count}"
        )
    return "".join(lines)


# Load the professional translation of sentences 1-27
# NOTE(review): the analysis section reads this corpus from
# "da-en/europarl-v7.da-en.da"; confirm which location is correct.
proTrans = _read_first_lines("europarl-v7.da-en.da", NUM_SENTENCES)

# Load the ChatGPT translation of sentences 1-27
chatGptTrans = _read_first_lines("ChatgptEnglishToDanish.txt", NUM_SENTENCES)

### Perform Rouge test

In [None]:
# ROUGE scores with the ChatGPT text as candidate and the professional text as reference.
rouge_scores = calculate_rouge(chatGptTrans, proTrans)
print(rouge_scores)

### Perform BLEU test

In [None]:
# BLEU score with the ChatGPT text as candidate and the professional text as reference.
bleu_score = calculate_bleu(chatGptTrans, proTrans)
print(bleu_score)

### Perform METEOR test

In [None]:
# METEOR score with the ChatGPT text as candidate and the professional text as reference.
meteor_result = calculate_meteor(chatGptTrans, proTrans)
print(meteor_result)