## Implementation

#### Library Imports and Global Variables

In [198]:
from benchmark_reader import *
import re
import pprint as pp
import random
import torch

# Report whether a CUDA device is visible. The device name is only queried
# when one exists, because get_device_name(0) raises on CPU-only machines
# and would stop the notebook at its very first cell.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    print(torch.cuda.get_device_name(0))

True
NVIDIA GeForce RTX 3060


In [199]:
# Language tags handed to preprocess()/formatData(), where they are compared
# with each lexicalisation's `lang` field: English sentences are selected with
# the empty tag, Maltese sentences with 'mt'.
EN = ""
MT = "mt"

#### Function Definitions

In [200]:
def saveData(dataset, datatype, folder):
    """Write a dataset to a pair of line-aligned source/target text files.

    Two files are produced inside ``<folder>/data``:

    * ``src-<datatype>.txt`` - one triple per line, its parts joined by a
      single space;
    * ``tgt-<datatype>.txt`` - the matching sentence on the same line number.

    Args:
        dataset: iterable of dicts holding a 'triple' entry (sequence of
            strings) and a 'sentence' entry (string).
        datatype: split name used in the file names, e.g. 'train'.
        folder: output root directory; its ``data`` sub-directory is created
            when it does not exist yet.
    """
    import os  # local import: os is not loaded by the visible import cell

    os.makedirs(folder + "/data", exist_ok=True)

    source_path = folder + "/data/src-" + datatype + ".txt"
    target_path = folder + "/data/tgt-" + datatype + ".txt"

    # UTF-8 is set explicitly because the sentences contain non-ASCII
    # characters (e.g. Maltese "għ" and the typographic apostrophe), which the
    # platform default encoding may be unable to represent. The with-block
    # guarantees both files are closed even if a write fails part-way.
    with open(source_path, "w", encoding="utf-8") as f_source, \
            open(target_path, "w", encoding="utf-8") as f_target:
        for data in dataset:
            f_source.write(' '.join(data['triple']) + '\n')
            f_target.write(data['sentence'] + '\n')

def formatData(b, language, split_predicate=True, strip_special=False, lowercase=False):
    """Convert single-triple benchmark entries into triple/sentence records.

    Only entries whose ``size`` equals 1 are used. For each of them the
    subject, predicate and object of the triple are read, and one record is
    emitted per lexicalisation whose ``lang`` equals ``language``.

    Args:
        b: benchmark object exposing ``entries``; each entry must provide
            ``size``, ``modifiedtripleset.triples`` (items with ``s``, ``p``,
            ``o``), ``count_lexs()`` and ``lexs`` (items with ``lang``, ``lex``).
        language: language tag to keep, compared with ``lex.lang``.
        split_predicate: when True (default, the behaviour this function
            always had), insert a space before every upper-case character of
            the predicate, e.g. 'cityServed' -> 'city Served'.
        strip_special: when True, replace each of the characters
            ``, | " _ : @ # ? ! & $`` with a space in the triple parts and in
            the sentence. Defaults to False.
        lowercase: when True, lower-case the triple parts and the sentence.
            Applied after predicate splitting so the split points are not
            lost. Defaults to False.

    Returns:
        list of dicts ``{'triple': [s, p, o], 'sentence': str}``. Records
        coming from the same entry share the same triple list object.
    """
    special_chars = r'[,|""_:@#?!&$]'

    def _normalise(text):
        # Optional clean-up applied identically to triple parts and sentences.
        if strip_special:
            text = re.sub(special_chars, ' ', text)
        if lowercase:
            text = text.lower()
        return text

    dataset = []

    for entry in b.entries:

        if int(entry.size) != 1:  # keep only entries described by one triple
            continue

        triples = list(entry.modifiedtripleset.triples)
        if not triples:
            # Without this guard the triple of the previous entry would be
            # silently reused (or a NameError raised on the first entry).
            continue

        source = triples[-1]
        s, p, o = source.s, source.p, source.o

        if split_predicate:
            # camelCase predicate -> space separated words
            p = ''.join(' ' + ch if ch.isupper() else ch for ch in p)

        triple = [_normalise(s), _normalise(p), _normalise(o)]

        for i in range(entry.count_lexs()):
            lexicalisation = entry.lexs[i]
            if lexicalisation.lang == language:  # keep the requested language only
                dataset.append({
                    'triple': triple,
                    'sentence': _normalise(lexicalisation.lex)
                })

    return dataset

def _loadBenchmark(xml_path):
    """Return a Benchmark filled from the file(s) select_files finds for xml_path."""
    benchmark = Benchmark()
    benchmark.fill_benchmark(select_files(xml_path))
    return benchmark


def preprocess(language, folder, train_file="xmldata/mt_train.xml",
               val_file="xmldata/mt_dev.xml", train_fraction=0.9, seed=42):
    """Build the train/validation/test text files for one language.

    The records extracted from ``train_file`` are shuffled reproducibly and
    split into a training and a testing part; the records extracted from
    ``val_file`` become the validation set. Training records whose triple also
    occurs in the testing part are dropped, so no test triple is seen during
    training. The three sets are written with ``saveData`` as the 'train',
    'val' and 'test' splits under ``folder`` and their sizes are printed.

    Args:
        language: language tag forwarded to ``formatData``.
        folder: output root forwarded to ``saveData``.
        train_file: XML file providing the training and testing records.
        val_file: XML file providing the validation records.
        train_fraction: share of the shuffled records used for training
            before test-overlap filtering (default 0.9).
        seed: value passed to ``random.seed`` before shuffling (default 42).
            Note that this reseeds the module-level ``random`` generator.

    Raises:
        IndexError: if no record is extracted from ``train_file``, because
            the first record is printed as a formatting sample.
    """
    # records used for both training and testing
    traintest_dataset = formatData(_loadBenchmark(train_file), language)

    print("Triple formatting: ", traintest_dataset[0]['triple'])
    print("Sentence formatting: ", traintest_dataset[0]['sentence'], "\n")

    # records used for validation
    val_data = formatData(_loadBenchmark(val_file), language)

    random.seed(seed)
    random.shuffle(traintest_dataset)

    train_split = int(len(traintest_dataset) * train_fraction)

    train_data = traintest_dataset[:train_split]
    test_data = traintest_dataset[train_split:]

    # set of test triples for constant-time membership checks
    test_triples = {tuple(data['triple']) for data in test_data}

    # drop training records whose triple also appears in the testing set
    train_data_filtered = [data for data in train_data if tuple(data['triple']) not in test_triples]

    print("Training data: ", len(train_data_filtered), "\n", 
        "Validation data: ", len(val_data), "\n",
        "Testing data: ", len(test_data), sep='')

    saveData(train_data_filtered, 'train', folder)
    saveData(val_data, 'val', folder)
    saveData(test_data, 'test', folder)

### LSTM Model on English Data

In [201]:
# Write the English train/val/test source and target files under data-en/data.
preprocess(language=EN, folder='data-en')

Triple formatting:  ['Aarhus_Airport', 'city Served', '"Aarhus, Denmark"']
Sentence formatting:  The Aarhus is the airport of Aarhus, Denmark. 

Training data: 5681
Validation data: 961
Testing data: 769


In [202]:
# Build the source and target vocabularies for the English LSTM run;
# -n_sample -1 counts tokens over the full datasets instead of a sample.
!onmt_build_vocab -config config/config-lstm-en.yaml -n_sample -1

[2023-05-08 19:32:22,905 INFO] Counter vocab from -1 samples.
[2023-05-08 19:32:22,905 INFO] n_sample=-1: Build vocab on full datasets.
[2023-05-08 19:32:24,945 INFO] Counters src: 3163
[2023-05-08 19:32:24,945 INFO] Counters tgt: 6277


In [203]:
# Train the English LSTM model with the settings in config/config-lstm-en.yaml.
!onmt_train -config config/config-lstm-en.yaml

[2023-05-08 19:32:27,387 INFO] Missing transforms field for corpus_1 data, set to default: [].
[2023-05-08 19:32:27,387 INFO] Missing transforms field for valid data, set to default: [].
[2023-05-08 19:32:27,388 INFO] Parsed 2 corpora from -data.
[2023-05-08 19:32:27,388 INFO] Get special vocabs from Transforms: {'src': [], 'tgt': []}.
[2023-05-08 19:32:27,426 INFO] Building model...
[2023-05-08 19:32:27,738 INFO] NMTModel(
  (encoder): RNNEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(3168, 500, padding_idx=1)
        )
      )
      (dropout): Dropout(p=0.3, inplace=False)
    )
    (rnn): LSTM(500, 500, num_layers=2, batch_first=True, dropout=0.3)
  )
  (decoder): InputFeedRNNDecoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(6288, 500, padding_idx=1)
        )
      )
      (dropout): Dropout(p=0.3, inplace=False)
  

In [204]:
# Generate sentences for the English test triples with the step-1000
# checkpoint on GPU 0; predictions go to data-en/results/pred_1000.txt and
# -verbose echoes every input/prediction pair with its score.
!onmt_translate -model data-en/run/model_step_1000.pt -src data-en/data/src-test.txt -output data-en/results/pred_1000.txt -gpu 0 -verbose 

[2023-05-08 19:33:17,899 INFO] 
SENT 1: ['Alfred_Garth_Jones', 'death', 'Place', '<unk>']
PRED 1: Alfred Garth Jones died in New York.
PRED SCORE: -0.2942

[2023-05-08 19:33:17,899 INFO] 
SENT 2: ['Aleksandre_Guruli', 'club', '<unk>']
PRED 2: Aleksandre Guruli played for the Olympique Lyonnais club.
PRED SCORE: -0.5493

[2023-05-08 19:33:17,900 INFO] 
SENT 3: ['Bakewell_pudding', 'creator', '<unk>', '<unk>', '<unk>', 'in', '<unk>']
PRED 3: Bakewell pudding was the former place of Bakewell pudding
PRED SCORE: -0.3876

[2023-05-08 19:33:17,900 INFO] 
SENT 4: ['Ardmore_Airport_(New_Zealand)', '3rd', 'Runway', 'Length', 'Feet', '<unk>']
PRED 4: The runway name of Ardmore Airport (New Zealand) is 07/25.
PRED SCORE: -0.5458

[2023-05-08 19:33:17,900 INFO] 
SENT 5: ['Texas', 'largest', 'City', 'Houston']
PRED 5: Abilene is the largest city in Texas.
PRED SCORE: -0.5433

[2023-05-08 19:33:17,900 INFO] 
SENT 6: ['Manila', 'leader', 'Title', '<unk>', '<unk>']
PRED 6: The leader name of Atlantic 

### LSTM Model on English Data with GloVe Embeddings

In [205]:
# Regenerate the English data files under data-en/data; the shuffle is seeded,
# so this yields the same split as the plain English LSTM run.
preprocess(language=EN, folder='data-en')

Triple formatting:  ['Aarhus_Airport', 'city Served', '"Aarhus, Denmark"']
Sentence formatting:  The Aarhus is the airport of Aarhus, Denmark. 

Training data: 5681
Validation data: 961
Testing data: 769


In [206]:
# Build the vocabularies for the English GloVe run;
# -n_sample -1 counts tokens over the full datasets instead of a sample.
!onmt_build_vocab -config config/config-lstm-en-glove.yaml -n_sample -1

[2023-05-08 19:33:24,014 INFO] Counter vocab from -1 samples.
[2023-05-08 19:33:24,014 INFO] n_sample=-1: Build vocab on full datasets.
[2023-05-08 19:33:26,042 INFO] Counters src: 3163
[2023-05-08 19:33:26,042 INFO] Counters tgt: 6277


In [207]:
# Train the English LSTM model using config/config-lstm-en-glove.yaml; with
# this config the decoder embeddings are read from the GloVe 6B 300d vectors.
!onmt_train -config config/config-lstm-en-glove.yaml

[2023-05-08 19:33:28,316 INFO] Missing transforms field for corpus_1 data, set to default: [].
[2023-05-08 19:33:28,316 INFO] Missing transforms field for valid data, set to default: [].
[2023-05-08 19:33:28,316 INFO] Parsed 2 corpora from -data.
[2023-05-08 19:33:28,316 INFO] Get special vocabs from Transforms: {'src': [], 'tgt': []}.
[2023-05-08 19:33:28,351 INFO] Reading decoder embeddings from glove-embeddings/glove.6B.300d.txt
[2023-05-08 19:33:33,332 INFO] 	Found 400000 total vectors in file
[2023-05-08 19:33:33,332 INFO] After filtering to vectors in vocab:
[2023-05-08 19:33:33,333 INFO] 	* dec: 1369 match, 4919 missing, (21.77%)
[2023-05-08 19:33:33,333 INFO] 
Saving decoder embeddings as:
	* dec: data-en/run-glove/example.dec_embeddings.pt
[2023-05-08 19:33:34,241 INFO] Building model...
[2023-05-08 19:33:34,523 INFO] NMTModel(
  (encoder): RNNEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(31

In [208]:
# Generate sentences for the English test triples with the step-1000 GloVe
# checkpoint on GPU 0; predictions go to data-en/results/pred_1000_glove.txt.
!onmt_translate -model data-en/run-glove/model_step_1000.pt -src data-en/data/src-test.txt -output data-en/results/pred_1000_glove.txt -gpu 0 -verbose

[2023-05-08 19:34:26,647 INFO] 
SENT 1: ['Alfred_Garth_Jones', 'death', 'Place', '<unk>']
PRED 1: Alfred Garth Jones died in New York.
PRED SCORE: -0.1903

[2023-05-08 19:34:26,647 INFO] 
SENT 2: ['Aleksandre_Guruli', 'club', '<unk>']
PRED 2: Aleksandre Guruli played for the Italian national under-19 football team.
PRED SCORE: -0.2975

[2023-05-08 19:34:26,647 INFO] 
SENT 3: ['Bakewell_pudding', 'creator', '<unk>', '<unk>', '<unk>', 'in', '<unk>']
PRED 3: Bananaman is the creator of Bananaman.
PRED SCORE: -0.7506

[2023-05-08 19:34:26,647 INFO] 
SENT 4: ['Ardmore_Airport_(New_Zealand)', '3rd', 'Runway', 'Length', 'Feet', '<unk>']
PRED 4: The Ariane 5 has a runway length of 3990.0.
PRED SCORE: -0.5062

[2023-05-08 19:34:26,648 INFO] 
SENT 5: ['Texas', 'largest', 'City', 'Houston']
PRED 5: Arabic is the largest city in Texas.
PRED SCORE: -0.3358

[2023-05-08 19:34:26,648 INFO] 
SENT 6: ['Manila', 'leader', 'Title', '<unk>', '<unk>']
PRED 6: The Mayor of Denmark is the leader of Brazil.
P

### LSTM Model on Maltese Data

In [209]:
# Write the Maltese train/val/test source and target files under data-mt/data.
preprocess(language=MT, folder='data-mt')

Triple formatting:  ['Aarhus_Airport', 'city Served', '"Aarhus, Denmark"']
Sentence formatting:  L-Aarhus huwa l-ajruport ta’ Aarhus, id-Danimarka. 

Training data: 5681
Validation data: 403
Testing data: 769


In [210]:
# Build the source and target vocabularies for the Maltese LSTM run;
# -n_sample -1 counts tokens over the full datasets instead of a sample.
!onmt_build_vocab -config config/config-lstm-mt.yaml -n_sample -1

[2023-05-08 19:34:31,900 INFO] Counter vocab from -1 samples.
[2023-05-08 19:34:31,900 INFO] n_sample=-1: Build vocab on full datasets.
[2023-05-08 19:34:33,979 INFO] Counters src: 3163
[2023-05-08 19:34:33,979 INFO] Counters tgt: 8524


In [211]:
# Train the Maltese LSTM model with the settings in config/config-lstm-mt.yaml.
!onmt_train -config config/config-lstm-mt.yaml

[2023-05-08 19:34:36,410 INFO] Missing transforms field for corpus_1 data, set to default: [].
[2023-05-08 19:34:36,410 INFO] Missing transforms field for valid data, set to default: [].
[2023-05-08 19:34:36,410 INFO] Parsed 2 corpora from -data.
[2023-05-08 19:34:36,411 INFO] Get special vocabs from Transforms: {'src': [], 'tgt': []}.
[2023-05-08 19:34:36,463 INFO] Building model...
[2023-05-08 19:34:36,836 INFO] NMTModel(
  (encoder): RNNEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(3168, 500, padding_idx=1)
        )
      )
      (dropout): Dropout(p=0.3, inplace=False)
    )
    (rnn): LSTM(500, 500, num_layers=2, batch_first=True, dropout=0.3)
  )
  (decoder): InputFeedRNNDecoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(8528, 500, padding_idx=1)
        )
      )
      (dropout): Dropout(p=0.3, inplace=False)
  

In [212]:
# Generate Maltese sentences for the test triples with the step-1000
# checkpoint on GPU 0; predictions go to data-mt/results/pred_1000.txt.
!onmt_translate -model data-mt/run/model_step_1000.pt -src data-mt/data/src-test.txt -output data-mt/results/pred_1000.txt -gpu 0 -verbose

[2023-05-08 19:35:25,684 INFO] 
SENT 1: ['Alfred_Garth_Jones', 'death', 'Place', '<unk>']
PRED 1: Alfred Garth Jones meret fl-Ingilterra.
PRED SCORE: -0.4583

[2023-05-08 19:35:25,684 INFO] 
SENT 2: ['Aleksandre_Guruli', 'club', '<unk>']
PRED 2: Aleksandre Guruli jindika l-futbol Dinamo Batumi.
PRED SCORE: -0.7691

[2023-05-08 19:35:25,684 INFO] 
SENT 3: ['Bakewell_pudding', 'creator', '<unk>', '<unk>', '<unk>', 'in', '<unk>']
PRED 3: Il-pudding tal-Bakewell kien magħmul minn 233 Kesel.
PRED SCORE: -1.5976

[2023-05-08 19:35:25,685 INFO] 
SENT 4: ['Ardmore_Airport_(New_Zealand)', '3rd', 'Runway', 'Length', 'Feet', '<unk>']
PRED 4: It-tul tal-runway tal-Ajruport għandha l-isem tal-runway tal-Ajruport
PRED SCORE: -0.3714

[2023-05-08 19:35:25,685 INFO] 
SENT 5: ['Texas', 'largest', 'City', 'Houston']
PRED 5: L-akbarbelt hija l-akbarbelt
PRED SCORE: -0.5985

[2023-05-08 19:35:25,685 INFO] 
SENT 6: ['Manila', 'leader', 'Title', '<unk>', '<unk>']
PRED 6: Il-mexxej hija t-titolu
PRED SCORE: 

## Evaluation

### Different ways of preprocessing data

#### English

`python 2023-Challenge-main\evaluation\automatic\scripts\eval.py -hyp data-en/results/pred_1000.txt -ref data-en/data/tgt-test.txt -nr 1 -m bleu,ter -lng en`

<b>1. Data is kept as is:</b><br><br>
Triple formatting: &emsp;&emsp;['Aarhus_Airport', 'cityServed', '"Aarhus, Denmark"'] <br>
Sentence formatting: &nbsp;The Aarhus is the airport of Aarhus, Denmark. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 16.4104  |
| TER      | 0.8      |


<b>2. Regular expression is used to remove special characters:</b><br><br>
Triple formatting:  &emsp;&emsp;['Aarhus Airport', 'cityServed', ' Aarhus  Denmark '] <br>
Sentence formatting:  &nbsp;The Aarhus is the airport of Aarhus  Denmark.  <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 13.3161  |
| TER      | 0.97     |

<b>3. Everything is set to lowercase:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus_airport', 'cityserved', '"aarhus, denmark"'] <br>
Sentence formatting: &nbsp;the aarhus is the airport of aarhus, denmark.  <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 14.6464  |
| TER      | 0.97     |

<b>4. Splitting the predicate into separate words:</b><br><br>
Triple formatting: &emsp;&emsp;['Aarhus_Airport', 'city Served', '"Aarhus, Denmark"'] <br>
Sentence formatting: &nbsp;The Aarhus is the airport of Aarhus, Denmark. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 20.1420  |
| TER      | 0.68     |

<b>5. RE + Lowercase:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus airport', 'cityserved', ' aarhus  denmark '] <br>
Sentence formatting: &nbsp;the aarhus is the airport of aarhus  denmark.  <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 14.7333  |
| TER      | 0.83     |

<b>6. RE + Splitting:</b><br><br>
Triple formatting: &emsp;&emsp; ['Aarhus Airport', 'city Served', ' Aarhus  Denmark '] <br>
Sentence formatting: &nbsp;The Aarhus is the airport of Aarhus  Denmark. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 17.6363  |
| TER      | 0.72     |

<b>7. Lowercase + Splitting:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus_airport', 'city served', '"aarhus, denmark"'] <br>
Sentence formatting: &nbsp;the aarhus is the airport of aarhus, denmark.  <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 15.3500  |
| TER      | 0.91     |

<b>8. RE + Lowercase + Splitting:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus airport', 'city served', ' aarhus  denmark '] <br>
Sentence formatting: &nbsp;the aarhus is the airport of aarhus  denmark. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 15.5132  |
| TER      | 0.80     |

#### English with GloVe


`python 2023-Challenge-main\evaluation\automatic\scripts\eval.py -hyp data-en/results/pred_1000_glove.txt -ref data-en/data/tgt-test.txt -nr 1 -m bleu,ter -lng en`

<b>1. Data is kept as is:</b><br><br>
Triple formatting: &emsp;&emsp;['Aarhus_Airport', 'cityServed', '"Aarhus, Denmark"'] <br>
Sentence formatting: &nbsp;The Aarhus is the airport of Aarhus, Denmark. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 18.0305  |
| TER      | 0.74     |


<b>2. Regular expression is used to remove special characters:</b><br><br>
Triple formatting:  &emsp;&emsp;['Aarhus Airport', 'cityServed', ' Aarhus  Denmark '] <br>
Sentence formatting:  &nbsp;The Aarhus is the airport of Aarhus  Denmark.  <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 14.643   |
| TER      | 0.77     |

<b>3. Everything is set to lowercase:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus_airport', 'cityserved', '"aarhus, denmark"'] <br>
Sentence formatting: &nbsp;the aarhus is the airport of aarhus, denmark. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 16.2314  |
| TER      | 0.75     |

<b>4. Splitting the predicate into separate words:</b><br><br>
Triple formatting: &emsp;&emsp;['Aarhus_Airport', 'city Served', '"Aarhus, Denmark"'] <br>
Sentence formatting: &nbsp;The Aarhus is the airport of Aarhus, Denmark. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 17.4939  |
| TER      | 0.72     |

<b>5. RE + Lowercase:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus airport', 'cityserved', ' aarhus  denmark '] <br>
Sentence formatting: &nbsp;the aarhus is the airport of aarhus  denmark. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 15.5869  |
| TER      | 0.72     |

<b>6. RE + Splitting:</b><br><br>
Triple formatting: &emsp;&emsp;['Aarhus Airport', 'city Served', ' Aarhus  Denmark '] <br>
Sentence formatting: &nbsp;The Aarhus is the airport of Aarhus  Denmark.  <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 16.2471  |
| TER      | 0.75     |

<b>7. Lowercase + Splitting:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus_airport', 'city served', '"aarhus, denmark"'] <br>
Sentence formatting: &nbsp;the aarhus is the airport of aarhus, denmark. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 17.7529  |
| TER      | 0.74     |

<b>8. RE + Lowercase + Splitting:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus airport', 'city served', ' aarhus  denmark '] <br>
Sentence formatting: &nbsp;the aarhus is the airport of aarhus  denmark. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 15.6912  |
| TER      | 0.73     |

#### Maltese

`python 2023-Challenge-main\evaluation\automatic\scripts\eval.py -hyp data-mt/results/pred_1000.txt -ref data-mt/data/tgt-test.txt -nr 1 -m bleu,chrf++,ter -lng mt`

<b>1. Data is kept as is:</b><br><br>
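Triple formatting: &emsp;&emsp;['Aarhus_Airport', 'cityServed', '"Aarhus, Denmark"'] <br>
Sentence formatting: &nbsp;L-Aarhus huwa l-ajruport ta’ Aarhus, id-Danimarka. <br>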

| Metric   | Score    |
|----------|----------|
| BLEU     | 5.94147  |
| TER      | 0.8      |


<b>2. Regular expression is used to remove special characters:</b><br><br>
Triple formatting:  &emsp;&emsp;['Aarhus Airport', 'cityServed', ' Aarhus  Denmark '] <br>
Sentence formatting:  &nbsp;L-Aarhus huwa l-ajruport ta’ Aarhus  id-Danimarka.  <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 7.73331  |
| TER      | 0.82     |

<b>3. Everything is set to lowercase:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus_airport', 'cityserved', '"aarhus, denmark"'] <br>
Sentence formatting: &nbsp;l-aarhus huwa l-ajruport ta’ aarhus, id-danimarka.  <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 4.5222   |
| TER      | 0.88     |

<b>4. Splitting the predicate into separate words:</b><br><br>
Triple formatting: &emsp;&emsp;['Aarhus_Airport', 'city Served', '"Aarhus, Denmark"'] <br>
Sentence formatting: &nbsp;L-Aarhus huwa l-ajruport ta’ Aarhus, id-Danimarka.  <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 6.39957  |
| TER      | 0.80     |

<b>5. RE + Lowercase:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus airport', 'cityserved', ' aarhus  denmark '] <br>
Sentence formatting: &nbsp;l-aarhus huwa l-ajruport ta’ aarhus  id-danimarka. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 9.0829   |
| TER      | 0.78     |

<b>6. RE + Splitting:</b><br><br>
Triple formatting: &emsp;&emsp;['Aarhus Airport', 'city Served', ' Aarhus  Denmark '] <br>
Sentence formatting: &nbsp;L-Aarhus huwa l-ajruport ta’ Aarhus  id-Danimarka. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 7.72222  |
| TER      | 0.88     |

<b>7. Lowercase + Splitting:</b><br><br>
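Triple formatting: &emsp;&emsp;['aarhus_airport', 'city served', '"aarhus, denmark"'] <br>
Sentence formatting: &nbsp;l-aarhus huwa l-ajruport ta’ aarhus, id-danimarka. <br>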

| Metric   | Score    |
|----------|----------|
| BLEU     | 7.05491  |
| TER      | 0.84     |

<b>8. RE + Lowercase + Splitting:</b><br><br>
Triple formatting: &emsp;&emsp;['aarhus airport', 'city served', ' aarhus  denmark '] <br>
Sentence formatting: &nbsp;l-aarhus huwa l-ajruport ta’ aarhus  id-danimarka. <br>

| Metric   | Score    |
|----------|----------|
| BLEU     | 7.45765  |
| TER      | 0.82     |


#### Tabulated Results for BLEU scores

|                  | 1        | 2        | 3        | 4        | 5        | 6        | 7        | 8        |
|------------------|----------|----------|----------|----------|----------|----------|----------|----------|
| English          | 16.4104  | 13.3161  | 14.6464  | **20.142**   | 14.7333  | 17.6363  | 15.35    | 15.5132  |
| English w/GloVe  | **18.0305**  | 14.643   | 16.2314  | 17.4939  | 15.5869  | 16.2471  | 17.7529  | 15.6912  |
| Maltese          | 5.94147  | 7.73331  | 4.5222   | 6.39957  | **9.0829**   | 7.72222  | 7.05491  | 7.45765  |