<a href="https://colab.research.google.com/github/marco-siino/eloquent2024/blob/main/ELOQUENT_2024_Task_2_Hallucigen_Mistral7B_MSiino.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installing dependencies. You might need to tweak the CMAKE_ARGS for the `llama-cpp-python` pip package.

In [1]:
# Install llama-cpp-python with the cuBLAS (GPU) backend switched on through CMAKE_ARGS.
# GGUF model files are supported starting from llama-cpp-python==0.1.79.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on " pip install 'llama-cpp-python>=0.1.79' --force-reinstall --upgrade --no-cache-dir
# Hugging Face Hub client, used to download the model file.
!pip install huggingface_hub
# Dataset loader, used to fetch the HalluciGen data.
!pip install datasets
# Translation client, used to translate the Swedish few-shot samples to English.
!pip install -U deep-translator

import datasets
from datasets import load_dataset
from deep_translator import GoogleTranslator
import json
import re
import random
import numpy as np
import tqdm.notebook as tqdm

# Fixed seed used when shuffling the trial samples; it also seeds Python's
# global `random` generator so the run is repeatable.
seed_value = 42
random.seed(seed_value)

Collecting llama-cpp-python>=0.1.79
  Downloading llama_cpp_python-0.2.74.tar.gz (49.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.2/49.2 MB[0m [31m191.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting typing-extensions>=4.5.0 (from llama-cpp-python>=0.1.79)
  Downloading typing_extensions-4.11.0-py3-none-any.whl (34 kB)
Collecting numpy>=1.20.0 (from llama-cpp-python>=0.1.79)
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m264.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting diskcache>=5.6.1 (from llama-cpp-python>=0.1.79)
  Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━

Downloading an instruction-finetuned Mistral model.

In [2]:
from huggingface_hub import hf_hub_download

# Hugging Face repository and the GGUF file (Q6_K quantization) to fetch from it.
model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
model_basename = "mistral-7b-instruct-v0.2.Q6_K.gguf"

# Fetch the file from the Hub; the returned value is its local path.
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

# This config has been tested on an RTX 3080 (VRAM of 16GB).
# You might need to tweak it for your hardware.
from llama_cpp import Llama
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=4, #16, # CPU cores
    n_batch=800, #8000, # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
    n_gpu_layers=32, # Change this value based on your model and your GPU VRAM pool.
    n_ctx=8192, # Context window
    logits_all=True # NOTE(review): presumably required because the inference cell asks for `logprobs` - confirm
)

mistral-7b-instruct-v0.2.Q6_K.gguf:   0%|          | 0.00/5.94G [00:00<?, ?B/s]

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /root/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.2-GGUF/snapshots/3a6fbf4a41a1d52e415a4958cde6856d34b2db93/mistral-7b-instruct-v0.2.Q6_K.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loade

# Download the dataset for the three subtasks.

In [8]:
# Load the labelled trial data; the result holds both the `trial_english`
# and the `trial_swedish` split.
trial_ds = load_dataset("Eloquent/HalluciGen-PG", name="trial")

# Load only the Swedish split of the trial data.
trial_ds_sv = load_dataset("Eloquent/HalluciGen-PG", name="trial", split="trial_swedish")

# Load the test data for the detection step; the result holds both the
# `test_detection_english` and the `test_detection_swedish` split.
test_ds = load_dataset("Eloquent/HalluciGen-PG", name="test_detection")

In [15]:
# Peek at the first Swedish detection test sample.
test_ds['test_detection_swedish'][0]

{'id': 0,
 'source': 'Kvinnor kommer att möta högre bilförsäkringspremier.',
 'hyp1': 'Det betyder att kvinnor kan förvänta sig att betala högre priser för sina fordonstilläggsförsäkringar.',
 'hyp2': 'Kvinnor kommer att få högre premier för bilförsäkring.'}

In [18]:
# Peek at the first English trial sample.
trial_ds['trial_english'][0]

{'id': 0,
 'source': 'The population has declined in some 210 of the 280 municipalities in Sweden, mainly in inland central and northern Sweden.',
 'type': 'antonym',
 'hyp1': "In the majority of Sweden's 280 municipalities, the population has gone up.",
 'hyp2': "In the majority of Sweden's 280 municipalities, the population has gone down.",
 'label': 'hyp1'}

In [26]:
# Materialise the trial splits as plain Python lists so they can be shuffled in place.
trial_en_list = list(trial_ds['trial_english'])
trial_sv_list = list(trial_ds['trial_swedish'])

# Shuffle with a dedicated generator seeded from `seed_value` instead of the
# global `random` state. The global state advances every time this cell runs,
# so re-running the cell used to give a different order (and therefore
# different few-shot samples) each time; a fresh generator makes the cell
# idempotent.
shuffle_rng = random.Random(seed_value)
shuffle_rng.shuffle(trial_en_list)
shuffle_rng.shuffle(trial_sv_list)



In [27]:
# Display the shuffled Swedish trial samples.
trial_sv_list

[{'id': 2,
  'source': 'Län med befolkningsminskning kommer att vara Vermillion, Posey och Madison.',
  'type': 'named entity',
  'hyp1': 'Vermillion, Posey och Madison är län som kommer att uppleva minskande befolkning.',
  'hyp2': 'Vermillion, Posey och Marion är län som kommer att uppleva minskande befolkning.',
  'label': 'hyp2'},
 {'id': 12,
  'source': 'Israels Peres uppmanar parterna att återgå till fredssamtalen.',
  'type': 'addition',
  'hyp1': 'Peres i Israel ger en uppmaning att återgå till fredssamtalen.',
  'hyp2': 'Israels president Shimon Peres uppmanade på måndagen världssamfundet att återuppta fredsförhandlingarna med palestinierna och betonade vikten av en tvåstatslösning.',
  'label': 'hyp2'},
 {'id': 11,
  'source': 'Grekisk högerextrem ledare fängslad i väntan på rättegång.',
  'type': 'addition',
  'hyp1': '\xa0I väntan på rättegång har en grekisk högerextrem ledare fängslats.',
  'hyp2': 'En grekisk högerextremistisk ledare har gripits och kommer att ställas inf

# Create few-shot samples from the shuffled trial set (used here as training data).

In [47]:
 # Instruction header placed in front of every sample (few-shot and test) in the prompt.
 # It opens an `[INST]` block; the text appended after it closes the block with `[/INST]`.
 prompt_context = """[INST] Which one of hyp1 and hyp2 is not supported by src?

"""

In [56]:
def create_few_shot_samples(train_list, nr_samples, language, context=None):
  """Build the few-shot part of the prompt from labelled samples.

  Each of the first `nr_samples` entries of `train_list` is rendered as one
  `<s>` + header + src/hyp1/hyp2 + `[/INST]` + label + `</s>` block, and the
  blocks are concatenated in order.

  Args:
    train_list: Indexable sequence (with a length) of dicts holding the keys
      'source', 'hyp1', 'hyp2' and 'label'.
    nr_samples: Number of leading entries to use. A value larger than
      `len(train_list)` is clamped to the available entries instead of
      raising IndexError; a value <= 0 yields an empty string.
    language: 'sv' translates source and both hypotheses from Swedish to
      English with GoogleTranslator; any other value uses the texts as they are.
      Labels are never translated.
    context: Instruction header put in front of every sample. When omitted,
      the notebook-level `prompt_context` is used.

  Returns:
    The concatenated few-shot samples as a single string.
  """
  if context is None:
    context = prompt_context

  # One translator serves the whole call; the original built three new
  # instances for every sample.
  translator = GoogleTranslator(source='sv', target='en') if language == 'sv' else None

  # Clamp so that asking for more samples than exist (or a negative number)
  # cannot index out of range.
  count = max(0, min(nr_samples, len(train_list)))

  blocks = []
  for i in range(count):
    sample = train_list[i]
    src, hyp1, hyp2 = sample['source'], sample['hyp1'], sample['hyp2']
    if translator is not None:
      src = translator.translate(src)
      hyp1 = translator.translate(hyp1)
      hyp2 = translator.translate(hyp2)

    blocks.append(
      '<s>' + context + "src: " + src + "\nhyp1: " + hyp1 + '\nhyp2: ' + hyp2
      + '\nlabel: [/INST] \n ' + sample['label'] + "\n</s> \n\n "
    )

  return ''.join(blocks)

In [63]:
# English few-shot prefix, built from the first 16 entries of the shuffled English trial list.
few_shot_samples_en = create_few_shot_samples(trial_en_list,16,'en')

In [65]:
# Swedish few-shot prefix: the first 20 entries of the shuffled Swedish trial list, translated to English.
few_shot_samples_sv = create_few_shot_samples(trial_sv_list,20,'sv')

In [66]:
# Inspect the assembled English few-shot prefix.
print(few_shot_samples_en)

<s>[INST] Which one of hyp1 and hyp2 is not supported by src?

src: This state of affairs has not changed in more than 100 years, but hopefully at some stage - and perhaps soon - change will come.
hyp1: There has been no change in the status quo in over 100 years, but there is hope that change will soon come. 
hyp2: The state of affairs is1-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-65561-6556
label: [/INST] 
 hyp2
</s> 

 <s>[INST] Which one of hyp1 and hyp2 is not supported by src?

src: The draft agenda as drawn up by the Conference of Presidents pursuant to Rule 95 of the Rules of Procedure has been distributed.
hyp1: The Conference of Presidents hasn't distributed the draft agenda.
hyp2: The Conference of Presidents has di

In [67]:
# Inspect the assembled (translated) Swedish few-shot prefix.
print(few_shot_samples_sv)

<s>[INST] Which one of hyp1 and hyp2 is not supported by src?

src: Counties with population declines will be Vermillion, Posey and Madison.
hyp1: Vermillion, Posey and Madison are counties that will experience declining populations.
hyp2: Vermillion, Posey and Marion are counties that will experience declining populations.
label: [/INST] 
 hyp2
</s> 

 <s>[INST] Which one of hyp1 and hyp2 is not supported by src?

src: Israel's Peres calls on the parties to return to peace talks.
hyp1: Peres in Israel calls for return to peace talks.
hyp2: Israeli President Shimon Peres on Monday called on the world community to resume peace talks with the Palestinians, stressing the importance of a two-state solution.
label: [/INST] 
 hyp2
</s> 

 <s>[INST] Which one of hyp1 and hyp2 is not supported by src?

src: Greek far-right leader jailed awaiting trial.
hyp1: Pending trial, a Greek far-right leader has been imprisoned.
hyp2: A Greek far-right leader has been arrested and will stand trial, accus

# Run! (English test set)

In [72]:
# Not used in this cell; kept in case another cell relies on it.
counter = 0

# List to store the results: one dict per English test sample.
prediction_en = []

# Iterate over the rows directly so each row is fetched once instead of being
# re-indexed for every field.
for sample in test_ds['test_detection_english']:
  src = sample['source']
  hyp1 = sample['hyp1']
  hyp2 = sample['hyp2']
  # `sample_id` rather than `id`: a notebook-level `id` would shadow the
  # built-in `id()` for the rest of the session.
  sample_id = sample['id']

  # Same layout as the few-shot samples, ending where the label is expected.
  current_sample = prompt_context+"src: "+src+"\nhyp1: "+hyp1 + '\nhyp2: '+hyp2+'\nlabel: [/INST] \n '
  prompt = few_shot_samples_en+current_sample

  # NOTE(review): only the label is read from the completion; consider passing
  # a small `max_tokens` to avoid generating text that is thrown away.
  response = lcpp_llm(
    prompt=prompt,
    temperature=0.2,
    logprobs=1,
  )

  generated_text = str(response["choices"][0]["text"]).strip()

  # Take the first 'hyp1'/'hyp2' mentioned in the completion. Cutting the
  # text to four characters only works when the model starts its reply with
  # the label; that cut is kept as a fallback when no label is found.
  label_match = re.search(r'hyp[12]', generated_text, flags=re.IGNORECASE)
  if label_match:
    answer = label_match.group(0).lower()
  else:
    answer = generated_text[:4]

  print("GENERATED: "+ current_sample+'\n'+answer)

  current_element = {
    "id": sample_id,
    "label": answer,
    "explanation": ""
  }
  prediction_en.append(current_element)


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.89 ms /    16 runs   (    0.49 ms per token,  2028.65 tokens per second)
llama_print_timings: prompt eval time =   71482.99 ms /    85 tokens (  840.98 ms per token,     1.19 tokens per second)
llama_print_timings:        eval time =    1574.25 ms /    15 runs   (  104.95 ms per token,     9.53 tokens per second)
llama_print_timings:       total time =    2264.06 ms /   100 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: It has enabled us to support and encourage an exchange of experiences and to pursue activities to raise the level of competence throughout Europe.
hyp1: You can support and encourage an exchange of experiences to raise the level of competence in Europe.
hyp2: We can support and encourage an exchange of experiences to raise the level of competence in Europe.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.60 ms /    16 runs   (    0.48 ms per token,  2104.43 tokens per second)
llama_print_timings: prompt eval time =     305.73 ms /    95 tokens (    3.22 ms per token,   310.73 tokens per second)
llama_print_timings:        eval time =    1645.82 ms /    15 runs   (  109.72 ms per token,     9.11 tokens per second)
llama_print_timings:       total time =    2282.81 ms /   110 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Therefore, I am calling for an increase in the premiums for all varieties of leaf tobacco for the 1999, 2000 and 2001 harvests.
hyp1: I want to see a decrease in the premiums for all varieties of leaf tobacco.
hyp2: I want to see an increase in the premiums for all varieties of leaf tobacco.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.00 ms /    16 runs   (    0.56 ms per token,  1777.38 tokens per second)
llama_print_timings: prompt eval time =     304.41 ms /   105 tokens (    2.90 ms per token,   344.93 tokens per second)
llama_print_timings:        eval time =    1756.98 ms /    15 runs   (  117.13 ms per token,     8.54 tokens per second)
llama_print_timings:       total time =    2457.46 ms /   120 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In other words, a person may be prevented from coming near their victim when there is reason to fear that violent acts will be carried out again.
hyp1: When there is reason to fear that a violent act will be carried out again, a person may not be allowed to come near their victim.
hyp2: When there is reason to fear that a violent act will be carried out again, a person may not be allowed to come near her victim.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =      11.37 ms /    16 runs   (    0.71 ms per token,  1407.58 tokens per second)
llama_print_timings: prompt eval time =     284.73 ms /    68 tokens (    4.19 ms per token,   238.82 tokens per second)
llama_print_timings:        eval time =    1882.61 ms /    15 runs   (  125.51 ms per token,     7.97 tokens per second)
llama_print_timings:       total time =    2594.71 ms /    83 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: None of my 34 amendments were adopted and my arguments against the ridiculous administrative burden proposed were not heeded.
hyp1: Our arguments against the ridiculous administrative burden were not heard.
hyp2: My arguments against the ridiculous administrative burden were not heard.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.52 ms /    16 runs   (    0.53 ms per token,  1878.15 tokens per second)
llama_print_timings: prompt eval time =     288.32 ms /    89 tokens (    3.24 ms per token,   308.68 tokens per second)
llama_print_timings:        eval time =    1695.82 ms /    15 runs   (  113.05 ms per token,     8.85 tokens per second)
llama_print_timings:       total time =    2305.82 ms /   104 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The final key feature of the report is its orientation. It aims to protest against certain drifts that are becoming evident, and I will mention two instances.
hyp1: The final main feature of the report is its orientation protesting against drifts, and I will provide two instances of that.
hyp2: The final features of the report are its orientation and two instances.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.66 ms /    16 runs   (    0.60 ms per token,  1656.14 tokens per second)
llama_print_timings: prompt eval time =     293.50 ms /   106 tokens (    2.77 ms per token,   361.16 tokens per second)
llama_print_timings:        eval time =    1571.48 ms /    15 runs   (  104.77 ms per token,     9.55 tokens per second)
llama_print_timings:       total time =    2257.44 ms /   121 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: It is high time an appropriate level of social protection was created at European level for those whose work does not fit into the usual pattern.
hyp1: It's time for an appropriate level of social protection for those whose work doesn't fit into the usual pattern.
hyp2: It's time for an appropriate level of social protection for those whose work doesn't fit into the usual pattern, such as firefighters or security officers. 
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.11 ms /    16 runs   (    0.57 ms per token,  1756.50 tokens per second)
llama_print_timings: prompt eval time =     311.66 ms /    94 tokens (    3.32 ms per token,   301.61 tokens per second)
llama_print_timings:        eval time =    1562.39 ms /    15 runs   (  104.16 ms per token,     9.60 tokens per second)
llama_print_timings:       total time =    2187.09 ms /   109 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: These sums, however, have still not produced the desired effect: the substantial fall in the average earnings of the Palestinians is proof of this.
hyp1: The fall in average earnings of Egyptians is proof that these sums have not produced the desired effect.
hyp2: The fall in average earnings of Palestinians is proof that these sums have not produced the desired effect.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =      11.58 ms /    16 runs   (    0.72 ms per token,  1381.57 tokens per second)
llama_print_timings: prompt eval time =     326.22 ms /    93 tokens (    3.51 ms per token,   285.09 tokens per second)
llama_print_timings:        eval time =    1778.61 ms /    15 runs   (  118.57 ms per token,     8.43 tokens per second)
llama_print_timings:       total time =    2728.40 ms /   108 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Mr President, the approach adopted by the rapporteur to the Commission's 1999 annual economic report is comprehensive and also sensible.
hyp1: The approach taken by the rapporteur to the 1997 annual economic report is comprehensive and sensible.
hyp2: The approach taken by the rapporteur to the 1999 annual economic report is comprehensive and sensible.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.39 ms /    16 runs   (    0.52 ms per token,  1907.71 tokens per second)
llama_print_timings: prompt eval time =     278.73 ms /    75 tokens (    3.72 ms per token,   269.07 tokens per second)
llama_print_timings:        eval time =    1533.45 ms /    15 runs   (  102.23 ms per token,     9.78 tokens per second)
llama_print_timings:       total time =    2076.46 ms /    90 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The Berlin summit, which will focus solely on Agenda 2000, is of particular importance for the future of the European Union.
hyp1: The future of the European Union is important to the Berlin summit.
hyp2: The Berlin summit is important to the future of the European Union.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.77 ms /    16 runs   (    0.55 ms per token,  1825.44 tokens per second)
llama_print_timings: prompt eval time =     290.76 ms /   106 tokens (    2.74 ms per token,   364.56 tokens per second)
llama_print_timings:        eval time =    1628.39 ms /    15 runs   (  108.56 ms per token,     9.21 tokens per second)
llama_print_timings:       total time =    2298.88 ms /   121 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: As a result, I cannot promise the chairman of the Committee on Budgets that there will be 100 % funding for the financial programme.
hyp1: I can't promise that there will be 100 percent funding for the financial program.
hyp2: Because of this, it's not possible for me to promise the chairman of the Committee on Budgets that there would be 210 % funding for the financial programme.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.68 ms /    16 runs   (    0.54 ms per token,  1843.96 tokens per second)
llama_print_timings: prompt eval time =     290.73 ms /   111 tokens (    2.62 ms per token,   381.79 tokens per second)
llama_print_timings:        eval time =    1659.05 ms /    15 runs   (  110.60 ms per token,     9.04 tokens per second)
llama_print_timings:       total time =    2317.62 ms /   126 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: We voted for Amendments Nos 22 to 25, even though they do not take adequate measures to prevent these problems from happening.
hyp1: Amendments Nos 22 to 25 didn't take enough measures to prevent problems from happening, so we voted for them.
hyp2: Amendments Nos 22 to 25 didn't take enough measures to prevent problems from happening, yet we voted for them.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.98 ms /    16 runs   (    0.50 ms per token,  2005.01 tokens per second)
llama_print_timings: prompt eval time =     332.94 ms /   101 tokens (    3.30 ms per token,   303.36 tokens per second)
llama_print_timings:        eval time =    1648.07 ms /    15 runs   (  109.87 ms per token,     9.10 tokens per second)
llama_print_timings:       total time =    2579.74 ms /   116 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Secondly, we say that the type approval rules as from 2005 must provide in precise terms that new cars must be recycling-friendly.
hyp1: The type approval rules from 2005 must give precise terms for new cars to be recycling-friendly.
hyp2: The type approval rules as of 2005 must give precise terms for new cars to be recycling-friendly.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.50 ms /    16 runs   (    0.47 ms per token,  2132.76 tokens per second)
llama_print_timings: prompt eval time =     282.28 ms /    91 tokens (    3.10 ms per token,   322.37 tokens per second)
llama_print_timings:        eval time =    1546.07 ms /    15 runs   (  103.07 ms per token,     9.70 tokens per second)
llama_print_timings:       total time =    2155.78 ms /   106 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: B4-0188/99 by Mrs d'Ancona, on behalf of the PSE Group, on the death penalty against Greg Summers - Texas, USA; Leonard Peltier
hyp1: The death penalty was imposed on Gregory Winters in Texas, USA.
hyp2: The death penalty was imposed on Greg Summers in Texas, USA.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.84 ms /    16 runs   (    0.55 ms per token,  1809.14 tokens per second)
llama_print_timings: prompt eval time =     302.19 ms /   116 tokens (    2.61 ms per token,   383.86 tokens per second)
llama_print_timings:        eval time =    1656.36 ms /    15 runs   (  110.42 ms per token,     9.06 tokens per second)
llama_print_timings:       total time =    2364.10 ms /   131 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: We struggle with water on a daily basis in the Netherlands - in the polders, the delta where the Meuse, the Rhine and the Scheldt flow into the sea.
hyp1: In the Netherlands, we struggle with water on a daily basis because of the Meuse, Rhine, Scheldt, Noord, Voer and Dieze
hyp2: In the Netherlands, we struggle with water on a daily basis because of the Meuse, Rhine and Scheldt.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.67 ms /    16 runs   (    0.54 ms per token,  1845.44 tokens per second)
llama_print_timings: prompt eval time =     272.03 ms /    76 tokens (    3.58 ms per token,   279.38 tokens per second)
llama_print_timings:        eval time =    1881.85 ms /    15 runs   (  125.46 ms per token,     7.97 tokens per second)
llama_print_timings:       total time =    2442.03 ms /    91 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: It may lower costs by 10 % and is, of course, only applicable over the shorter road legs of the combined transport journey.
hyp1: It is only applicable to the short road legs of the combined transport journey.
hyp2: It is applicable to the road legs of the combined transport journey.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.12 ms /    16 runs   (    0.51 ms per token,  1971.41 tokens per second)
llama_print_timings: prompt eval time =     282.01 ms /    78 tokens (    3.62 ms per token,   276.58 tokens per second)
llama_print_timings:        eval time =    1576.98 ms /    15 runs   (  105.13 ms per token,     9.51 tokens per second)
llama_print_timings:       total time =    2130.59 ms /    93 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Nothing has been done in the 27 years since then, while we have liberalised sea transport, air transport and road transport.
hyp1: Since then, we have restricted air transport, road transport and sea transport.
hyp2: Since then, we have liberalised air transport, road transport and sea transport.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.77 ms /    16 runs   (    0.49 ms per token,  2059.73 tokens per second)
llama_print_timings: prompt eval time =     270.07 ms /    80 tokens (    3.38 ms per token,   296.22 tokens per second)
llama_print_timings:        eval time =    1574.95 ms /    15 runs   (  105.00 ms per token,     9.52 tokens per second)
llama_print_timings:       total time =    2123.34 ms /    95 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: This concerns the applicant countries which are still in the process of membership negotiations, but which in the meantime are being discriminated against.
hyp1: This is about the countries that are still in membership negotiations and are also being discriminated against.
hyp2: The countries that are still in membership negotiations are being discriminated against.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.00 ms /    16 runs   (    0.50 ms per token,  2000.75 tokens per second)
llama_print_timings: prompt eval time =     290.54 ms /   105 tokens (    2.77 ms per token,   361.40 tokens per second)
llama_print_timings:        eval time =    1560.41 ms /    15 runs   (  104.03 ms per token,     9.61 tokens per second)
llama_print_timings:       total time =    2194.69 ms /   120 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: I would be grateful if you could tell me whether or not Rambouillet was a European Union initiative or a Franco-British initiative, as I did not quite understand this point.
hyp1: I need to know if Rambouillet is a European Union initiative or a Franco-British initiative.
hyp2: We need to know if Rambouillet is a European Union initiative or a Franco-British initiative.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.21 ms /    16 runs   (    0.58 ms per token,  1736.30 tokens per second)
llama_print_timings: prompt eval time =     287.75 ms /    96 tokens (    3.00 ms per token,   333.62 tokens per second)
llama_print_timings:        eval time =    1915.96 ms /    15 runs   (  127.73 ms per token,     7.83 tokens per second)
llama_print_timings:       total time =    2564.98 ms /   111 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Madam President, I referred earlier to inherited inertia, and this leads me to the third group of amendments, to which Mr Fabre-Aubrespy referred.
hyp1: The third group of amendments was referred to by Ms Fabre- Aubrespy.
hyp2: The third group of amendments was referred to by Mr Fabre- Aubrespy.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.29 ms /    16 runs   (    0.46 ms per token,  2194.49 tokens per second)
llama_print_timings: prompt eval time =     299.20 ms /    74 tokens (    4.04 ms per token,   247.32 tokens per second)
llama_print_timings:        eval time =    1468.04 ms /    15 runs   (   97.87 ms per token,    10.22 tokens per second)
llama_print_timings:       total time =    2018.31 ms /    89 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Other elements have contributed to the proposal to conclude this partnership agreement, especially the very worrying regional context in Central Asia.
hyp1: There are other elements that contributed to the proposal to conclude the partnership agreement.
hyp2: There are no other elements that contributed to the proposal to conclude the partnership agreement.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.44 ms /    16 runs   (    0.53 ms per token,  1896.41 tokens per second)
llama_print_timings: prompt eval time =     415.11 ms /   150 tokens (    2.77 ms per token,   361.35 tokens per second)
llama_print_timings:        eval time =    1603.35 ms /    15 runs   (  106.89 ms per token,     9.36 tokens per second)
llama_print_timings:       total time =    2486.90 ms /   165 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: A way must also be found to tax all kinds of cross-border capital movements which are carried out for profit, in order to put a brake, albeit in a limited way, on the increase in the volume of parasitic, speculative capital.
hyp1: To put a brake on the increase in the volume of speculative capital, which is often regulated under the Tobin Tax, a way must be found to tax cross-border capital movements carried out for profit.
hyp2: To put a brake on the increase in the volume of speculative capital, a way must be found to tax cross-border capital movements carried out for profit.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.75 ms /    16 runs   (    0.48 ms per token,  2065.58 tokens per second)
llama_print_timings: prompt eval time =     275.41 ms /    86 tokens (    3.20 ms per token,   312.27 tokens per second)
llama_print_timings:        eval time =    1519.99 ms /    15 runs   (  101.33 ms per token,     9.87 tokens per second)
llama_print_timings:       total time =    2088.52 ms /   101 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: An important report was also put together under the leadership of Susan Waddington extending the debate to cover the issue of the trade in women.
hyp1: Susan Waddington extended the debate on the trade in women to include an important report.
hyp2: Susan Waddington extended the debate on the trade in women by putting together a critical report. 
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =      12.61 ms /    16 runs   (    0.79 ms per token,  1268.73 tokens per second)
llama_print_timings: prompt eval time =     268.51 ms /    69 tokens (    3.89 ms per token,   256.97 tokens per second)
llama_print_timings:        eval time =    1903.24 ms /    15 runs   (  126.88 ms per token,     7.88 tokens per second)
llama_print_timings:       total time =    2590.83 ms /    84 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Over recent years, these have largely been behind the development of a significant number of economic policies, which is good for employment.
hyp1: A lot of economic policies are good for employment. 
hyp2: A lot of economic policies are good for employment because of these.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.73 ms /    16 runs   (    0.48 ms per token,  2070.93 tokens per second)
llama_print_timings: prompt eval time =     309.04 ms /    85 tokens (    3.64 ms per token,   275.04 tokens per second)
llama_print_timings:        eval time =    1582.41 ms /    15 runs   (  105.49 ms per token,     9.48 tokens per second)
llama_print_timings:       total time =    2201.57 ms /   100 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The presidency is very much of this view, as economic and political stability in Jordan is a crucial factor in peace-keeping in the Middle East.
hyp1: Peace-keeping in the Middle East depends on economic and political stability in the north of Jordan.
hyp2: Peace-keeping in the Middle East depends on economic and political stability in Jordan.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.83 ms /    16 runs   (    0.49 ms per token,  2044.47 tokens per second)
llama_print_timings: prompt eval time =     287.86 ms /   105 tokens (    2.74 ms per token,   364.77 tokens per second)
llama_print_timings:        eval time =    1590.37 ms /    15 runs   (  106.02 ms per token,     9.43 tokens per second)
llama_print_timings:       total time =    2241.43 ms /   120 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In addition, the Commission is unable to accept Amendments Nos 27 and 28, because they go beyond the scope of this programme.
hyp1: The Commission can't accept Amendments 27 and 28 because they go past the scope of the programme.
hyp2: The Commission can't accept Amendments 27 and 28 even though they go past the scope of the programme.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       6.94 ms /    14 runs   (    0.50 ms per token,  2017.29 tokens per second)
llama_print_timings: prompt eval time =     287.69 ms /    95 tokens (    3.03 ms per token,   330.21 tokens per second)
llama_print_timings:        eval time =    1350.98 ms /    13 runs   (  103.92 ms per token,     9.62 tokens per second)
llama_print_timings:       total time =    1976.52 ms /   108 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: On the contrary, things came to a head in the crisis and we now know for certain that this Augean stable at any rate is going to be cleaned out.
hyp1: The crisis came to a head and we now know that the Hercules stable will be cleaned out.
hyp2: The crisis came to a head and we now know that the Augean stable will be cleaned out.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =      10.48 ms /    16 runs   (    0.66 ms per token,  1526.43 tokens per second)
llama_print_timings: prompt eval time =     280.73 ms /    85 tokens (    3.30 ms per token,   302.78 tokens per second)
llama_print_timings:        eval time =    1878.32 ms /    15 runs   (  125.22 ms per token,     7.99 tokens per second)
llama_print_timings:       total time =    2645.18 ms /   100 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: As you will remember, on 30 October 1997, the Commission presented a Community initiative for the European Capital of Culture.
hyp1: The European Capital of Culture community initiative was presented by the Commission in 1997.
hyp2: The European Capital of Culture was presented by the Commission in 1997.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.93 ms /    16 runs   (    0.50 ms per token,  2018.42 tokens per second)
llama_print_timings: prompt eval time =     287.19 ms /    83 tokens (    3.46 ms per token,   289.00 tokens per second)
llama_print_timings:        eval time =    1611.34 ms /    15 runs   (  107.42 ms per token,     9.31 tokens per second)
llama_print_timings:       total time =    2250.75 ms /    98 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: A second point on the timing: this Committee of Experts will make its report available - if I recall correctly - on 15 March.
hyp1: If I remember correctly, the report will be made available on March 15.
hyp2: If I remember correctly, the report will be made available on March 17.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.83 ms /    16 runs   (    0.49 ms per token,  2044.21 tokens per second)
llama_print_timings: prompt eval time =     281.50 ms /    85 tokens (    3.31 ms per token,   301.96 tokens per second)
llama_print_timings:        eval time =    1569.03 ms /    15 runs   (  104.60 ms per token,     9.56 tokens per second)
llama_print_timings:       total time =    2136.14 ms /   100 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: There are four points in relation to which the Committee on Agriculture and Rural Development has made changes which we believe will be adopted.
hyp1: The Committee on Agriculture and Rural Development made changes that we think will be adopted.
hyp2: The Committee on Agriculture and Rural Development did not make changes that we think will be adopted.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.85 ms /    16 runs   (    0.49 ms per token,  2037.44 tokens per second)
llama_print_timings: prompt eval time =     270.05 ms /    73 tokens (    3.70 ms per token,   270.32 tokens per second)
llama_print_timings:        eval time =    1526.21 ms /    15 runs   (  101.75 ms per token,     9.83 tokens per second)
llama_print_timings:       total time =    2056.80 ms /    88 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In the Nordic countries there are excellent examples of this, and for that reason I recommend that Amendment No 1 be rejected.
hyp1: There are excellent examples of this in the Nordic countries.
hyp2: There are excellent examples of this Amendment in the Nordic countries.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.82 ms /    16 runs   (    0.61 ms per token,  1629.99 tokens per second)
llama_print_timings: prompt eval time =     268.74 ms /    71 tokens (    3.79 ms per token,   264.19 tokens per second)
llama_print_timings:        eval time =    1868.38 ms /    15 runs   (  124.56 ms per token,     8.03 tokens per second)
llama_print_timings:       total time =    2579.56 ms /    86 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: It is we politicians who are most concerned, because we understand the link between energy consumption and CO2 emissions.
hyp1: Politicians understand the link between CO2 emissions and energy consumption.
hyp2: Politicians don't understand the link between CO2 emissions and energy consumption.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.05 ms /    16 runs   (    0.50 ms per token,  1988.32 tokens per second)
llama_print_timings: prompt eval time =     298.61 ms /    80 tokens (    3.73 ms per token,   267.91 tokens per second)
llama_print_timings:        eval time =    1679.07 ms /    15 runs   (  111.94 ms per token,     8.93 tokens per second)
llama_print_timings:       total time =    2257.16 ms /    95 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Having pressed the Commission to present this proposal, the Socialist Group - to which I have the privilege of belonging - now supports it enthusiastically.
hyp1: The Communist Group now supports the proposal after pressing the Commission to present it.
hyp2: The Socialist Group now supports the proposal after pressing the Commission to present it.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.25 ms /    16 runs   (    0.52 ms per token,  1939.86 tokens per second)
llama_print_timings: prompt eval time =     293.39 ms /    77 tokens (    3.81 ms per token,   262.45 tokens per second)
llama_print_timings:        eval time =    1566.62 ms /    15 runs   (  104.44 ms per token,     9.57 tokens per second)
llama_print_timings:       total time =    2121.15 ms /    92 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: For example, unemployment is very high in Réunion and in the West Indies. Therefore, we should not devote our efforts to integrating unlimited numbers of immigrants.
hyp1: The West Indies and Réunion have low unemployment.
hyp2: The West Indies and Réunion have high unemployment.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.57 ms /    16 runs   (    0.54 ms per token,  1866.54 tokens per second)
llama_print_timings: prompt eval time =     277.22 ms /    81 tokens (    3.42 ms per token,   292.19 tokens per second)
llama_print_timings:        eval time =    1744.02 ms /    15 runs   (  116.27 ms per token,     8.60 tokens per second)
llama_print_timings:       total time =    2324.43 ms /    96 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: A uniform phased reduction of 3 % will primarily penalise the sectors of production which receive the most aid, such as tobacco growing.
hyp1: Tobacco growing will benefit by a 3 % phased reduction in aid.
hyp2: Tobacco growing will be affected by a 3 % phased reduction in aid.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.20 ms /    16 runs   (    0.57 ms per token,  1739.32 tokens per second)
llama_print_timings: prompt eval time =     310.49 ms /    95 tokens (    3.27 ms per token,   305.97 tokens per second)
llama_print_timings:        eval time =    2192.00 ms /    15 runs   (  146.13 ms per token,     6.84 tokens per second)
llama_print_timings:       total time =    3053.85 ms /   110 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The situation in India is appalling as 50 million women are missing because they are eliminated right from the stage of conception.
hyp1: 50 million women are missing in India due to the fact that they are eliminated before they are conceived.
hyp2: 50 million women are missing in India due to the fact that they are eliminated directly after they have been conceived.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.45 ms /    16 runs   (    0.59 ms per token,  1692.41 tokens per second)
llama_print_timings: prompt eval time =     286.05 ms /    82 tokens (    3.49 ms per token,   286.67 tokens per second)
llama_print_timings:        eval time =    1655.85 ms /    15 runs   (  110.39 ms per token,     9.06 tokens per second)
llama_print_timings:       total time =    2230.55 ms /    97 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Nevertheless, we want to limit the Council regulations to general provisions and to cover the remaining provisions in implementing regulations.
hyp1: We want the regulations to only cover general provisions and the rest of the regulations.
hyp2: We want the Council's regulations to only cover general provisions while the rest of the provisions should be covered by implementing regulations
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.95 ms /    16 runs   (    0.50 ms per token,  2011.57 tokens per second)
llama_print_timings: prompt eval time =     289.31 ms /    92 tokens (    3.14 ms per token,   318.00 tokens per second)
llama_print_timings:        eval time =    1621.86 ms /    15 runs   (  108.12 ms per token,     9.25 tokens per second)
llama_print_timings:       total time =    2242.85 ms /   107 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: I hope that the Euro-Mediterranean Conference to held in Stuttgart between 4 and 6 April will enable us to make progress in that direction.
hyp1: I hope the Euro-Mediterranean Conference will help us make progress in that direction.
hyp2: I hope the Euro-Mediterranean Conference will help you make progress in that direction.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.77 ms /    16 runs   (    0.55 ms per token,  1825.03 tokens per second)
llama_print_timings: prompt eval time =     279.66 ms /    88 tokens (    3.18 ms per token,   314.67 tokens per second)
llama_print_timings:        eval time =    1847.70 ms /    15 runs   (  123.18 ms per token,     8.12 tokens per second)
llama_print_timings:       total time =    2509.04 ms /   103 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In 1996, the Commission submitted its proposal to have Turkey included in the Socrates, Youth for Europe and Leonardo programmes.
hyp1: Turkey was included in the Commission's proposal in 1996.
hyp2: Turkey was included in the Commission's proposal for three different programmes in 1996.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.97 ms /    16 runs   (    0.50 ms per token,  2008.28 tokens per second)
llama_print_timings: prompt eval time =     321.58 ms /   106 tokens (    3.03 ms per token,   329.62 tokens per second)
llama_print_timings:        eval time =    1515.97 ms /    15 runs   (  101.06 ms per token,     9.89 tokens per second)
llama_print_timings:       total time =    2179.53 ms /   121 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: We cannot accept Amendments Nos 19, 21 and 51, which are aimed at regulating the import and export of genetically modified organisms.
hyp1: Amendments Nos 21 and 23 are aimed at regulating genetically modified organisms.
hyp2: Amendments Nos 19 and 21 are aimed at regulating genetically modified organisms.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.95 ms /    16 runs   (    0.50 ms per token,  2013.09 tokens per second)
llama_print_timings: prompt eval time =     298.56 ms /    96 tokens (    3.11 ms per token,   321.54 tokens per second)
llama_print_timings:        eval time =    2005.15 ms /    15 runs   (  133.68 ms per token,     7.48 tokens per second)
llama_print_timings:       total time =    2635.53 ms /   111 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: I would also like to draw attention to Amendments Nos 27, 29 and 32 on public inquiries or consultation in connection with trial releases.
hyp1: Amendments Nos 27 and 29 are about consultations related to trial releases.
hyp2: Amendments Nos 27 and 29 are related to trial releases.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.50 ms /    16 runs   (    0.53 ms per token,  1882.57 tokens per second)
llama_print_timings: prompt eval time =     291.76 ms /    82 tokens (    3.56 ms per token,   281.06 tokens per second)
llama_print_timings:        eval time =    1641.84 ms /    15 runs   (  109.46 ms per token,     9.14 tokens per second)
llama_print_timings:       total time =    2228.71 ms /    97 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: A true strategic partnership should be established with this great country of a billion inhabitants, compared with 1.2 billion in China.
hyp1: China has 1.2 billion people, while this great country has a billion inhabitants.
hyp2: China has 1.2 billion people, while this great country has a million inhabitants.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.55 ms /    16 runs   (    0.53 ms per token,  1870.25 tokens per second)
llama_print_timings: prompt eval time =     310.43 ms /   105 tokens (    2.96 ms per token,   338.25 tokens per second)
llama_print_timings:        eval time =    1753.43 ms /    15 runs   (  116.90 ms per token,     8.55 tokens per second)
llama_print_timings:       total time =    2666.34 ms /   120 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: This is a positive development, and I can of course endorse the outcome which appears in the form of Compromise Amendments Nos 189 to 201.
hyp1: The outcome appears in the form of Compromise Amendments Nos 189 to 201.
hyp2: The outcome appears in the form of Compromise Amendments Nos 190 to 202.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.27 ms /    16 runs   (    0.52 ms per token,  1934.94 tokens per second)
llama_print_timings: prompt eval time =     286.78 ms /    87 tokens (    3.30 ms per token,   303.37 tokens per second)
llama_print_timings:        eval time =    1950.22 ms /    15 runs   (  130.01 ms per token,     7.69 tokens per second)
llama_print_timings:       total time =    2555.99 ms /   102 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: For instance, in the summer it would perhaps be possible to set up surveillance operations to detect the flimsy craft used to make the crossing.
hyp1: It would be possible to detect the flimsy craft that makes the crossing in the summer.
hyp2: It would be possible to overlook the flimsy craft that makes the crossing in the summer.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =      12.50 ms /    16 runs   (    0.78 ms per token,  1280.20 tokens per second)
llama_print_timings: prompt eval time =     301.15 ms /    83 tokens (    3.63 ms per token,   275.61 tokens per second)
llama_print_timings:        eval time =    1592.26 ms /    15 runs   (  106.15 ms per token,     9.42 tokens per second)
llama_print_timings:       total time =    2187.32 ms /    98 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The increase in the number of staff employed must be strictly controlled, and this is a point on which I disagree with the Committee of Wise Men.
hyp1: I agree with the idea of controlling the increase in the number of staff.
hyp2: I don't agree with the idea of controlling the increase in the number of staff.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.90 ms /    16 runs   (    0.56 ms per token,  1797.95 tokens per second)
llama_print_timings: prompt eval time =     270.21 ms /    74 tokens (    3.65 ms per token,   273.87 tokens per second)
llama_print_timings:        eval time =    1837.35 ms /    15 runs   (  122.49 ms per token,     8.16 tokens per second)
llama_print_timings:       total time =    2382.90 ms /    89 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The public must be aware of the collective responsibility borne by the whole Commission, but also of every individual Commissioner's accountability.
hyp1: Every individual accountability must be known by the Commissioners.
hyp2: Every individual Commissioner's accountability must be known by the public.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.96 ms /    16 runs   (    0.50 ms per token,  2010.81 tokens per second)
llama_print_timings: prompt eval time =     304.32 ms /    86 tokens (    3.54 ms per token,   282.60 tokens per second)
llama_print_timings:        eval time =    1608.15 ms /    15 runs   (  107.21 ms per token,     9.33 tokens per second)
llama_print_timings:       total time =    2216.67 ms /   101 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: We need to ensure that investigations at the external borders of the EU are effective and to investigate the main routes used for illegal activities.
hyp1: We need to make sure that the investigations at the EU's external borders are effective.
hyp2: We need to make sure that the investigations at the EU's external borders are not effective.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.81 ms /    16 runs   (    0.49 ms per token,  2049.18 tokens per second)
llama_print_timings: prompt eval time =     304.73 ms /   112 tokens (    2.72 ms per token,   367.54 tokens per second)
llama_print_timings:        eval time =    1580.85 ms /    15 runs   (  105.39 ms per token,     9.49 tokens per second)
llama_print_timings:       total time =    2265.13 ms /   127 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: For the same reason, we cannot accept Amendments Nos 5, 6, 7, 11, 16 and 20 or the second part of Amendments Nos 9 and 15.
hyp1: The second part of Amendments 9 and 15 cannot be accepted for the same reason.
hyp2: The fifth part of Amendments 9 and 15 cannot be accepted for the same reason.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.97 ms /    16 runs   (    0.50 ms per token,  2008.54 tokens per second)
llama_print_timings: prompt eval time =     290.94 ms /   101 tokens (    2.88 ms per token,   347.15 tokens per second)
llama_print_timings:        eval time =    1617.47 ms /    15 runs   (  107.83 ms per token,     9.27 tokens per second)
llama_print_timings:       total time =    2234.29 ms /   116 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Commissioner, the Malta forum will take place in March, and it is a great pity that the European Parliament will not be allowed to participate in it.
hyp1: It is sad that the European Parliament won't be able to participate in the Malta forum in March.
hyp2: It is sad that the European Parliament won't be able to participate in the Malta forum in March 2004.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =      16.70 ms /    16 runs   (    1.04 ms per token,   958.20 tokens per second)
llama_print_timings: prompt eval time =     338.43 ms /    92 tokens (    3.68 ms per token,   271.84 tokens per second)
llama_print_timings:        eval time =    2142.41 ms /    15 runs   (  142.83 ms per token,     7.00 tokens per second)
llama_print_timings:       total time =    3348.90 ms /   107 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: And I should like to warn this House that we must be careful when voting on Amendment No 98 so that we are not inconsistent.
hyp1: I would like to warn the House that all of us need to be careful when voting on the amendment.
hyp2: I would like to warn the House that they need to be careful when voting on the amendment.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.24 ms /    16 runs   (    0.52 ms per token,  1941.28 tokens per second)
llama_print_timings: prompt eval time =     314.59 ms /   102 tokens (    3.08 ms per token,   324.23 tokens per second)
llama_print_timings:        eval time =    1652.76 ms /    15 runs   (  110.18 ms per token,     9.08 tokens per second)
llama_print_timings:       total time =    2321.70 ms /   117 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In addition, there will be no benefit to exchequers as the taxable interest payments will be outside the 15 Member States.
hyp1: There will be no benefit to exchequers as interest payments outside of the 15 Member States will be taxed.
hyp2: There will be no benefit to exchequers as only interest payments outside of the 15 Member States can be taxed.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.26 ms /    16 runs   (    0.52 ms per token,  1936.58 tokens per second)
llama_print_timings: prompt eval time =     291.25 ms /    85 tokens (    3.43 ms per token,   291.84 tokens per second)
llama_print_timings:        eval time =    1578.19 ms /    15 runs   (  105.21 ms per token,     9.50 tokens per second)
llama_print_timings:       total time =    2167.28 ms /   100 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Mr Florenz said, in his nice way, that we would be taking a softer line and the directive would only apply from the year 2020.
hyp1: The directive would not apply from the year 2020.
hyp2: The directive would only apply from the year 2020.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.78 ms /    16 runs   (    0.49 ms per token,  2055.76 tokens per second)
llama_print_timings: prompt eval time =     272.62 ms /    79 tokens (    3.45 ms per token,   289.78 tokens per second)
llama_print_timings:        eval time =    1549.32 ms /    15 runs   (  103.29 ms per token,     9.68 tokens per second)
llama_print_timings:       total time =    2097.58 ms /    94 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Establishing a new framework directive is certainly going to take some doing: the way it was dealt with at first reading was extremely confusing.
hyp1: The way in which the framework directive was dealt with was very clear.
hyp2: The way in which the framework directive was dealt with was very confusing.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.74 ms /    16 runs   (    0.55 ms per token,  1830.66 tokens per second)
llama_print_timings: prompt eval time =     308.36 ms /    90 tokens (    3.43 ms per token,   291.87 tokens per second)
llama_print_timings:        eval time =    1875.98 ms /    15 runs   (  125.07 ms per token,     8.00 tokens per second)
llama_print_timings:       total time =    2720.76 ms /   105 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: However, there is one issue, outlined in paragraph 11, which clearly detracts from the report's otherwise good intentions.
hyp1: Paragraph 11 contains an issue that detracts from the good intentions of the report.
hyp2: Paragraph 11.4 contains an issue that detracts from the good intentions of the report.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =      13.92 ms /    16 runs   (    0.87 ms per token,  1149.34 tokens per second)
llama_print_timings: prompt eval time =     311.01 ms /    98 tokens (    3.17 ms per token,   315.10 tokens per second)
llama_print_timings:        eval time =    1860.74 ms /    15 runs   (  124.05 ms per token,     8.06 tokens per second)
llama_print_timings:       total time =    2737.75 ms /   113 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In Kyoto, the European Union committed itself to reducing greenhouse gas emissions by 8 % compared to 1990 levels before 2012.
hyp1: The European Union pledged in Kyoto to reduce greenhouse gas emissions by 8 percent.
hyp2: The European Union pledged in Kyoto to reduce greenhouse gas emissions by 8 metric tons of CO2.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.60 ms /    16 runs   (    0.48 ms per token,  2104.16 tokens per second)
llama_print_timings: prompt eval time =     305.09 ms /   104 tokens (    2.93 ms per token,   340.88 tokens per second)
llama_print_timings:        eval time =    1567.51 ms /    15 runs   (  104.50 ms per token,     9.57 tokens per second)
llama_print_timings:       total time =    2211.68 ms /   119 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: It is not only a pressing problem for the Member States in the Mediterranean region, but also concerns, to a large extent, the entire European Union.
hyp1: It's a big problem for the Member States in the Mediterranean region and it's also a big problem for the European Union.
hyp2: It's a big problem for the United States in the Mediterranean region and it's also a big problem for the European Union.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.33 ms /    16 runs   (    0.58 ms per token,  1715.27 tokens per second)
llama_print_timings: prompt eval time =     295.87 ms /   103 tokens (    2.87 ms per token,   348.13 tokens per second)
llama_print_timings:        eval time =    1721.33 ms /    15 runs   (  114.76 ms per token,     8.71 tokens per second)
llama_print_timings:       total time =    2369.31 ms /   118 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: It taxes the interest on the savings of the small investor and the small saver and eases the tax burden on companies and large conglomerates.
hyp1: It lowers the tax burden on companies and large conglomerates by taxing the interest on savings of small investers and savers.
hyp2: It lowers the tax burden on companies and large conglomerates by taxing the interest on savings.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.99 ms /    16 runs   (    0.50 ms per token,  2003.00 tokens per second)
llama_print_timings: prompt eval time =     288.26 ms /    78 tokens (    3.70 ms per token,   270.59 tokens per second)
llama_print_timings:        eval time =    1575.37 ms /    15 runs   (  105.02 ms per token,     9.52 tokens per second)
llama_print_timings:       total time =    2121.36 ms /    93 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Amendment No 3 refers to the putting in place of a Community plan of action which, however, has already been established.
hyp1: The Community plan of action has already been put in place successfully and with only a few delays.
hyp2: The Community plan of action has already been put in place.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.59 ms /    16 runs   (    0.47 ms per token,  2108.31 tokens per second)
llama_print_timings: prompt eval time =     285.47 ms /    87 tokens (    3.28 ms per token,   304.77 tokens per second)
llama_print_timings:        eval time =    1643.59 ms /    15 runs   (  109.57 ms per token,     9.13 tokens per second)
llama_print_timings:       total time =    2212.07 ms /   102 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Berlin was the scene of his rise to fame and it was to Berlin that he returned in 1945 to promote dialogue with the German people.
hyp1: In 1945 she came back to Berlin to promote dialogue with the Germans.
hyp2: In 1945 he came back to Berlin to promote dialogue with the Germans.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.27 ms /    16 runs   (    0.45 ms per token,  2199.62 tokens per second)
llama_print_timings: prompt eval time =     282.94 ms /    84 tokens (    3.37 ms per token,   296.88 tokens per second)
llama_print_timings:        eval time =    1582.69 ms /    15 runs   (  105.51 ms per token,     9.48 tokens per second)
llama_print_timings:       total time =    2151.62 ms /    99 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: However, I am opposed to the proposal contained in paragraph 8 of the resolution, because it will lead to increased supranationality.
hyp1: Paragraph 8 of the resolution will lead to decreased supranationality.
hyp2: Paragraph 8 of the resolution will lead to increased supranationality.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.02 ms /    16 runs   (    0.56 ms per token,  1774.43 tokens per second)
llama_print_timings: prompt eval time =     288.09 ms /    97 tokens (    2.97 ms per token,   336.71 tokens per second)
llama_print_timings:        eval time =    1778.25 ms /    15 runs   (  118.55 ms per token,     8.44 tokens per second)
llama_print_timings:       total time =    2405.31 ms /   112 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: According to what I have heard, the ISO will already have a label ready for the summer of 1999, in other words in a few months' time.
hyp1: The MSZT will have a label ready in a few months, according to what I've heard.
hyp2: The ISO will have a label ready in a few months, according to what I've heard.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.50 ms /    16 runs   (    0.47 ms per token,  2131.91 tokens per second)
llama_print_timings: prompt eval time =     294.05 ms /    74 tokens (    3.97 ms per token,   251.66 tokens per second)
llama_print_timings:        eval time =    1601.09 ms /    15 runs   (  106.74 ms per token,     9.37 tokens per second)
llama_print_timings:       total time =    2153.29 ms /    89 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Nevertheless, there are two particular points that cannot go unmentioned. The first relates to road transport and is the issue of working time.
hyp1: The particular issue of working time within road transport cannot be ignored.
hyp2: The issues of working time and road transport cannot be ignored.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.65 ms /    16 runs   (    0.48 ms per token,  2091.78 tokens per second)
llama_print_timings: prompt eval time =     277.27 ms /    88 tokens (    3.15 ms per token,   317.38 tokens per second)
llama_print_timings:        eval time =    1557.07 ms /    15 runs   (  103.80 ms per token,     9.63 tokens per second)
llama_print_timings:       total time =    2135.09 ms /   103 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In May 1998, the Single Market Council adopted provisions regarding the development of a single market in medicinal products.
hyp1: The Development of a Single Market in Medicinal Products was adopted by the Single Market Council in 1989.
hyp2: The Development of a Single Market in Medicinal Products was adopted by the Single Market Council.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.99 ms /    16 runs   (    0.50 ms per token,  2002.50 tokens per second)
llama_print_timings: prompt eval time =     283.19 ms /    86 tokens (    3.29 ms per token,   303.68 tokens per second)
llama_print_timings:        eval time =    1505.25 ms /    15 runs   (  100.35 ms per token,     9.97 tokens per second)
llama_print_timings:       total time =    2093.05 ms /   101 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: We should remember that the nuclear power station at Chernobyl was, until April 1986, said to have presented 'few risks'.
hyp1: The nuclear power station at Chernobyl was said to have presented'few risks'.
hyp2: The nuclear power station at Chernobyl was said to have presented 'no risks'.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.33 ms /    16 runs   (    0.58 ms per token,  1715.08 tokens per second)
llama_print_timings: prompt eval time =     283.10 ms /    90 tokens (    3.15 ms per token,   317.91 tokens per second)
llama_print_timings:        eval time =    1870.88 ms /    15 runs   (  124.73 ms per token,     8.02 tokens per second)
llama_print_timings:       total time =    2494.16 ms /   105 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The report we asked for from the committee of experts is part of the consequences of our withholding the discharge for 1996.
hyp1: The committee of experts gave us a report on the consequences of withholding the discharge.
hyp2: The committee of experts gave us a report as part of the consequences of withholding the discharge.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.40 ms /    16 runs   (    0.53 ms per token,  1904.31 tokens per second)
llama_print_timings: prompt eval time =     308.66 ms /    87 tokens (    3.55 ms per token,   281.87 tokens per second)
llama_print_timings:        eval time =    1562.69 ms /    15 runs   (  104.18 ms per token,     9.60 tokens per second)
llama_print_timings:       total time =    2155.20 ms /   102 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The irregularities which have been identified must never happen again, and this means that both the methods and the approach need to be changed.
hyp1: The approach and methods need to be changed because they need to never happen again.
hyp2: It's imperative to never repeat these irregularities, which requires a change in the methods and approach. 
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.65 ms /    16 runs   (    0.48 ms per token,  2091.23 tokens per second)
llama_print_timings: prompt eval time =     290.48 ms /    98 tokens (    2.96 ms per token,   337.37 tokens per second)
llama_print_timings:        eval time =    1544.74 ms /    15 runs   (  102.98 ms per token,     9.71 tokens per second)
llama_print_timings:       total time =    2146.25 ms /   113 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: I am referring more particularly here to Amendments Nos 4 and 33, which provide for the directive to be monitored by the Commission.
hyp1: Amendments Nos 4 and 33 give the Commission no power to monitor the directive.
hyp2: Amendments Nos 4 and 33 give the Commission the power to monitor the directive.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.14 ms /    16 runs   (    0.51 ms per token,  1964.88 tokens per second)
llama_print_timings: prompt eval time =     271.78 ms /    82 tokens (    3.31 ms per token,   301.71 tokens per second)
llama_print_timings:        eval time =    1561.09 ms /    15 runs   (  104.07 ms per token,     9.61 tokens per second)
llama_print_timings:       total time =    2100.85 ms /    97 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: That is why it is also important that the innovation dimension should become a more integral part of interregional and cross-border cooperation.
hyp1: It is important that innovation is included in interregional and cross border cooperation.
hyp2: It is important that innovation becomes a more essential part of interregional and cross border cooperation.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.14 ms /    16 runs   (    0.57 ms per token,  1750.55 tokens per second)
llama_print_timings: prompt eval time =     272.50 ms /    78 tokens (    3.49 ms per token,   286.23 tokens per second)
llama_print_timings:        eval time =    1795.65 ms /    15 runs   (  119.71 ms per token,     8.35 tokens per second)
llama_print_timings:       total time =    2390.43 ms /    93 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Environmental considerations must feature prominently in this process, particularly sustainable development, as it is one of the distinguishing features of the European Union.
hyp1: Sustainable development is a distinguishing feature of the European Union.
hyp2: One of the distinguishing features of the European Union is the environment.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.78 ms /    16 runs   (    0.49 ms per token,  2057.61 tokens per second)
llama_print_timings: prompt eval time =     295.50 ms /    98 tokens (    3.02 ms per token,   331.64 tokens per second)
llama_print_timings:        eval time =    1509.82 ms /    15 runs   (  100.65 ms per token,     9.93 tokens per second)
llama_print_timings:       total time =    2124.42 ms /   113 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Mrs Haug's report is an important addition to the Agenda 2000 debate, and to the ongoing preparation process for the next phase of the programme.
hyp1: An important addition to the Agenda 2000 debate is Mx Haug's report.
hyp2: An important addition to the Agenda 2000 debate is Mrs Haug's report.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.19 ms /    15 runs   (    0.48 ms per token,  2085.94 tokens per second)
llama_print_timings: prompt eval time =     301.88 ms /   109 tokens (    2.77 ms per token,   361.08 tokens per second)
llama_print_timings:        eval time =    1430.72 ms /    14 runs   (  102.19 ms per token,     9.79 tokens per second)
llama_print_timings:       total time =    2094.46 ms /   123 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Fortunately, there has been a cease-fire since 1994 which is opening the way for negotiation, and the Minsk Group is taking charge of this.
hyp1: There has been an open-fire since 1994 and the Minsk Group is in charge of this.
hyp2: There has been a cease-fire since 1994 and the Minsk Group is in charge of this.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.11 ms /    16 runs   (    0.51 ms per token,  1972.39 tokens per second)
llama_print_timings: prompt eval time =     288.16 ms /   110 tokens (    2.62 ms per token,   381.74 tokens per second)
llama_print_timings:        eval time =    1669.69 ms /    15 runs   (  111.31 ms per token,     8.98 tokens per second)
llama_print_timings:       total time =    2317.93 ms /   125 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Women make up 80 % of the people who carry out secretarial and office work, while men occupy 87 % of the managerial positions.
hyp1: Eighty percent of the people who work in the office are women, while 87 percent are men.
hyp2: Eighty percent of the people who work in the secretarial positions at the office are women, while 87 percent of those with managerial positions are men.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.72 ms /    16 runs   (    0.61 ms per token,  1645.92 tokens per second)
llama_print_timings: prompt eval time =     293.22 ms /    93 tokens (    3.15 ms per token,   317.17 tokens per second)
llama_print_timings:        eval time =    1830.86 ms /    15 runs   (  122.06 ms per token,     8.19 tokens per second)
llama_print_timings:       total time =    2561.72 ms /   108 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In a similar manner, I could accept the proposed Amendment No 11, which would require the same separation but in a shorter time period.
hyp1: The proposed Amendment No 11 requires the same separation, but in a shorter period of time.
hyp2: The proposed Amendment No 11 previously required the same separation, but in a shorter period of time.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.00 ms /    16 runs   (    0.56 ms per token,  1777.19 tokens per second)
llama_print_timings: prompt eval time =     301.02 ms /    85 tokens (    3.54 ms per token,   282.37 tokens per second)
llama_print_timings:        eval time =    1535.31 ms /    15 runs   (  102.35 ms per token,     9.77 tokens per second)
llama_print_timings:       total time =    2112.87 ms /   100 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The outcome will hinge on how the new decision-making procedures work out in practice. We cannot therefore support paragraphs 30 and 31.
hyp1: Paragraphs 300 and 31 are not supported by us.
hyp2: Paragraphs 30 and 31 are not supported by us.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.99 ms /    16 runs   (    0.50 ms per token,  2001.75 tokens per second)
llama_print_timings: prompt eval time =     280.78 ms /    84 tokens (    3.34 ms per token,   299.16 tokens per second)
llama_print_timings:        eval time =    1754.23 ms /    15 runs   (  116.95 ms per token,     8.55 tokens per second)
llama_print_timings:       total time =    2344.60 ms /    99 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: But the US, in threatening a trade boycott, is overstepping the bounds of that clear position and the bounds of the transatlantic partnership.
hyp1: The US is overstepping its bounds by threatening to boycott the oil trade.
hyp2: The US is overstepping its bounds by threatening to boycott trade.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.24 ms /    16 runs   (    0.52 ms per token,  1941.28 tokens per second)
llama_print_timings: prompt eval time =     388.60 ms /   161 tokens (    2.41 ms per token,   414.31 tokens per second)
llama_print_timings:        eval time =    1997.17 ms /    15 runs   (  133.14 ms per token,     7.51 tokens per second)
llama_print_timings:       total time =    2906.03 ms /   176 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In short, therefore, the Commission accepts Amendment Nos 1, 5, 6, 7, 8, 9 and 11, whilst rejecting Amendments Nos 2, 3, 4, 10 and 12.
hyp1: The Commission accepts Amendments 1 to 9 and 11 and rejects Amendments 3, 4, 10 and 12.
hyp2: The Commission accepts Amendments 1, 5, 6, 7, 8, 9 and 11 and rejects Amendments 3, 4, 10 and 12.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.60 ms /    16 runs   (    0.54 ms per token,  1861.55 tokens per second)
llama_print_timings: prompt eval time =     313.23 ms /    98 tokens (    3.20 ms per token,   312.87 tokens per second)
llama_print_timings:        eval time =    1644.42 ms /    15 runs   (  109.63 ms per token,     9.12 tokens per second)
llama_print_timings:       total time =    2458.01 ms /   113 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: While the Kalanke ruling was a setback, the European Court of Justice's decision in the 1997 Marschall judgment was encouraging.
hyp1: The European Court of Justice's 1997 Marschall judgement was an encouraging one.
hyp2: The European Court of Justice's 1997 Marschall judgement was not an encouraging one.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.75 ms /    16 runs   (    0.48 ms per token,  2065.58 tokens per second)
llama_print_timings: prompt eval time =     291.12 ms /    89 tokens (    3.27 ms per token,   305.72 tokens per second)
llama_print_timings:        eval time =    1539.18 ms /    15 runs   (  102.61 ms per token,     9.75 tokens per second)
llama_print_timings:       total time =    2126.05 ms /   104 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: People would think we were mad if we said tomorrow that drivers were allowed to take their cars on the road without an insurance certificate.
hyp1: If we said drivers could take their cars on the road without insurance, people would not think we were crazy.
hyp2: If we said drivers could take their cars on the road without insurance, people would think we were crazy.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.24 ms /    16 runs   (    0.51 ms per token,  1942.45 tokens per second)
llama_print_timings: prompt eval time =     273.96 ms /    79 tokens (    3.47 ms per token,   288.36 tokens per second)
llama_print_timings:        eval time =    1646.57 ms /    15 runs   (  109.77 ms per token,     9.11 tokens per second)
llama_print_timings:       total time =    2194.73 ms /    94 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: As we have already said, the adoption of such a stance is incompatible with the aims of the Treaty, because agricultural expenditure is compulsory.
hyp1: The aims of the Treaty are compatible with the stance adopted.
hyp2: The aims of the Treaty are incompatible with the stance adopted.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.35 ms /    14 runs   (    0.60 ms per token,  1676.04 tokens per second)
llama_print_timings: prompt eval time =     305.52 ms /    79 tokens (    3.87 ms per token,   258.58 tokens per second)
llama_print_timings:        eval time =    1634.56 ms /    13 runs   (  125.74 ms per token,     7.95 tokens per second)
llama_print_timings:       total time =    2393.43 ms /    92 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Of the 279 local authorities that exist in Sweden, 211 - all of them in the interior of the country - have lost part of their population.
hyp1: The majority of the national authorities in Sweden have lost population.
hyp2: The majority of the local authorities in Sweden have lost population.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.77 ms /    16 runs   (    0.49 ms per token,  2059.20 tokens per second)
llama_print_timings: prompt eval time =     327.75 ms /   126 tokens (    2.60 ms per token,   384.45 tokens per second)
llama_print_timings:        eval time =    1518.01 ms /    15 runs   (  101.20 ms per token,     9.88 tokens per second)
llama_print_timings:       total time =    2261.68 ms /   141 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: This is also the reason why we cannot accept Amendment No 25 or Amendments Nos 46 and 54, but we can endorse Amendment No 43.
hyp1: I can endorse Amendment No 43 but I can't accept Amendments No 25 and Nos 46 and 54.
hyp2: We can endorse Amendment No 43 but we can't accept Amendments No 25 and Nos 46 and 54.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.88 ms /    16 runs   (    0.49 ms per token,  2030.46 tokens per second)
llama_print_timings: prompt eval time =     291.62 ms /    95 tokens (    3.07 ms per token,   325.76 tokens per second)
llama_print_timings:        eval time =    1548.85 ms /    15 runs   (  103.26 ms per token,     9.68 tokens per second)
llama_print_timings:       total time =    2145.21 ms /   110 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Madam President, in yesterday's International Herald Tribune, the following appeared in the section 'News from 50 years ago'.
hyp1: The section "news from 50 years ago" appeared in yesterday'sInternational Herald Tribune.
hyp2: This appeared in section "news from 50 years ago" in yesterday's International Herald Tribune.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.48 ms /    16 runs   (    0.47 ms per token,  2138.47 tokens per second)
llama_print_timings: prompt eval time =     287.68 ms /    88 tokens (    3.27 ms per token,   305.89 tokens per second)
llama_print_timings:        eval time =    1574.10 ms /    15 runs   (  104.94 ms per token,     9.53 tokens per second)
llama_print_timings:       total time =    2151.72 ms /   103 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The current status quo of a 12-mile limit for the area is the best possible compromise for the whole of the French fishing sector.
hyp1: The current 12 kilometer limit is the best compromise for the entire French fishing sector.
hyp2: The current 12-mile limit is the best compromise for the entire French fishing sector.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.86 ms /    16 runs   (    0.55 ms per token,  1806.28 tokens per second)
llama_print_timings: prompt eval time =     279.75 ms /    84 tokens (    3.33 ms per token,   300.27 tokens per second)
llama_print_timings:        eval time =    1792.78 ms /    15 runs   (  119.52 ms per token,     8.37 tokens per second)
llama_print_timings:       total time =    2405.19 ms /    99 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: It has come to my knowledge that we have 300 freelance interpreters whom the Commission has not paid properly since October.
hyp1: 300 interpreters have been adequetly paid  by the Commission. 
hyp2: 300 interpreters have not been paid properly by the Commission.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.91 ms /    16 runs   (    0.49 ms per token,  2021.73 tokens per second)
llama_print_timings: prompt eval time =     305.61 ms /    94 tokens (    3.25 ms per token,   307.58 tokens per second)
llama_print_timings:        eval time =    1645.65 ms /    15 runs   (  109.71 ms per token,     9.11 tokens per second)
llama_print_timings:       total time =    2252.17 ms /   109 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: For example, the conclave of foreign ministers in Luxembourg on 21 February made substantial progress on a number of issues.
hyp1: Significant progress was made on a number of issues during the foreign minister's conclave in Luxembourg City.
hyp2: Significant progress was made on a number of issues during the foreign minister's conclave in Luxembourg.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.13 ms /    16 runs   (    0.51 ms per token,  1968.75 tokens per second)
llama_print_timings: prompt eval time =     291.85 ms /    86 tokens (    3.39 ms per token,   294.67 tokens per second)
llama_print_timings:        eval time =    1496.46 ms /    15 runs   (   99.76 ms per token,    10.02 tokens per second)
llama_print_timings:       total time =    2072.54 ms /   101 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Cigarettes account for the other 95 %, but as far as they are concerned, this report only deals with technical adjustments to the taxation regime.
hyp1: The report only deals with technical adjustments to the taxation regime and excludes cigarettes.
hyp2: The report only deals with technical adjustments to the taxation regime.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.47 ms /    16 runs   (    0.59 ms per token,  1689.90 tokens per second)
llama_print_timings: prompt eval time =     290.07 ms /    80 tokens (    3.63 ms per token,   275.79 tokens per second)
llama_print_timings:        eval time =    1863.22 ms /    15 runs   (  124.21 ms per token,     8.05 tokens per second)
llama_print_timings:       total time =    2618.25 ms /    95 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: But at that time the agricultural proportion of the budget, accounting for over 70 %, was considerably higher than it is today.
hyp1: More than 70 dollars from the budget was devoted to agriculture at that time.
hyp2: More than 70 percent of the budget was devoted to agriculture at that time.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.28 ms /    16 runs   (    0.58 ms per token,  1725.07 tokens per second)
llama_print_timings: prompt eval time =     312.65 ms /    96 tokens (    3.26 ms per token,   307.05 tokens per second)
llama_print_timings:        eval time =    1865.46 ms /    15 runs   (  124.36 ms per token,     8.04 tokens per second)
llama_print_timings:       total time =    2707.00 ms /   111 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In two days' time, we shall have the debate and the decisions of the European Council on Agenda 2000 and the financial perspective.
hyp1: The European Council on Agenda 2000 and financial perspective will be debated in twenty two days.
hyp2: The European Council on Agenda 2000 and financial perspective will be debated in two days.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.85 ms /    16 runs   (    0.49 ms per token,  2039.00 tokens per second)
llama_print_timings: prompt eval time =     304.40 ms /    86 tokens (    3.54 ms per token,   282.52 tokens per second)
llama_print_timings:        eval time =    1586.90 ms /    15 runs   (  105.79 ms per token,     9.45 tokens per second)
llama_print_timings:       total time =    2182.87 ms /   101 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The Member States are, in other words, being asked to harmonise their economic policies - which is one of the reasons why I voted against this report.
hyp1: I voted against the report because Member States are being asked to change their economic policies.
hyp2: I voted against the report because Member States are being asked to reconcile their economic policies.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.72 ms /    16 runs   (    0.48 ms per token,  2072.27 tokens per second)
llama_print_timings: prompt eval time =     275.36 ms /    80 tokens (    3.44 ms per token,   290.53 tokens per second)
llama_print_timings:        eval time =    1566.69 ms /    15 runs   (  104.45 ms per token,     9.57 tokens per second)
llama_print_timings:       total time =    2124.55 ms /    95 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: We have also considered this, and we think that these vehicles should only be exempt from Articles 4 and 7 of the proposal.
hyp1: We think the vehicles should be exempt articles number 4 and 7 from the proposal.
hyp2: We think the vehicles should be exempt from the proposal.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.39 ms /    16 runs   (    0.46 ms per token,  2164.50 tokens per second)
llama_print_timings: prompt eval time =     279.88 ms /    96 tokens (    2.92 ms per token,   343.00 tokens per second)
llama_print_timings:        eval time =    1579.39 ms /    15 runs   (  105.29 ms per token,     9.50 tokens per second)
llama_print_timings:       total time =    2172.66 ms /   111 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: How is rail transport supposed to compete with road transport if large containers cannot be brought to the trains and we are therefore forced to accept the 44 tonne limit?
hyp1: If rail transport can't be brought to the trains, how can large containers compete with road transportation?
hyp2: If large containers can't be brought to the trains, how can rail transport compete with road transportation?
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.03 ms /    16 runs   (    0.56 ms per token,  1771.68 tokens per second)
llama_print_timings: prompt eval time =     301.34 ms /    98 tokens (    3.07 ms per token,   325.22 tokens per second)
llama_print_timings:        eval time =    1776.46 ms /    15 runs   (  118.43 ms per token,     8.44 tokens per second)
llama_print_timings:       total time =    2614.93 ms /   113 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: For the latter, the initial birth of several operators is now giving way to the reconcentration of the sector in the hands of a single company.
hyp1: The establishment of a number of operators is giving way to the reconcentration of the sector in the hands of one company.
hyp2: Several operators have given way to the reconcentration of the sector in the hands of one company.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.51 ms /    16 runs   (    0.47 ms per token,  2129.36 tokens per second)
llama_print_timings: prompt eval time =     285.28 ms /    68 tokens (    4.20 ms per token,   238.36 tokens per second)
llama_print_timings:        eval time =    1512.27 ms /    15 runs   (  100.82 ms per token,     9.92 tokens per second)
llama_print_timings:       total time =    2023.44 ms /    83 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: At the same time, and I always thought this was correct, we have quite simply deleted Chapter 8 from the Convention entirely.
hyp1: I have completely removed Chapter 8 from the Convention.
hyp2: We have completely removed Chapter 8 from the Convention.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.72 ms /    16 runs   (    0.48 ms per token,  2072.81 tokens per second)
llama_print_timings: prompt eval time =     296.34 ms /    85 tokens (    3.49 ms per token,   286.83 tokens per second)
llama_print_timings:        eval time =    1570.06 ms /    15 runs   (  104.67 ms per token,     9.55 tokens per second)
llama_print_timings:       total time =    2140.20 ms /   100 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: It is true that even greater efforts could be made in this direction, as my honourable colleague said, and indeed we are demanding progress here.
hyp1: I agree with my colleague and fellow lawyers that more efforts could be made in this direction.
hyp2: I agree with my colleague that more efforts could be made in this direction.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.11 ms /    16 runs   (    0.51 ms per token,  1974.09 tokens per second)
llama_print_timings: prompt eval time =     280.05 ms /    83 tokens (    3.37 ms per token,   296.37 tokens per second)
llama_print_timings:        eval time =    1581.67 ms /    15 runs   (  105.44 ms per token,     9.48 tokens per second)
llama_print_timings:       total time =    2129.92 ms /    98 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: For that reason, on 17 December 1998 the Council adopted a comprehensive programme to supply agricultural produce to Russia.
hyp1: The Council adopted a programme to supply agricultural produce to Russia in 1917.
hyp2: The Council adopted a programme to supply agricultural produce to Russia in 1998.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =      12.41 ms /    16 runs   (    0.78 ms per token,  1289.18 tokens per second)
llama_print_timings: prompt eval time =     313.80 ms /    93 tokens (    3.37 ms per token,   296.36 tokens per second)
llama_print_timings:        eval time =    1759.77 ms /    15 runs   (  117.32 ms per token,     8.52 tokens per second)
llama_print_timings:       total time =    2592.66 ms /   108 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Frontier regions where the inhabitants do not understand their neighbour's language cannot fully experience the single market at local level.
hyp1: The single market can't be fully experienced in frontier regions where the inhabitants understand their neighbours' languages.
hyp2: The single market can't be fully experienced in frontier regions where the inhabitants don't understand their neighbours' languages.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.71 ms /    16 runs   (    0.48 ms per token,  2076.30 tokens per second)
llama_print_timings: prompt eval time =     282.42 ms /    87 tokens (    3.25 ms per token,   308.05 tokens per second)
llama_print_timings:        eval time =    1615.91 ms /    15 runs   (  107.73 ms per token,     9.28 tokens per second)
llama_print_timings:       total time =    2176.82 ms /   102 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: We welcome the adoption by the Council of Ministers of general guidelines on the issue of the death penalty in June 1998.
hyp1: The Council of Ministers adopted general guidelines regarding the death penalty in late 1998.
hyp2: The Council of Ministers adopted general guidelines regarding the death penalty in 1998.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.09 ms /    16 runs   (    0.51 ms per token,  1977.75 tokens per second)
llama_print_timings: prompt eval time =     295.39 ms /    91 tokens (    3.25 ms per token,   308.07 tokens per second)
llama_print_timings:        eval time =    1645.64 ms /    15 runs   (  109.71 ms per token,     9.12 tokens per second)
llama_print_timings:       total time =    2233.87 ms /   106 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: In addition, television campaigns designed to dissuade would-be immigrants from making the crossing could be broadcast on television in northern Morocco.
hyp1: Television campaigns designed to discourage would-be immigrants from crossing the border could be broadcasted on television.
hyp2: Television campaigns designed to persuade would-be immigrants from crossing the border could be broadcasted on television.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.33 ms /    16 runs   (    0.52 ms per token,  1920.54 tokens per second)
llama_print_timings: prompt eval time =     294.73 ms /   100 tokens (    2.95 ms per token,   339.30 tokens per second)
llama_print_timings:        eval time =    1641.22 ms /    15 runs   (  109.41 ms per token,     9.14 tokens per second)
llama_print_timings:       total time =    2263.95 ms /   115 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Thirdly, the Commission agrees with the preoccupation of the rapporteur as far as the famous RAL, reste à liquider , the backlog, is concerned.
hyp1: The Commission agrees with the rapporteur's preoccupation with the famous RAL, reste liquider.
hyp2: The Commission agrees with the rapporteur's preoccupation with the famous RAL, reste à liquider.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.06 ms /    16 runs   (    0.57 ms per token,  1766.39 tokens per second)
llama_print_timings: prompt eval time =     292.69 ms /    76 tokens (    3.85 ms per token,   259.66 tokens per second)
llama_print_timings:        eval time =    1768.70 ms /    15 runs   (  117.91 ms per token,     8.48 tokens per second)
llama_print_timings:       total time =    2463.27 ms /    91 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: 1.Negotiations between the governments on the reform of agricultural policy are still in progress and will be resumed today.
hyp1: The negotiations regarding the reform of agricultural policy have been finalized by the governments.
hyp2: The reform of agricultural policy is still being negotiated by the governments.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.35 ms /    16 runs   (    0.46 ms per token,  2178.06 tokens per second)
llama_print_timings: prompt eval time =     295.34 ms /    81 tokens (    3.65 ms per token,   274.26 tokens per second)
llama_print_timings:        eval time =    1513.81 ms /    15 runs   (  100.92 ms per token,     9.91 tokens per second)
llama_print_timings:       total time =    2067.47 ms /    96 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: It also recommends strengthening the initiative-taking and political impetus of the Commission, whose political accountability would also be improved in that way.
hyp1: The Commission's political accountability would not be improved in this way.
hyp2: The Commission's political accountability would be improved in this way.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.84 ms /    16 runs   (    0.49 ms per token,  2041.60 tokens per second)
llama_print_timings: prompt eval time =     287.69 ms /    96 tokens (    3.00 ms per token,   333.69 tokens per second)
llama_print_timings:        eval time =    1530.29 ms /    15 runs   (  102.02 ms per token,     9.80 tokens per second)
llama_print_timings:       total time =    2130.10 ms /   111 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Another declaration to mention in this respect is Declaration No 39 annexed to the Treaty of Amsterdam on the quality of drafting.
hyp1: Declaration No 39 was annexed to the Treaty of Amsterdam to discuss drafting quality.
hyp2: Declaration No 39 wasn't annexed to the Treaty of Amsterdam to discuss drafting quality.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.72 ms /    16 runs   (    0.48 ms per token,  2072.81 tokens per second)
llama_print_timings: prompt eval time =     282.93 ms /    87 tokens (    3.25 ms per token,   307.50 tokens per second)
llama_print_timings:        eval time =    1517.97 ms /    15 runs   (  101.20 ms per token,     9.88 tokens per second)
llama_print_timings:       total time =    2089.92 ms /   102 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: It is extremely important that the forthcoming European Council meeting in Berlin should cut the Gordian knot on Agenda 2000.
hyp1: Agenda 2000 should be fully handled during the European Council meeting in Berlin.
hyp2: Agenda 2000 should be cut at the European Council meeting in Berlin.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.19 ms /    16 runs   (    0.57 ms per token,  1741.97 tokens per second)
llama_print_timings: prompt eval time =     329.56 ms /   110 tokens (    3.00 ms per token,   333.78 tokens per second)
llama_print_timings:        eval time =    1565.09 ms /    15 runs   (  104.34 ms per token,     9.58 tokens per second)
llama_print_timings:       total time =    2450.51 ms /   125 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: As a result of this investment, which I fully support, the suburban rail network will be increased in capacity in terms of over 60 %.
hyp1: The investment will increase the suburban rail network's capacity by more than 60 percent.
hyp2: The investment will increase the suburban rail network's capacity by more than 60 percentm which is an investment I support given the latest numbers provided by the Suburban Rail Network Committee.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.55 ms /    16 runs   (    0.47 ms per token,  2120.33 tokens per second)
llama_print_timings: prompt eval time =     301.04 ms /    97 tokens (    3.10 ms per token,   322.22 tokens per second)
llama_print_timings:        eval time =    1495.62 ms /    15 runs   (   99.71 ms per token,    10.03 tokens per second)
llama_print_timings:       total time =    2106.04 ms /   112 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: That is precisely why we cannot give way to the unjustified US demand, even though and in fact just because they are threatening lunatic sanctions.
hyp1: That's why we can't give in to the US demand even though they're threatening sanctions.
hyp2: That's why they can't give in to the US demand even though we're threatening sanctions.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.00 ms /    16 runs   (    0.50 ms per token,  2000.00 tokens per second)
llama_print_timings: prompt eval time =     269.60 ms /    76 tokens (    3.55 ms per token,   281.90 tokens per second)
llama_print_timings:        eval time =    1556.51 ms /    15 runs   (  103.77 ms per token,     9.64 tokens per second)
llama_print_timings:       total time =    2072.04 ms /    91 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: All aspects of the question need to be brought together; perhaps a thorough investigation of the whole practice of aquaculture is now called for.
hyp1: A thorough investigation of all aspects of the question is needed.
hyp2: A thorough investigation of all aspects of aquaculture may we warranted. 
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.09 ms /    16 runs   (    0.51 ms per token,  1978.24 tokens per second)
llama_print_timings: prompt eval time =     279.69 ms /    82 tokens (    3.41 ms per token,   293.18 tokens per second)
llama_print_timings:        eval time =    1649.05 ms /    15 runs   (  109.94 ms per token,     9.10 tokens per second)
llama_print_timings:       total time =    2215.15 ms /    97 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Mr President, the dossier on European cities of culture which has been open since October 1997 will be closed, I hope, tomorrow.
hyp1: I hope that the European cities of culture will be closed tomorrow.
hyp2: I hope that the dossier on European cities of culture will be closed tomorrow.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.82 ms /    15 runs   (    0.52 ms per token,  1917.42 tokens per second)
llama_print_timings: prompt eval time =     311.18 ms /    84 tokens (    3.70 ms per token,   269.94 tokens per second)
llama_print_timings:        eval time =    1490.21 ms /    14 runs   (  106.44 ms per token,     9.39 tokens per second)
llama_print_timings:       total time =    2211.91 ms /    98 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Premiums are to be increased by 10 %, with a corresponding reduction in special subsidies allocated through producer organisations.
hyp1: Premiums will decrease by 10 % and special subsidies will be increased.
hyp2: Premiums will increase by 10 % and special subsidies will be reduced.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.52 ms /    16 runs   (    0.47 ms per token,  2127.09 tokens per second)
llama_print_timings: prompt eval time =     282.71 ms /    84 tokens (    3.37 ms per token,   297.12 tokens per second)
llama_print_timings:        eval time =    1517.92 ms /    15 runs   (  101.19 ms per token,     9.88 tokens per second)
llama_print_timings:       total time =    2070.66 ms /    99 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: At the end of 1997, the Commission put forward a controversial proposal for the harmonisation of copyright law in the Community.
hyp1: The proposal for copyright harmonization was put forward at the end of 1997.
hyp2: The harmonisation proposal was put forward at the end of 1997.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.08 ms /    16 runs   (    0.50 ms per token,  1980.93 tokens per second)
llama_print_timings: prompt eval time =     285.68 ms /    99 tokens (    2.89 ms per token,   346.55 tokens per second)
llama_print_timings:        eval time =    1524.93 ms /    15 runs   (  101.66 ms per token,     9.84 tokens per second)
llama_print_timings:       total time =    2122.28 ms /   114 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: I had proposed to you that the Euro-11 be recognised in the Treaty, but my understanding was that Mr Spiers did not agree.
hyp1: I wanted the Euro-11 to be recognised in the Treaty but Mr. Spiers didn't agree with me.
hyp2: I wanted the Euro-11 to be recognised in the Treaty and Mr. Spiers agreed with me.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.23 ms /    16 runs   (    0.51 ms per token,  1944.34 tokens per second)
llama_print_timings: prompt eval time =     272.11 ms /    77 tokens (    3.53 ms per token,   282.97 tokens per second)
llama_print_timings:        eval time =    1671.21 ms /    15 runs   (  111.41 ms per token,     8.98 tokens per second)
llama_print_timings:       total time =    2209.40 ms /    92 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: As a result, we would like to be convinced that these cooperation agreements are not going to contribute to such extremely dangerous research.
hyp1: She want to be sure that these agreements won't contribute to dangerous research.
hyp2: We want to be sure that these agreements won't contribute to dangerous research.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.63 ms /    16 runs   (    0.48 ms per token,  2095.89 tokens per second)
llama_print_timings: prompt eval time =     307.53 ms /    84 tokens (    3.66 ms per token,   273.15 tokens per second)
llama_print_timings:        eval time =    1531.61 ms /    15 runs   (  102.11 ms per token,     9.79 tokens per second)
llama_print_timings:       total time =    2117.22 ms /    99 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: An important factor here is that the pay and other working conditions of those employed in navigation should be in line with those of other occupations.
hyp1: The pay and working conditions of those employed in navigation should be better than those of other jobs.
hyp2: The pay and working conditions of those employed in navigation should match those of other jobs.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.89 ms /    16 runs   (    0.49 ms per token,  2027.88 tokens per second)
llama_print_timings: prompt eval time =     292.70 ms /   106 tokens (    2.76 ms per token,   362.15 tokens per second)
llama_print_timings:        eval time =    1588.10 ms /    15 runs   (  105.87 ms per token,     9.45 tokens per second)
llama_print_timings:       total time =    2206.91 ms /   121 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Indeed, without the chairmanship skills of the Deputy Prime Minister, John Prescott, the Conference would have produced more hot air than it prevented!
hyp1: The Conference wouldn't have produced as much hot air if it weren't for the chairmanship skills of the deputy prime minister.
hyp2: The Conference would have made more hot air than it prevented if it weren't for the chairmanship skills of the deputy prime minister!
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.71 ms /    16 runs   (    0.48 ms per token,  2074.15 tokens per second)
llama_print_timings: prompt eval time =     275.48 ms /    87 tokens (    3.17 ms per token,   315.81 tokens per second)
llama_print_timings:        eval time =    1655.50 ms /    15 runs   (  110.37 ms per token,     9.06 tokens per second)
llama_print_timings:       total time =    2207.42 ms /   102 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: I want to say very clearly that the Commission is no longer be prepared to accept ever-increasing tasks without receiving the means to execute them.
hyp1: The Commission is not prepared to accept ever increasing tasks  while getting the means to execute them.
hyp2: The Commission is not prepared to accept ever increasing tasks without getting the means to execute them.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.47 ms /    16 runs   (    0.59 ms per token,  1689.55 tokens per second)
llama_print_timings: prompt eval time =     292.37 ms /   102 tokens (    2.87 ms per token,   348.87 tokens per second)
llama_print_timings:        eval time =    1798.50 ms /    15 runs   (  119.90 ms per token,     8.34 tokens per second)
llama_print_timings:       total time =    2432.27 ms /   117 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The enlargement of the EU compels us to pay greater attention to these questions, a point which is also emphasised in the committee's proposals.
hyp1: The committee's proposals emphasize that the EU's enlargement makes us pay more attention to the questions.
hyp2: The committee's proposals emphasize that the EEAS's enlargement makes us pay more attention to the questions.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.61 ms /    16 runs   (    0.54 ms per token,  1859.38 tokens per second)
llama_print_timings: prompt eval time =     306.61 ms /    84 tokens (    3.65 ms per token,   273.96 tokens per second)
llama_print_timings:        eval time =    1510.24 ms /    15 runs   (  100.68 ms per token,     9.93 tokens per second)
llama_print_timings:       total time =    2083.39 ms /    99 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: I am referring to the current vote but in relation to a vote that will take place later, namely the vote on Amendment No 98.
hyp1: The vote on Amendment No 98 will take place before the current vote.
hyp2: The vote on Amendment No 98 will take place after the current vote.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.61 ms /    16 runs   (    0.48 ms per token,  2103.05 tokens per second)
llama_print_timings: prompt eval time =     283.45 ms /    86 tokens (    3.30 ms per token,   303.40 tokens per second)
llama_print_timings:        eval time =    1470.38 ms /    15 runs   (   98.03 ms per token,    10.20 tokens per second)
llama_print_timings:       total time =    2022.94 ms /   101 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: When I became a Member of the European Parliament in 1994, I was struck by the lack of knowledge and the total absence of understanding of the overseas territories.
hyp1: I was struck by the abundance of knowledge when I joined the European Parliament.
hyp2: I was struck by the lack of knowledge when I joined the European Parliament.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.76 ms /    16 runs   (    0.49 ms per token,  2061.06 tokens per second)
llama_print_timings: prompt eval time =     278.75 ms /    84 tokens (    3.32 ms per token,   301.34 tokens per second)
llama_print_timings:        eval time =    1505.52 ms /    15 runs   (  100.37 ms per token,     9.96 tokens per second)
llama_print_timings:       total time =    2051.10 ms /    99 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Mr President, let me express my thanks for the preceding remarks and extend a special welcome to my honourable colleague the Transport Commissioner.
hyp1: Mr President, I would like to extend a warm welcome to the Transport Commissioner.
hyp2: Madam President, I would like to extend a warm welcome to the Transport Commissioner.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       9.40 ms /    16 runs   (    0.59 ms per token,  1701.40 tokens per second)
llama_print_timings: prompt eval time =     297.01 ms /   108 tokens (    2.75 ms per token,   363.63 tokens per second)
llama_print_timings:        eval time =    1858.07 ms /    15 runs   (  123.87 ms per token,     8.07 tokens per second)
llama_print_timings:       total time =    2510.38 ms /   123 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: We can replace 16 out of 626 Members of the European Parliament, but we can never again as voters decide to bring in a new law.
hyp1: As voters we cannot decide on a new law, but we can replace 16 out of 626 Members of the European Parliament.
hyp2: As voters decide on a new law, we can replace 16 out of 626 Members of the European Parliament.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.44 ms /    16 runs   (    0.46 ms per token,  2151.41 tokens per second)
llama_print_timings: prompt eval time =     292.96 ms /    88 tokens (    3.33 ms per token,   300.38 tokens per second)
llama_print_timings:        eval time =    1681.97 ms /    15 runs   (  112.13 ms per token,     8.92 tokens per second)
llama_print_timings:       total time =    2245.92 ms /   103 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: The Council is making a mistake by leaving it entirely up to the United States to formulate proposals for a solution to the Middle East problem.
hyp1: The United States should not be the only ones to come up with a solution to the Middle East problem
hyp2: The United States needs to come up with a solution to the Middle East problem, not the Council.
label: [/INST] 
 
hyp2


In [73]:
# Inspect the collected English predictions: a list with one dict per sample,
# each holding the keys 'id', 'label' and 'explanation'.
print(prediction_en)

[{'id': 0, 'label': 'hyp1', 'explanation': ''}, {'id': 1, 'label': 'hyp1', 'explanation': ''}, {'id': 2, 'label': 'hyp1', 'explanation': ''}, {'id': 3, 'label': 'hyp1', 'explanation': ''}, {'id': 4, 'label': 'hyp2', 'explanation': ''}, {'id': 5, 'label': 'hyp1', 'explanation': ''}, {'id': 6, 'label': 'hyp1', 'explanation': ''}, {'id': 7, 'label': 'hyp1', 'explanation': ''}, {'id': 8, 'label': 'hyp1', 'explanation': ''}, {'id': 9, 'label': 'hyp2', 'explanation': ''}, {'id': 10, 'label': 'hyp1', 'explanation': ''}, {'id': 11, 'label': 'hyp1', 'explanation': ''}, {'id': 12, 'label': 'hyp1', 'explanation': ''}, {'id': 13, 'label': 'hyp1', 'explanation': ''}, {'id': 14, 'label': 'hyp2', 'explanation': ''}, {'id': 15, 'label': 'hyp1', 'explanation': ''}, {'id': 16, 'label': 'hyp1', 'explanation': ''}, {'id': 17, 'label': 'hyp1', 'explanation': ''}, {'id': 18, 'label': 'hyp1', 'explanation': ''}, {'id': 19, 'label': 'hyp2', 'explanation': ''}, {'id': 20, 'label': 'hyp1', 'explanation': ''}, {

In [None]:
# Few-shot hallucination detection on the Swedish test split.
# Every sample is machine-translated from Swedish to English, appended to the
# few-shot prompt and sent to the model, which is expected to reply with the
# hypothesis tag ('hyp1' or 'hyp2') that is not supported by the source.

# List to store the results (one dict per sample: id, label, explanation).
prediction_sv = []

# The language pair never changes, so a single translator instance is reused
# instead of building a new one for every sentence.
sv_to_en = GoogleTranslator(source='sv', target='en')

# Finds the first hypothesis tag in the model reply, regardless of casing.
label_pattern = re.compile(r'hyp[12]', re.IGNORECASE)

for sample in test_ds['test_detection_swedish']:
  src = sv_to_en.translate(sample['source'])
  hyp1 = sv_to_en.translate(sample['hyp1'])
  hyp2 = sv_to_en.translate(sample['hyp2'])
  # Named `sample_id` so the builtin `id` is not shadowed.
  sample_id = sample['id']

  current_sample = prompt_context+"src: "+src+"\nhyp1: "+hyp1 + '\nhyp2: '+hyp2+'\nlabel: [/INST] \n '
  prompt = few_shot_samples_sv+current_sample

  response = lcpp_llm(
        prompt=prompt,
        temperature= 0.2,
        logprobs=1,
      )

  reply = str(response["choices"][0]["text"]).strip()

  # Prefer an explicit 'hyp1'/'hyp2' tag found anywhere in the reply, normalised
  # to lower case. If the reply has no tag at all, fall back to its first four
  # characters, which is what this cell stored before.
  tag = label_pattern.search(reply)
  answer = tag.group(0).lower() if tag else reply[:4]

  print("GENERATED: "+ current_sample+'\n'+answer)

  current_element = {
        "id": sample_id,
        "label": answer,
        "explanation": ""
    }
  prediction_sv.append(current_element)


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.08 ms /    16 runs   (    0.50 ms per token,  1980.69 tokens per second)
llama_print_timings: prompt eval time =    4018.19 ms /  2593 tokens (    1.55 ms per token,   645.32 tokens per second)
llama_print_timings:        eval time =    1519.75 ms /    15 runs   (  101.32 ms per token,     9.87 tokens per second)
llama_print_timings:       total time =   13216.20 ms /  2608 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Women will face higher car insurance premiums.
hyp1: This means women can expect to pay higher prices for their vehicle supplement insurance.
hyp2: Women will receive higher car insurance premiums.
label: [/INST] 
 
hyp2


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.67 ms /    16 runs   (    0.48 ms per token,  2086.59 tokens per second)
llama_print_timings: prompt eval time =     323.04 ms /   104 tokens (    3.11 ms per token,   321.94 tokens per second)
llama_print_timings:        eval time =    1551.31 ms /    15 runs   (  103.42 ms per token,     9.67 tokens per second)
llama_print_timings:       total time =    2173.91 ms /   119 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Operating income was $1.45 billion, up from last year's earnings of $1.38 billion.
hyp1: Revenue from the operation was $1.45 billion, down from the previous year's result of $1.38 billion.
hyp2: Revenue from the operation was $1.45 billion, up from the previous year's result of $1.38 billion.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       7.93 ms /    16 runs   (    0.50 ms per token,  2017.15 tokens per second)
llama_print_timings: prompt eval time =     280.02 ms /    60 tokens (    4.67 ms per token,   214.27 tokens per second)
llama_print_timings:        eval time =    1587.35 ms /    15 runs   (  105.82 ms per token,     9.45 tokens per second)
llama_print_timings:       total time =    2056.53 ms /    75 tokens


GENERATED: [INST] Which one of hyp1 and hyp2 is not supported by src?

src: Mandela back in hospital in 'serious but stable' condition.
hyp1: Mandela does not return to hospital in serious, but stable, health.
hyp2: Mandela returns to hospital in serious but stable condition.
label: [/INST] 
 
hyp1


Llama.generate: prefix-match hit

llama_print_timings:        load time =    1301.19 ms
llama_print_timings:      sample time =       8.46 ms /    16 runs   (    0.53 ms per token,  1891.48 tokens per second)
llama_print_timings: prompt eval time =     327.76 ms /   114 tokens (    2.88 ms per token,   347.81 tokens per second)
llama_print_timings:        eval time =    1552.58 ms /    15 runs   (  103.51 ms per token,     9.66 tokens per second)
llama_print_timings:       total time =    2235.44 ms /   129 tokens


# Function to create the CSV files containing the predictions.

In [None]:
import csv

def create_csv_prediction_file(data, filename):
  """Write prediction records to a CSV file with an ``id,label,explanation`` header.

  Args:
    data: Iterable of dicts keyed by ``id``, ``label`` and ``explanation``,
      e.g. ``{"id": 1, "label": "hyp1", "explanation": ""}``.
    filename: Path of the CSV file to create; an existing file is overwritten.

  Raises:
    ValueError: If a record contains a key that is not one of the three
      fields (raised by ``csv.DictWriter``).
  """
  # Column order of the output file.
  fields = ["id", "label", "explanation"]

  # newline='' leaves line endings to the csv module. The explicit UTF-8
  # encoding makes the file independent of the platform's default locale, so
  # non-ASCII explanations are written the same way everywhere.
  with open(filename, mode='w', newline='', encoding='utf-8') as file:
      writer = csv.DictWriter(file, fieldnames=fields)
      writer.writeheader()
      writer.writerows(data)

  print("CSV file created successfully.")


In [None]:
# Export the English and Swedish predictions, one CSV file per language.
create_csv_prediction_file(
    data=prediction_en,
    filename='eloquent2024_mc_mistral_en_prediction.csv',
)
create_csv_prediction_file(
    data=prediction_sv,
    filename='eloquent2024_mc_mistral_sv_prediction.csv',
)

In [None]:
from google.colab import files
# Trigger a browser download for each prediction file, English first.
for csv_name in (
    'eloquent2024_mc_mistral_en_prediction.csv',
    'eloquent2024_mc_mistral_sv_prediction.csv',
):
    files.download(csv_name)