# Task 1: Gen AI
By Saptarshi Bhattacharya

In [1]:
%pip install datasets transformers accelerate evaluate peft bitsandbytes trl groq

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting trl
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting groq
  Downloading groq-0.19.0-py3-none-any.whl.metadata (15 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12=

In [2]:
import os
import random
import re
import gc
from getpass import getpass

import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model, AutoPeftModelForCausalLM
from trl import SFTTrainer
import torch
import evaluate
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource

from datasets import load_dataset, Dataset, concatenate_datasets
from google.colab import userdata
from groq import Groq

In [3]:
# Check if `userdata` exists (specific to Google Colab).
# When the import fails, `userdata` becomes an empty dict: its `.get()` returns
# None, which `get_api_key` treats as "secret missing" and prompts instead.
# NOTE(review): the imports cell already runs `from google.colab import userdata`
# unconditionally, so outside Colab that earlier line fails before this fallback
# is reached -- confirm whether the unconditional import should stay.
try:
    import google.colab.userdata as userdata
except ImportError:
    userdata = {}

# Function to get API keys safely
def get_api_key(key_name):
    """Return the secret stored under ``key_name``, prompting when unavailable.

    The value is looked up through ``userdata.get``. Any exception raised by
    that lookup is treated the same as a missing secret. If the lookup yields
    nothing usable (``None`` or an empty value) the user is asked to type the
    key through ``getpass``.

    Args:
        key_name: Name of the secret, also used in the interactive prompt.

    Returns:
        The secret value from ``userdata`` or, failing that, the typed input.
    """
    try:
        secret = userdata.get(key_name)
    except Exception:  # deliberately broad, e.g. SecretNotFoundError in Colab
        secret = None

    # Fall back to manual entry only when the lookup gave nothing usable.
    return secret if secret else getpass(f"Enter {key_name} here: ")

# Load API keys.
# Each key is fetched once (Colab secret or interactive prompt) and exported as
# an environment variable for the rest of the session.
# presumably WANDB_API_KEY is picked up by the wandb logging that runs during
# training -- verify against the trainer's reporting configuration.
WANDB_API_KEY = get_api_key("WANDB_API_KEY")
os.environ["WANDB_API_KEY"] = WANDB_API_KEY

GROQ_API_KEY = get_api_key("GROQ_API_KEY")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Initialize Groq client (used later as the notebook-global `client` by the
# synthetic-data generation step).
# NOTE(review): `Groq` is already imported in the imports cell; this second
# import is redundant.
from groq import Groq
client = Groq(api_key=GROQ_API_KEY)


## 1. Load FLORES-200 and split it into train and test sets (80:20)

In [4]:
# Load FLORES-200 dataset ("all" configuration).
# trust_remote_code=True allows the dataset's own loading script to run.
flores = load_dataset("facebook/flores", "all", trust_remote_code=True)

# Select 1000 elements from dataset: shuffle the devtest split with a fixed
# seed, then keep the first `number_of_samples` rows so the subset is
# reproducible across runs.
number_of_samples = 1000
flores_subset = flores["devtest"].shuffle(seed=42).select(range(number_of_samples))

def format_translation(example):
    """Convert one FLORES row into a single-turn chat record.

    Args:
        example: Mapping with the keys ``sentence_deu_Latn`` (German text)
            and ``sentence_fra_Latn`` (French text).

    Returns:
        ``{"messages": [user_turn, assistant_turn]}`` where the user turn
        carries the German sentence and the assistant turn the French one.
    """
    user_turn = {"role": "user", "content": example["sentence_deu_Latn"]}
    assistant_turn = {"role": "assistant", "content": example["sentence_fra_Latn"]}
    return {"messages": [user_turn, assistant_turn]}

# Add a chat-style "messages" column to every selected row.
flores_formatted = flores_subset.map(format_translation)

# Split dataset into train and test (80/20, fixed seed for reproducibility).
dataset_A = flores_formatted.train_test_split(test_size=0.2, seed=42)

# Drop useless columns: keep only "messages" in each split.
# The comprehension rebinds `dataset_A` to a plain dict keyed by split name.
dataset_A = {
    key: dataset.remove_columns(
        [col for col in dataset.column_names if col != "messages"]
    )
    for key, dataset in dataset_A.items()
}

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/11.8k [00:00<?, ?B/s]

flores.py:   0%|          | 0.00/11.2k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/25.6M [00:00<?, ?B/s]

Generating dev split: 0 examples [00:00, ? examples/s]

Generating devtest split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

## 2. Load pre-trained model



In [5]:
# Checkpoint name (a string, not a model object) that every later fine-tuning
# run starts from.
model_A = "Qwen/Qwen2-1.5B"
# Device the tokenized inputs are moved to at evaluation time.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Baseline model used for the first evaluation; dtype and device placement
# are left to the library via the "auto" settings.
model = AutoModelForCausalLM.from_pretrained(
    model_A,
    torch_dtype="auto",
    device_map="auto"
)

# This tokenizer is reused for evaluating all later models in the notebook.
tokenizer = AutoTokenizer.from_pretrained(model_A, trust_remote_code=True)

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

## 3. Evaluate `model_A` on `dataset_A['test']` using BLEU

Define the function for translation

In [6]:
def deutsch_to_french(input_sentence, model, tokenizer, device,
                      max_new_tokens=100, num_beams=4, no_repeat_ngram_size=2):
    """Translate one German sentence into French with a causal language model.

    The sentence is wrapped in a fixed instruction prompt, the model continues
    the prompt, and only the newly generated tokens are decoded. The first
    line of that continuation, with surrounding quotes/whitespace removed, is
    returned as the translation.

    Args:
        input_sentence: German source text.
        model: Object exposing ``generate(input_ids=..., attention_mask=..., ...)``
            that returns the prompt ids followed by the generated ids.
        tokenizer: Callable tokenizer that also provides ``decode`` and
            ``eos_token_id``.
        device: Device the tokenized inputs are moved to.
        max_new_tokens: Upper bound on generated tokens (default 100).
        num_beams: Beam width for beam search (default 4).
        no_repeat_ngram_size: Size of n-grams that may not repeat (default 2).
            NOTE(review): for decoder-only models this ban appears to cover the
            prompt tokens as well, which would stop the model from copying
            names from the source sentence -- confirm and consider 0.

    Returns:
        The cleaned first line of the generated continuation (may be empty).
    """
    translation_prompt = (
        f"Convert the German text below into French: {input_sentence}\n"
        "Translated Text (French):"
    )

    tokenized_input = tokenizer(
        translation_prompt,
        return_tensors="pt",
        truncation=True,
        padding=True
    ).to(device)
    # Remember where the prompt ends so the echo of the prompt can be skipped.
    prompt_length = tokenized_input.input_ids.shape[1]

    with torch.no_grad():
        translated_output = model.generate(
            input_ids=tokenized_input.input_ids,
            attention_mask=tokenized_input.attention_mask,
            max_new_tokens=max_new_tokens,
            num_beams=num_beams,
            early_stopping=True,
            no_repeat_ngram_size=no_repeat_ngram_size,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only what the model added. Decoding the full sequence and then
    # searching for the prompt marker returned the whole prompt as the
    # "translation" whenever the marker could not be found.
    continuation = tokenizer.decode(
        translated_output[0][prompt_length:],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True
    )

    # If the model repeats a label before answering, keep the text after it.
    translation_markers = [
        "Translated Text (French):",
        "French Translation:",
        "Traduction française:"
    ]
    for marker in translation_markers:
        if marker in continuation:
            continuation = continuation.split(marker)[-1]
            break

    # Trim first, take the first line, then trim again so that a closing quote
    # is removed even when the model kept generating further lines.
    trim_chars = ' "\n\r\t«»'
    first_line = continuation.strip(trim_chars).split("\n")[0]
    return first_line.strip(trim_chars)

In [7]:
def deutsch_to_french_set(dataset, model, tokenizer, device):
    """Translate the German side of every record in ``dataset``.

    Each record is expected to hold its German sentence at
    ``record["messages"][0]["content"]``. Records are translated one at a
    time with ``deutsch_to_french`` and a progress line is printed per record.

    Returns:
        List of French translations in dataset order.
    """
    predictions = []

    for position, record in enumerate(dataset, start=1):
        source_text = record["messages"][0]["content"]
        translated = deutsch_to_french(source_text, model, tokenizer, device)
        predictions.append(translated)

        # One progress line per sentence: running index, source, translation.
        print(f"Translated {position}/{len(dataset)}: {source_text} -> {translated}")

    return predictions


### Loading the chosen testing metric (BLEU)

In [8]:
# Load the BLEU metric once at notebook level.
bleu = evaluate.load("bleu")

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

In [9]:
def bleu_result(dataset, model, tokenizer, device, metric=None):
    """Translate ``dataset`` with ``model`` and score the output against the references.

    Args:
        dataset: Iterable of records whose French reference is stored at
            ``record["messages"][1]["content"]``.
        model, tokenizer, device: Forwarded unchanged to ``deutsch_to_french_set``.
        metric: Optional scorer exposing ``compute(predictions=..., references=...)``.
            When omitted, the BLEU metric is loaded with ``evaluate.load("bleu")``.
            Passing an already loaded metric avoids reloading it on every call.

    Returns:
        Whatever ``metric.compute`` returns for the predictions and references.

    Raises:
        ValueError: If ``dataset`` contains no records.
    """
    if metric is None:
        metric = evaluate.load("bleu")

    # One single-reference list per record, the shape the scorer is called with.
    reference_sentences = [[data["messages"][1]["content"]] for data in dataset]
    if not reference_sentences:
        # Fail early with a clear message instead of erroring inside the metric.
        raise ValueError("bleu_result needs a non-empty dataset")

    predictions = deutsch_to_french_set(dataset, model, tokenizer, device)

    bleu_score = metric.compute(predictions=predictions, references=reference_sentences)

    print("BLEU Score:", bleu_score)

    return bleu_score

In [10]:
# Baseline: score the untuned base model on the held-out test split.
result_A = bleu_result(dataset_A["test"], model, tokenizer, device)

Translated 1/200: Der Fotograf wurde in das Ronald Reagan UCLA Medical Center gebracht, wo er anschließend verstarb. -> Le photographe a été transporté à l'hôpital Ronald-Reagan de l'Université UCLA, où il est ensuite décédé.
Translated 2/200: Dr. Ehud Ur, Professor für Medizin an der Dalhousie University in Halifax, Nova Scotia, und Vorsitzender der Abteilung für Klinik und Wissenschaft des Kanadischen Diabetesverbands gab zu bedenken, dass die Forschungsarbeit noch in den Kinderschuhen stecke. -> Professeur de médecine à l'Université de Halifax en Nouvelle-Écosse et président de la section de clinique et de recherche du Canada du Syndicat canadien des diabétiques, Prof. Ehu Ur a souligné qu'il reste encore du travail à faire dans les chaussures de bébé pour la recherche médicale.
Translated 3/200: Starke Winde, Hagel, übermäßige Niederschläge und Waldbrände sind Formen und Auswirkungen von Unwetter, ebenso wie Gewitter, Tornados, Wasserhosen und Wirbelstürme. -> Les vents forts, le v

In [11]:
# Drop the notebook-level reference to the baseline model before fine-tuning.
# Re-running this cell on its own raises NameError because `model` is gone.
del model

## 4. Finetuning `model_A` on `dataset_A['train']` to create `model_B`

In [12]:
def finetune_model(model_name, dataset, output_dir="./results", num_epochs=1, output_model_name="model-b", merge_lora=True):
    """Fine-tune ``model_name`` on ``dataset`` with 4-bit quantization and LoRA adapters.

    Steps: load the base model in 4-bit, attach LoRA adapters, train with
    ``SFTTrainer``, save the adapters under ``output_model_name`` and, if
    requested, reload them and merge them into the base weights.

    Args:
        model_name: Checkpoint name or path of the base model.
        dataset: Training dataset handed to ``SFTTrainer`` as ``train_dataset``.
        output_dir: Directory for the trainer's own outputs.
        num_epochs: Number of training epochs.
        output_model_name: Directory the trained adapters are saved to.
        merge_lora: When True, return the base model with the adapters merged
            in; when False, return the adapter-wrapped training model.

    Returns:
        The merged model (``merge_lora=True``), the adapter-wrapped model
        (``merge_lora=False``), or ``None`` if loading the model/tokenizer or
        merging failed (the error is printed).
    """
    # Release whatever earlier cells left behind before loading a new model.
    gc.collect()
    torch.cuda.empty_cache()

    # Configure 4-bit quantization
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",  # 4-bit NormalFloat data type
        bnb_4bit_compute_dtype="float16",
        bnb_4bit_use_double_quant=True,
    )

    try:
        # Load model with quantization
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            quantization_config=bnb_config,
        )
        model.config.use_cache = False  # generation cache is not needed while training
        model.config.pretraining_tp = 1

    except Exception as e:
        print(f"Error loading model: {e}")
        return None

    # Load tokenizer
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    except Exception as e:
        print(f"Error loading tokenizer: {e}")
        return None

    # Configure LoRA for fine-tuning
    peft_config = LoraConfig(
        lora_alpha=32,
        lora_dropout=0.1,
        r=64,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=[
            "k_proj", "gate_proj", "v_proj", "up_proj",
            "q_proj", "o_proj", "down_proj"
        ],
    )

    # Prepare the quantized model for training and attach the adapters once,
    # here. The trainer therefore receives an already wrapped model and is not
    # given `peft_config` a second time.
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, peft_config)

    # Training arguments
    training_arguments = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        optim="paged_adamw_32bit",
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        num_train_epochs=num_epochs,
        logging_steps=10,
        fp16=True,
        gradient_checkpointing=True
    )

    # `processing_class` replaces the deprecated `tokenizer=` keyword of
    # SFTTrainer, so the call keeps working on trl releases that dropped it.
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        processing_class=tokenizer,
        args=training_arguments,
    )

    # Start fine-tuning
    trainer.train()

    # Save the trained adapters
    trainer.model.save_pretrained(output_model_name)

    if not merge_lora:
        return model

    print("Merging LoRA weights with the base model...")
    # The 4-bit training copy is no longer needed; free it before the base
    # model is loaded again for merging, so both are not resident at once.
    del trainer, model
    gc.collect()
    torch.cuda.empty_cache()

    try:
        adapter_model = AutoPeftModelForCausalLM.from_pretrained(
            output_model_name,
            low_cpu_mem_usage=True,
            device_map="auto",
            offload_folder="./offload"
        )
        return adapter_model.merge_and_unload()

    except Exception as e:
        print(f"Error during LoRA merge: {e}")
        return None

In [13]:
# Fine-tune the base checkpoint on the FLORES train split. `merge_lora` is left
# at its default, so `model_B` is the merged model (or None if a step failed).
model_B = finetune_model(model_name=model_A, dataset=dataset_A["train"], output_dir="./finetuned_model", num_epochs=1, output_model_name="model_B")

  trainer = SFTTrainer(


Converting train dataset to ChatML:   0%|          | 0/800 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/800 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/800 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/800 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msaptarshib98[0m ([33msaptarshib98-rptu[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,1.8475
20,1.5716
30,1.5668
40,1.5097
50,1.5555
60,1.4669
70,1.4856
80,1.5107
90,1.5369
100,1.4839


Merging LoRA weights with the base model...


## 5. Evaluating `model_B` on `dataset_A['test']`

In [14]:
# Same test split as the baseline; `tokenizer` is the notebook-level one
# loaded for the base checkpoint.
result_B = bleu_result(dataset_A["test"], model_B, tokenizer, device)

Translated 1/200: Der Fotograf wurde in das Ronald Reagan UCLA Medical Center gebracht, wo er anschließend verstarb. -> Le photographe a été transporté à l'hôpital Ronald-Reagan de l'Université de Californie à Los Angeles, où il est décédé ensuite.
Translated 2/200: Dr. Ehud Ur, Professor für Medizin an der Dalhousie University in Halifax, Nova Scotia, und Vorsitzender der Abteilung für Klinik und Wissenschaft des Kanadischen Diabetesverbands gab zu bedenken, dass die Forschungsarbeit noch in den Kinderschuhen stecke. -> Professeur de médecine à l'Université Dalhaussie à Halifax (Nouvelle-Écosse) et président de la section de clinique et de recherche du Canadian Diabetes Association, Dr Ehude Ur a souligné que les recherches sont encore dans les chaussures des enfants.
Translated 3/200: Starke Winde, Hagel, übermäßige Niederschläge und Waldbrände sind Formen und Auswirkungen von Unwetter, ebenso wie Gewitter, Tornados, Wasserhosen und Wirbelstürme. -> Les vents forts, le vent, les plui

In [15]:
# Drop the reference to model_B now that its score is stored in `result_B`.
del model_B

## 6. Generating Synthetic Dataset `dataset_B`, twice the size of `dataset_A['train']` using larger model

Define the prompt template

In [16]:
def generate_prompt(dataset_samples, num_samples):
    """Build the few-shot prompt asking for new German/French sentence pairs.

    Args:
        dataset_samples: Iterable of records with the German text at
            ``["messages"][0]["content"]`` and the French text at
            ``["messages"][1]["content"]``; each becomes one example.
        num_samples: Number of pairs the prompt asks the model to produce.

    Returns:
        The full prompt: task description, the examples, numbered
        instructions and the required output format.
    """
    intro = (
        "You are a highly skilled AI specializing in German-to-French translation. "
        "Below is a dataset of German sentences and their corresponding French translations. "
        "Your task is to generate new, high-quality German sentences along with their accurate French translations. "
        "The new sentences should resemble the dataset in terms of style, structure, and vocabulary.\n\n"
        "Dataset Examples:\n"
    )

    # One two-line bullet per example pair.
    example_block = "".join(
        "- German: {}\n  French: {}\n".format(
            sample['messages'][0]['content'],
            sample['messages'][1]['content'],
        )
        for sample in dataset_samples
    )

    # Entries are joined with single newlines; an entry ending in "\n"
    # therefore produces a blank line after it.
    instruction_lines = [
        "\nInstructions:",
        f"1. Generate exactly {num_samples} new German sentences along with their corresponding French translations.",
        "2. Ensure the new sentences are **diverse**, covering a variety of topics and sentence structures.",
        "3. Do **not** copy or modify existing sentences; generate entirely new ones.",
        "4. Ensure that each translation is **accurate and natural**.",
        "5. Avoid repetition and incomplete responses.\n",
        "Output Format (strictly follow this format):",
        "1. German: [Generated German Sentence]",
        "   French: [Generated French Translation]",
        "2. German: [Next Generated German Sentence]",
        "   French: [Next Generated French Translation]",
        f"Continue until exactly {num_samples} pairs are provided.\n",
        f"Do not generate fewer than {num_samples} sentences.",
    ]

    return intro + example_block + "\n".join(instruction_lines)


Defining the Dataset generation function

In [17]:
def _parse_translation_pairs(generated_text):
    """Extract ``(german, french)`` tuples from one model reply.

    Only lines containing ``German:`` or ``French:`` are considered. A pair is
    accepted when a German line is directly followed by a French line; an
    optional list prefix (``1.``, ``1)``, ``-`` or ``*``) is ignored and pairs
    with an empty side are dropped.
    """
    labelled_lines = [
        line.strip()
        for line in generated_text.split('\n')
        if line.strip() and ('German:' in line or 'French:' in line)
    ]

    prefix = r'(?:\d+[.)]\s*|[-*]\s*)?'
    pairs = []
    i = 0
    while i < len(labelled_lines) - 1:
        german_match = re.match(prefix + r'German:\s*(.*)', labelled_lines[i], re.IGNORECASE)
        french_match = re.match(prefix + r'French:\s*(.*)', labelled_lines[i + 1], re.IGNORECASE)
        if german_match and french_match:
            german = german_match.group(1).strip()
            french = french_match.group(1).strip()
            if german and french:  # skip pairs where one side came back empty
                pairs.append((german, french))
            i += 2  # both lines consumed
            continue
        i += 1  # not a pair start; re-synchronise on the next line
    return pairs


def generate_synthetic_data(dataset_samples, num_samples, batch_size,
                            model_name="llama3-70b-8192", max_empty_batches=5):
    """Generate German/French sentence pairs with the notebook-level chat ``client``.

    Batches are requested until ``num_samples`` pairs have been parsed. Each
    request shows up to five randomly chosen records from ``dataset_samples``
    as examples.

    Args:
        dataset_samples: Records used as few-shot examples (see ``generate_prompt``).
        num_samples: Total number of pairs wanted.
        batch_size: Number of pairs requested per API call; must be positive.
        model_name: Model identifier passed to the chat completion call.
        max_empty_batches: Stop after this many consecutive replies from which
            no pair could be parsed, instead of calling the API forever.

    Returns:
        List of ``(german, french)`` tuples, at most ``num_samples`` long. It
        is shorter only when generation was abandoned after repeated empty
        batches (a message is printed in that case).

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    synthetic_data = []
    remaining_samples = num_samples
    empty_batches = 0

    # Materialise the example pool once instead of on every batch.
    example_pool = list(dataset_samples)
    sample_size = min(5, len(example_pool))

    while remaining_samples > 0:
        current_batch_size = min(batch_size, remaining_samples)
        print(f"Need to generate: {current_batch_size} samples (Remaining: {remaining_samples})")

        samples = random.sample(example_pool, sample_size)
        prompt = generate_prompt(samples, current_batch_size)

        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
        )

        batch_pairs = []
        if completion and completion.choices:
            # The content may be missing; treat that as an empty reply.
            reply_text = (completion.choices[0].message.content or "").strip()
            batch_pairs = _parse_translation_pairs(reply_text)

        valid_pairs = len(batch_pairs)
        synthetic_data.extend(batch_pairs)
        print(f"Successfully parsed: {valid_pairs} pairs")
        remaining_samples -= valid_pairs  # progress is counted in parsed pairs

        if valid_pairs:
            empty_batches = 0
        else:
            empty_batches += 1
            if empty_batches >= max_empty_batches:
                print(
                    f"Giving up after {empty_batches} consecutive batches without usable pairs; "
                    f"returning {len(synthetic_data)} of {num_samples} requested samples."
                )
                break

    return synthetic_data[:num_samples]  # trim if the model over-delivered

Generating synthetic dataset

In [18]:
# Request twice as many synthetic pairs as there are training records,
# 100 pairs per API call.
new_db = generate_synthetic_data(dataset_A["train"], num_samples=len(dataset_A["train"])*2, batch_size=100)

# Format the synthetic dataset to match the structure of dataset_A:
# one "messages" list per record with a user (German) and assistant (French) turn.
dataset_B = Dataset.from_list([
    {"messages": [
        {"role": "user", "content": german},
        {"role": "assistant", "content": french}
    ]}
    for german, french in new_db
])

Need to generate: 100 samples (Remaining: 1600)
Successfully parsed: 16 pairs
Need to generate: 100 samples (Remaining: 1584)
Successfully parsed: 100 pairs
Need to generate: 100 samples (Remaining: 1484)
Successfully parsed: 18 pairs
Need to generate: 100 samples (Remaining: 1466)
Successfully parsed: 18 pairs
Need to generate: 100 samples (Remaining: 1448)
Successfully parsed: 100 pairs
Need to generate: 100 samples (Remaining: 1348)
Successfully parsed: 100 pairs
Need to generate: 100 samples (Remaining: 1248)
Successfully parsed: 100 pairs
Need to generate: 100 samples (Remaining: 1148)
Successfully parsed: 100 pairs
Need to generate: 100 samples (Remaining: 1048)
Successfully parsed: 13 pairs
Need to generate: 100 samples (Remaining: 1035)
Successfully parsed: 100 pairs
Need to generate: 100 samples (Remaining: 935)
Successfully parsed: 100 pairs
Need to generate: 100 samples (Remaining: 835)
Successfully parsed: 11 pairs
Need to generate: 100 samples (Remaining: 824)
Successfully

Check generated dataset

In [19]:
# Sanity check: number of synthetic records and the container type.
print("Size of the generated data set: ",len(dataset_B))
print("Data type: ", type(dataset_B))

Size of the generated data set:  1600
Data type:  <class 'datasets.arrow_dataset.Dataset'>


The synthetic `dataset_B` generated above is used in the next step to fine-tune a new model, `model_C`.

## 7. Finetuning `model_A` on the new synthesized dataset `dataset_B` to create `model_C`

In [20]:
# Fine-tune the base checkpoint on the synthetic data only.
# NOTE(review): this reuses the same `output_dir` as the model_B run.
model_C = finetune_model(model_name=model_A, dataset=dataset_B, output_dir="./finetuned_model", num_epochs=1, output_model_name="model_C")

  trainer = SFTTrainer(


Converting train dataset to ChatML:   0%|          | 0/1600 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/1600 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1600 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/1600 [00:00<?, ? examples/s]

Step,Training Loss
10,1.1372
20,0.595
30,0.53
40,0.4633
50,0.4195
60,0.4242
70,0.3891
80,0.3486
90,0.3958
100,0.3484


Merging LoRA weights with the base model...


## 8. Evaluate `model_C` on `dataset_A['test']`

In [21]:
# Score model_C on the same held-out FLORES test split as the other models.
result_C = bleu_result(dataset_A["test"], model_C, tokenizer, device)

Translated 1/200: Der Fotograf wurde in das Ronald Reagan UCLA Medical Center gebracht, wo er anschließend verstarb. -> Le photographe a été transporté à l'hôpital Ronald-Reagan de l'université UCLA, où il est ensuite décédé.
Translated 2/200: Dr. Ehud Ur, Professor für Medizin an der Dalhousie University in Halifax, Nova Scotia, und Vorsitzender der Abteilung für Klinik und Wissenschaft des Kanadischen Diabetesverbands gab zu bedenken, dass die Forschungsarbeit noch in den Kinderschuhen stecke. -> Professeur de médecine à l'université dalhousse en Nouvelle-Écosse à Halifax et président de la section de clinique et de recherche du Canada Diabetes Association a souligné que la recherche reste encore dans les chaussures de bébé.
Translated 3/200: Starke Winde, Hagel, übermäßige Niederschläge und Waldbrände sind Formen und Auswirkungen von Unwetter, ebenso wie Gewitter, Tornados, Wasserhosen und Wirbelstürme. -> Les vents forts, le vent, les précipitations excessives et les incendies de f

In [22]:
# Drop the reference to model_C now that its score is stored in `result_C`.
del model_C

## 9. Combine `dataset_A['train']` and `dataset_B`, then shuffle the result to create `dataset_C`

In [23]:
# Stack the real training records and the synthetic ones into one dataset.
combined_dataset = concatenate_datasets([dataset_A['train'], dataset_B])

# Shuffle the combined dataset with a fixed seed so the order is reproducible.
dataset_C = combined_dataset.shuffle(seed=42)

## 10. Finetune `model_A` on `dataset_C` to create `model_D`

In [24]:
# Fine-tune the base checkpoint on the combined real + synthetic data.
# NOTE(review): this reuses the same `output_dir` as the earlier runs.
model_D = finetune_model(model_name=model_A, dataset=dataset_C, output_dir="./finetuned_model", num_epochs=1, output_model_name="model_D")

  trainer = SFTTrainer(


Converting train dataset to ChatML:   0%|          | 0/2400 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/2400 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2400 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2400 [00:00<?, ? examples/s]

Step,Training Loss
10,1.5125
20,0.9902
30,1.0265
40,1.0586
50,0.9025
60,0.8646
70,0.9244
80,0.8254
90,0.8388
100,0.8849


Merging LoRA weights with the base model...


## 11. Evaluate `model_D`

In [25]:
# Score model_D on the same held-out FLORES test split as the other models.
result_D = bleu_result(dataset_A["test"], model_D, tokenizer, device)

Translated 1/200: Der Fotograf wurde in das Ronald Reagan UCLA Medical Center gebracht, wo er anschließend verstarb. -> Le photographe a été emmené à l'hôpital Reagan de l'Université de Californie à Los Angeles, où il est ensuite décédé.
Translated 2/200: Dr. Ehud Ur, Professor für Medizin an der Dalhousie University in Halifax, Nova Scotia, und Vorsitzender der Abteilung für Klinik und Wissenschaft des Kanadischen Diabetesverbands gab zu bedenken, dass die Forschungsarbeit noch in den Kinderschuhen stecke. -> Professeur d'hygiène médicale à l'université de Halifax (Nova Scotia) et président de la division de médecine et de recherche du Canadian Diabetes Association (CDA), Dr Ehud Ur a souligné que les recherches sont encore dans les chaussures de bébé.
Translated 3/200: Starke Winde, Hagel, übermäßige Niederschläge und Waldbrände sind Formen und Auswirkungen von Unwetter, ebenso wie Gewitter, Tornados, Wasserhosen und Wirbelstürme. -> Les vents forts, l'orage, les précipitations exces

## 12. Plot the performance of all models

In [26]:
# Sample data for plotting
labels = ['A', 'B', 'C', 'D']
values = [result_A['bleu'], result_B['bleu'], result_C['bleu'], result_D['bleu']]

# Round the BLEU scores to 2 decimal places
rounded_values = [round(value, 2) for value in values]

# Create data source with the rounded values for labels
data_source = ColumnDataSource(data={'labels': labels, 'values': values, 'rounded_values': rounded_values})

# Create figure
p = figure(x_range=labels, title='Evaluation of Models',
           x_axis_label='Models', y_axis_label='BLEU Score',
           toolbar_location=None, tools="")

# Add a line to connect the dots
p.line(x='labels', y='values', source=data_source, line_width=2, color='blue', legend_label="BLEU Score")

# Add dots (circle) to represent the BLEU scores
p.scatter(marker='circle', x='labels', y='values', size=10, color='red', alpha=0.6, source=data_source)

# Add text labels (rounded BLEU scores) above the dots
p.text(x='labels', y='values', source=data_source, text='rounded_values', text_align='center', text_baseline='bottom',
       text_font_size="12pt", text_color="black")

# Show the plot
output_notebook()  # Display output in a Jupyter Notebook
target_html = show(p)
