In [1]:
%%capture
import torch
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

In [2]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",
    "unsloth/gemma-7b-it-bnb-4bit", # Instruct version of Gemma 7b
    "unsloth/gemma-2b-bnb-4bit",
    "unsloth/gemma-2b-it-bnb-4bit", # Instruct version of Gemma 2b
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-7b-bnb-4bit", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Gemma patching release 2024.3
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.25. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


model.safetensors:   0%|          | 0.00/5.57G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.16k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [3]:
import bitsandbytes as bnb
def find_all_linear_names(model):
  cls = bnb.nn.Linear4bit #if args.bits == 4 else (bnb.nn.Linear8bitLt if args.bits == 8 else torch.nn.Linear)
  lora_module_names = set()
  for name, module in model.named_modules():
    if isinstance(module, cls):
      names = name.split('.')
      lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names: # needed for 16-bit
      lora_module_names.remove('lm_head')
  return list(lora_module_names)

In [4]:
modules = find_all_linear_names(model)
print(modules)

['k_proj', 'v_proj', 'q_proj', 'gate_proj', 'down_proj', 'up_proj', 'o_proj']


In [5]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 64, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = modules,
    lora_alpha = 32,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = True,
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.3 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


<a name="Data"></a>
### Data Prep
We now use the Alpaca dataset from [yahma](https://huggingface.co/datasets/yahma/alpaca-cleaned), which is a filtered version of 52K of the original [Alpaca dataset](https://crfm.stanford.edu/2023/03/13/alpaca.html). You can replace this code section with your own data prep.

**[NOTE]** To train only on completions (ignoring the user's input) read TRL's docs [here](https://huggingface.co/docs/trl/sft_trainer#train-on-completions-only).

**[NOTE]** Remember to add the **EOS_TOKEN** to the tokenized output!! Otherwise you'll get infinite generations!

If you want to use the `ChatML` template for ShareGPT datasets, try our conversational [notebook](https://colab.research.google.com/drive/1Aau3lgPzeZKQ-98h69CCu1UJcvIBLmy2?usp=sharing).

For text completions like novel writing, try this [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing).

In [None]:
from datasets import load_dataset
dataset = load_dataset("Helsinki-NLP/opus-100",'de-en', split = "train")
dataset

Downloading data:   0%|          | 0.00/253k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/116M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/254k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/1000000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Dataset({
    features: ['translation'],
    num_rows: 1000000
})

In [None]:
df = dataset.to_pandas()


Unnamed: 0,translation
0,{'de': 'Deine Habgier wird noch dein Tod sein....
1,"{'de': '- Vega.', 'en': 'Vega.'}"
2,"{'de': 'Sagen Sie einfach stopp.', 'en': 'Just..."
3,"{'de': '- Warte.', 'en': '- Wait.'}"
4,"{'de': 'Ich will nicht hier sein.', 'en': 'I d..."
5,"{'de': 'Also, 90 Prozent meiner fotografischen..."
6,"{'de': 'Wie bezaubernd.', 'en': 'So lovely.'}"
7,"{'de': 'Vielen Dank, Colonel.', 'en': 'Thank y..."
8,"{'de': 'Martin!', 'en': 'Martin!'}"
9,"{'de': 'Muss ich?', 'en': 'Do I have to?'}"




No charts were generated by quickchart


In [None]:
df.head(10)

Unnamed: 0,translation
0,{'de': 'Deine Habgier wird noch dein Tod sein....
1,"{'de': '- Vega.', 'en': 'Vega.'}"
2,"{'de': 'Sagen Sie einfach stopp.', 'en': 'Just..."
3,"{'de': '- Warte.', 'en': '- Wait.'}"
4,"{'de': 'Ich will nicht hier sein.', 'en': 'I d..."
5,"{'de': 'Also, 90 Prozent meiner fotografischen..."
6,"{'de': 'Wie bezaubernd.', 'en': 'So lovely.'}"
7,"{'de': 'Vielen Dank, Colonel.', 'en': 'Thank y..."
8,"{'de': 'Martin!', 'en': 'Martin!'}"
9,"{'de': 'Muss ich?', 'en': 'Do I have to?'}"


In [None]:
import pandas as pd
from datasets import Dataset, load_dataset
from huggingface_hub import notebook_login, HfApi


formatted_texts = [
    f'german : {example["de"]}\n<## Translation ## >\nenglish : {example["en"]}'
    for example in df['translation']
]

# Create a new DataFrame with the formatted text
formatted_df = pd.DataFrame({'formatted_text': formatted_texts})

# Convert the new DataFrame into a Hugging Face Dataset
formatted_dataset = Dataset.from_pandas(formatted_df)

In [None]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
api = HfApi()

In [None]:
df.head(30)

Unnamed: 0,formatted_text
0,german : Deine Habgier wird noch dein Tod sein...
1,german : - Vega.\n<## Translation ## >\nenglis...
2,german : Sagen Sie einfach stopp.\n<## Transla...
3,german : - Warte.\n<## Translation ## >\nengli...
4,german : Ich will nicht hier sein.\n<## Transl...
5,"german : Also, 90 Prozent meiner fotografische..."
6,german : Wie bezaubernd.\n<## Translation ## >...
7,"german : Vielen Dank, Colonel.\n<## Translatio..."
8,german : Martin!\n<## Translation ## >\nenglis...
9,german : Muss ich?\n<## Translation ## >\nengl...


In [None]:
def is_time_format(text):
    try:
        parts = text.split('\n<## Translation ## >\n')
        german, english = parts[0].split('german : ')[1], parts[1].split('english : ')[1]
        return all(part.replace(':', '').isdigit() and len(part.replace(':', '')) == 4 for part in [german, english])
    except IndexError:
        return False

filtered_df = df[~df['formatted_text'].apply(is_time_format)]
filtered_dataset = Dataset.from_pandas(filtered_df)

In [None]:
dataset_name = 'opus-100-German-to-English'
api.create_repo(repo_id=dataset_name, repo_type='dataset', private=False)

RepoUrl('https://huggingface.co/datasets/Samvardhan777/opus-100-German-to-English', endpoint='https://huggingface.co', repo_type='dataset', repo_id='Samvardhan777/opus-100-German-to-English')

In [None]:
dataset_name = 'filtered_dataset'

In [None]:
formatted_dataset.push_to_hub(dataset_name)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1000 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/Samvardhan777/filtered_dataset/commit/62572c51bff1716e1255c5684f9b6a0d42b3f90d', commit_message='Upload dataset', commit_description='', oid='62572c51bff1716e1255c5684f9b6a0d42b3f90d', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
from datasets import load_dataset
dataset = load_dataset("Samvardhan777/opus-100-German-to-English", split = "train")
dataset

Downloading readme:   0%|          | 0.00/293 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/114M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000000 [00:00<?, ? examples/s]

Dataset({
    features: ['formatted_text'],
    num_rows: 1000000
})

In [None]:
df = dataset.to_pandas()

In [None]:
df.head(10)

Unnamed: 0,formatted_text
0,german : Deine Habgier wird noch dein Tod sein...
1,german : - Vega.\n<## Translation ## >\nenglis...
2,german : Sagen Sie einfach stopp.\n<## Transla...
3,german : - Warte.\n<## Translation ## >\nengli...
4,german : Ich will nicht hier sein.\n<## Transl...
5,"german : Also, 90 Prozent meiner fotografische..."
6,german : Wie bezaubernd.\n<## Translation ## >...
7,"german : Vielen Dank, Colonel.\n<## Translatio..."
8,german : Martin!\n<## Translation ## >\nenglis...
9,german : Muss ich?\n<## Translation ## >\nengl...


In [None]:
from datasets import load_dataset
dataset = load_dataset("kde4", lang1="de", lang2="en")
df = dataset['train'].to_pandas()

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [None]:
df.head(10)

Unnamed: 0,id,translation
0,0,"{'de': 'Lauri Watts', 'en': 'Lauri Watts'}"
1,1,"{'de': '& Lauri.Watts.mail;', 'en': '& Lauri. ..."
2,2,{'de': 'Frank Schütte F.Schuette@t-online.de Ü...
3,3,"{'de': '2006-02-26 3.5.1', 'en': '2006-02-26 3..."
4,4,{'de': 'Mit Hilfe des & konqueror;-Modul Babel...
5,5,"{'de': 'KDE', 'en': 'KDE'}"
6,6,"{'de': 'kdeaddons', 'en': 'kdeaddons'}"
7,7,"{'de': 'konqueror', 'en': 'konqueror'}"
8,8,"{'de': 'Module', 'en': 'plugins'}"
9,9,"{'de': 'Babelfish', 'en': 'babelfish'}"


In [None]:
from huggingface_hub import notebook_login, HfApi
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from datasets import Dataset, load_dataset
formatted_texts = [
    f'german : {example["de"]}\n<## Translation ## >\nenglish : {example["en"]}'
    for example in df['translation']
]

# Create a new DataFrame with the formatted text
kde_formatted_df = pd.DataFrame({'formatted_text': formatted_texts})

# Convert the new DataFrame into a Hugging Face Dataset
kde_formatted_dataset = Dataset.from_pandas(kde_formatted_df)

In [None]:
kde_formatted_df.head(10)

Unnamed: 0,formatted_text
0,german : Lauri Watts\n<## Translation ## >\nen...
1,german : & Lauri.Watts.mail;\n<## Translation ...
2,german : Frank Schütte F.Schuette@t-online.de ...
3,german : 2006-02-26 3.5.1\n<## Translation ## ...
4,german : Mit Hilfe des & konqueror;-Modul Babe...
5,german : KDE\n<## Translation ## >\nenglish : KDE
6,german : kdeaddons\n<## Translation ## >\nengl...
7,german : konqueror\n<## Translation ## >\nengl...
8,german : Module\n<## Translation ## >\nenglish...
9,german : Babelfish\n<## Translation ## >\nengl...


In [None]:
api = HfApi()

In [None]:
dataset_name = 'kde4-German-to-English'
api.create_repo(repo_id=dataset_name, repo_type='dataset', private=False)

In [None]:
kde_formatted_dataset.push_to_hub(dataset_name)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/225 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/Samvardhan777/kde4-German-to-English/commit/d1024a55cc4628be6d6a8bf0a567a8eb05957b2e', commit_message='Upload dataset', commit_description='', oid='d1024a55cc4628be6d6a8bf0a567a8eb05957b2e', pr_url=None, pr_revision=None, pr_num=None)

In [6]:
from datasets import load_dataset
dataset = load_dataset("Samvardhan777/kde4-German-to-English", split = "train")
dataset

Downloading readme:   0%|          | 0.00/289 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/13.4M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/224035 [00:00<?, ? examples/s]

Dataset({
    features: ['formatted_text'],
    num_rows: 224035
})

In [7]:
df = dataset.to_pandas()

In [9]:
df.head(10)

Unnamed: 0,formatted_text
0,german : Lauri Watts\n<## Translation ## >\nen...
1,german : & Lauri.Watts.mail;\n<## Translation ...
2,german : Frank Schütte F.Schuette@t-online.de ...
3,german : 2006-02-26 3.5.1\n<## Translation ## ...
4,german : Mit Hilfe des & konqueror;-Modul Babe...
5,german : KDE\n<## Translation ## >\nenglish : KDE
6,german : kdeaddons\n<## Translation ## >\nengl...
7,german : konqueror\n<## Translation ## >\nengl...
8,german : Module\n<## Translation ## >\nenglish...
9,german : Babelfish\n<## Translation ## >\nengl...


<a name="Train"></a>
### Train the model
Now let's use Huggingface TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We do 60 steps to speed things up, but you can set `num_train_epochs=1` for a full run, and turn off `max_steps=None`. We also support TRL's `DPOTrainer`!

In [10]:
from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "formatted_text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Map (num_proc=2):   0%|          | 0/224035 [00:00<?, ? examples/s]

In [None]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.748 GB.
5.938 GB of memory reserved.


In [11]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 224,035 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 200,015,872


Step,Training Loss
1,5.515
2,3.9253
3,4.4006
4,3.035
5,2.7094
6,2.5134
7,2.6056
8,2.8121
9,2.6821
10,2.8137


 You can also use a `TextStreamer` for continuous inference - so you can see the generation token by token, instead of waiting the whole time!

In [26]:
from transformers import TextStreamer
def get_completion(query: str, model, tokenizer) -> str:
  device = "cuda:0"
  FastLanguageModel.for_inference(model) # Enable native 2x faster inference
  prompt_template = """
  German:{query}
  \n<## Translation ## >\n
  English:'
  """


  prompt = prompt_template.format(query=query)

  encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)

  model_inputs = encodeds.to(device)


  text_streamer = TextStreamer(tokenizer)
  generated_ids = model.generate(**model_inputs, streamer = text_streamer, max_new_tokens = 128)
  decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
  return (decoded)

In [27]:
result = get_completion(query='''Lieber Thomas!
Jetzt bist du weit weg! Ich bin sehr unglücklich! Wie geht es dir in Hamburg? Hast du nette Kollegen in der Bank? Ist der Chef nett? Hast du schon eine Wohnung? Ist die Wohnung teuer? Ich arbeite schon zwei Wochen im Goethe-Gymnasium in München.

Die Kollegen und Kolleginnen sind sehr freundlich. Die Schülerinnen und Schüler sind auch sehr nett. München ist schön! Das Wetter ist gut. Aber meine Katze "Mimi" ist krank! Das ist schrecklich. Heute Abend gehe ich ins Theater.

Herzliche Grüße''', model=model, tokenizer=tokenizer)
print(result)

<bos>
  German:Lieber Thomas!
Jetzt bist du weit weg! Ich bin sehr unglücklich! Wie geht es dir in Hamburg? Hast du nette Kollegen in der Bank? Ist der Chef nett? Hast du schon eine Wohnung? Ist die Wohnung teuer? Ich arbeite schon zwei Wochen im Goethe-Gymnasium in München.

Die Kollegen und Kolleginnen sind sehr freundlich. Die Schülerinnen und Schüler sind auch sehr nett. München ist schön! Das Wetter ist gut. Aber meine Katze "Mimi" ist krank! Das ist schrecklich. Heute Abend gehe ich ins Theater.

Herzliche Grüße
  
<## Translation ## >

  English:'
  Dear Thomas!
  You are far away now! I am very unhappy! How are you in Hamburg? Do you have nice colleagues at the bank? Is the boss nice? Do you have a flat yet? Is the flat expensive? I have been working at the Goethe-Gymnasium in Munich for two weeks now.

  The colleagues and the students are very friendly. The students are also very nice. Munich is nice! The weather is good. But my cat "Mimi" is sick! That is terrible. I am goin

<a name="Save"></a>
### Saving, loading finetuned models
To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.

**[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!

In [33]:
model.save_pretrained("gemma-7b-unsloth-german-to-English")
model.push_to_hub("gemma-7b-unsloth-german-to-English")

README.md:   0%|          | 0.00/576 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/800M [00:00<?, ?B/s]

Saved model to https://huggingface.co/gemma-7b-unsloth-german-to-English


Now if you want to load the LoRA adapters we just saved for inference, set `False` to `True`:

# Testing

In [40]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",
    "unsloth/gemma-7b-it-bnb-4bit", # Instruct version of Gemma 7b
    "unsloth/gemma-2b-bnb-4bit",
    "unsloth/gemma-2b-it-bnb-4bit", # Instruct version of Gemma 2b
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-7b-it-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Gemma patching release 2024.3
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.25. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


model.safetensors:   0%|          | 0.00/5.57G [00:00<?, ?B/s]

ValueError: 
                    Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
                    quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
                    in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to
                    `from_pretrained`. Check
                    https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                    for more details.
                    

In [None]:
result = get_completion(query='''Lieber Thomas!
Jetzt bist du weit weg! Ich bin sehr unglücklich! Wie geht es dir in Hamburg? Hast du nette Kollegen in der Bank? Ist der Chef nett? Hast du schon eine Wohnung? Ist die Wohnung teuer? Ich arbeite schon zwei Wochen im Goethe-Gymnasium in München.

Die Kollegen und Kolleginnen sind sehr freundlich. Die Schülerinnen und Schüler sind auch sehr nett. München ist schön! Das Wetter ist gut. Aber meine Katze "Mimi" ist krank! Das ist schrecklich. Heute Abend gehe ich ins Theater.

Herzliche Grüße''', model=model_finetune, tokenizer=tokenizer_finetune)
print(result)