In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Hugging Face Hub identifier shared by the model and tokenizer loaders below.
model_name = "openGPT-X/Teuken-7B-instruct-commercial-v0.4"

# Load the checkpoint with bfloat16 weights. `trust_remote_code=True` permits
# the custom Python code shipped in the model repository to run locally.
model = AutoModelForCausalLM.from_pretrained(
    model_name, trust_remote_code=True, torch_dtype=torch.bfloat16
)
model = model.to(device)  # move the weights to the selected device
model = model.eval()      # switch to inference mode (e.g. disables dropout)

# The slow (non-fast) tokenizer implementation is requested explicitly.
tokenizer = AutoTokenizer.from_pretrained(
    model_name, use_fast=False, trust_remote_code=True
)

In [6]:
# Language code for the conversation. It is upper-cased and passed as the chat
# template name, and it is embedded in the conversation file name.
# Available codes: bg, cs, da, de, el, en, es, et, fi, fr, ga, hr, hu, it, lt, lv, mt, nl, pl, pt, ro, sk, sl, sv
language = "de"

# Temporary storage: with an empty string no folder is created and the
# conversation file is written relative to the current working directory.
conversation_storage = ""

# OPTIONAL: store the conversations in a persistent folder instead, e.g. on
# Google Drive (presumably requires the drive to be mounted first — TODO confirm).
# conversation_storage = "/content/drive/MyDrive/ColabGPT"

In [None]:
import json
import os
import textwrap
import time

# Create the conversation storage folder. An empty `conversation_storage`
# skips this step; the history file is then written relative to the current
# working directory. `exist_ok=True` makes re-running the cell harmless.
if conversation_storage:
    os.makedirs(conversation_storage, exist_ok=True)

# One history file per run, tagged with the language and the start time.
# NOTE: the misspelled name `convesation_name` is kept on purpose so that any
# other cell referring to it keeps working.
convesation_name = f"conversation_{language}_{time.strftime('%Y%m%d-%H%M%S')}.json"
messages = []  # Conversation history: a list of {"role": ..., "content": ...} dicts

EOS_MARKER = "</s>"    # end-of-sequence text that may terminate a decoded reply
MAX_NEW_TOKENS = 512   # upper bound on the length of a single reply, in tokens
LINE_WIDTH = 80        # maximum number of characters per printed line

# Interactive chat loop: read a user turn, generate a reply, print it, and
# persist the whole history after every completed exchange. Type "exit" to stop.
while True:
    user_input = input("User: ")

    # Check for the exit command before touching the history so that "exit"
    # is never recorded as a user turn.
    if user_input.strip().lower() == "exit":
        print("Exiting...")
        break

    # Ignore blank input instead of sending an empty turn to the model.
    if not user_input.strip():
        continue

    messages.append({"role": "User", "content": user_input})

    try:
        prompt_ids = tokenizer.apply_chat_template(
            messages,
            chat_template=language.upper(),
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        )
        prediction = model.generate(
            prompt_ids.to(model.device),
            # Budget for the reply only. `max_length` would also count the
            # prompt, which grows with every turn and would eventually leave
            # no room for an answer.
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            num_return_sequences=1,
        )

        # The output sequence starts with the prompt tokens; keep only what
        # was generated after them. Slicing token ids avoids relying on the
        # decoded prompt being an exact string prefix of the decoded output.
        new_token_ids = prediction[0][prompt_ids.shape[-1]:]
        prediction_text = tokenizer.decode(new_token_ids.tolist()).strip()

        # Remove the end-of-sequence marker only when it is really there; a
        # reply that was cut off by the token limit does not end with it.
        if prediction_text.endswith(EOS_MARKER):
            prediction_text = prediction_text[: -len(EOS_MARKER)].rstrip()

        # Print the reply wrapped at word boundaries, keeping the model's own
        # line breaks.
        print("Assistant:")
        for paragraph in prediction_text.splitlines():
            print(textwrap.fill(paragraph, width=LINE_WIDTH))

        # NOTE(review): the role is lower-case here but "User" is capitalised
        # above; confirm that the chat template matches roles case-insensitively.
        messages.append({"role": "assistant", "content": prediction_text})

        # Save the conversation history as JSON. UTF-8 with ensure_ascii=False
        # keeps non-ASCII text (e.g. Bulgarian or Greek) readable in the file.
        history_path = os.path.join(conversation_storage, convesation_name)
        with open(history_path, "w", encoding="utf-8") as f:
            json.dump(messages, f, indent=4, ensure_ascii=False)

    except Exception as e:
        # Any failure while templating, generating, or saving ends the session.
        print(f"Error during generation: {e}")
        break
