In [None]:
import  digitalhub as dh

# Project handle used by every later cell; the name is kept in one place so it
# is easy to point the notebook at a different project.
PROJECT_NAME = "llmpa-test"
project = dh.get_or_create_project(PROJECT_NAME)

## 1. Create and Preprocess dataset

In [None]:
# Pinned packages installed in the runtime of the dataset-preparation function.
preprocess_requirements = [
    "datasets[audio]==3.6.0",
    "transformers==4.56.1",
    "torch==2.8.0",
    "accelerate==1.10.1",
    "evaluate==0.4.5",
    "jiwer==4.0.0",
]

# Register the `preprocess_dataset` handler of the fine-tuning script as the
# "create-dataset" Python function of the project.
func = project.new_function(
    name="create-dataset",
    kind="python",
    python_version="PYTHON3_10",
    code_src="src/fine_tuning_seq2seq.py",
    handler="preprocess_dataset",
    requirements=preprocess_requirements,
)

In [None]:
# Run the dataset-preparation function as a job.
#
# The persistent volume is mounted at `volume_mount_path`, and the Hugging Face
# cache is placed inside it. The cache variables must be ABSOLUTE paths: a
# relative value such as "shared/data/huggingface" is resolved against the
# job's working directory and only lands on the volume if that directory
# happens to be "/".
volume_mount_path = "/shared/data"
hf_cache_dir = f"{volume_mount_path}/huggingface"

# NOTE: the run handle keeps its original name (`train_run`) even though this
# is the preprocessing run; the fine-tuning cell later rebinds the same name.
train_run = func.run(
    action="job",
    parameters={
        "model_id": "openai/whisper-small",
        "artifact_name": "audio-dataset",
        "dataset_name": "mozilla-foundation/common_voice_17_0",
        "language": "Italian",
        "language_code": "it",
        "max_train_samples": 100,
        "max_eval_samples": 100,
    },
    # Secret exposed to the job (name suggests the Hugging Face access token).
    secrets=["HF_TOKEN"],
    envs=[
        {"name": "HF_HOME", "value": hf_cache_dir},
        {"name": "TRANSFORMERS_CACHE", "value": hf_cache_dir},
    ],
    volumes=[
        {
            "volume_type": "persistent_volume_claim",
            "name": "volume-llmpa",
            "mount_path": volume_mount_path,
            "spec": {"size": "300Gi"},
        }
    ],
)

## 2. Fine-tuning

In [None]:
# Pinned packages installed in the runtime of the fine-tuning function.
train_requirements = [
    "datasets[audio]==3.6.0",
    "transformers==4.52.0",
    "torch==2.8.0",
    "accelerate==1.10.1",
    "evaluate==0.4.5",
    "jiwer==4.0.0",
]

# Register the `train_and_log_model` handler of the fine-tuning script as the
# "train-whisper" Python function of the project.
func = project.new_function(
    name="train-whisper",
    kind="python",
    python_version="PYTHON3_10",
    code_src="src/fine_tuning_seq2seq.py",
    handler="train_and_log_model",
    requirements=train_requirements,
)

In [None]:
# Run the fine-tuning function as a job on the "1xa100" profile.
#
# The persistent volume is mounted at `volume_mount_path`, and the Hugging Face
# cache is placed inside it. The cache variables must be ABSOLUTE paths: a
# relative value such as "shared/data/huggingface" is resolved against the
# job's working directory and only lands on the volume if that directory
# happens to be "/".
volume_mount_path = "/shared/data"
hf_cache_dir = f"{volume_mount_path}/huggingface"

# NOTE(review): the dataset-preparation cell uses
# "mozilla-foundation/common_voice_17_0" and requests 300Gi for the same
# "volume-llmpa" claim, while this cell uses common_voice_11_0 and 100Gi.
# Values are left as they were -- confirm which ones are intended.
train_run = func.run(
    action="job",
    parameters={
        "model_id": "openai/whisper-small",
        "model_name": "whisper-ft",
        "dataset_name": "mozilla-foundation/common_voice_11_0",
        "language": "Italian",
        "language_code": "it",
        "max_train_samples": 100,
        "max_eval_samples": 100,
        "eval_steps": 100,
        "save_steps": 100,
        "max_steps": 500,
        "warmup_steps": 50,
    },
    profile="1xa100",
    # Secret exposed to the job (name suggests the Hugging Face access token).
    secrets=["HF_TOKEN"],
    envs=[
        {"name": "HF_HOME", "value": hf_cache_dir},
        {"name": "TRANSFORMERS_CACHE", "value": hf_cache_dir},
    ],
    volumes=[
        {
            "volume_type": "persistent_volume_claim",
            "name": "volume-llmpa",
            "mount_path": volume_mount_path,
            "spec": {"size": "100Gi"},
        }
    ],
)

## 3. Convert to FasterWhisper
Download the fine-tuned model, convert it to the FasterWhisper (CTranslate2) format, and test it locally.

In [None]:
# Fetch the "whisper-ft" model entity from the project and download its files
# into a local folder; overwrite=True lets the cell be re-run over an existing
# download.
local_model_dir = "./model/whisper-ft"
model = project.get_model("whisper-ft")
model.download(local_model_dir, overwrite=True)

In [None]:
%pip install faster-whisper transformers torch==2.8.0
wget -O audio.wav https://github.com/user-attachments/assets/711d1279-6af9-4c6c-a052-e59e7730b757

In [None]:
from ctranslate2.converters import TransformersConverter

# Convert the downloaded Transformers checkpoint to the CTranslate2 format
# used by faster-whisper.
#
# The output directory must be the one the following cells load and log
# ("./model/faster-whisper-ft"); writing to a different folder leaves those
# cells pointing at a path that does not exist.
converted_model_dir = "./model/faster-whisper-ft"

tc = TransformersConverter(
    "./model/whisper-ft",
    # Extra files copied next to the converted weights.
    copy_files=["tokenizer.json", "preprocessor_config.json", "README.md"],
)
# force=True lets the cell be re-run when the output directory already exists.
tc.convert(converted_model_dir, quantization="float16", force=True)

In [None]:
from faster_whisper import WhisperModel

# Load the converted model from disk on CPU for a local check.
converted_model_path = './model/faster-whisper-ft'
model = WhisperModel(converted_model_path, device="cpu")


In [None]:
# Transcribe the sample clip with the locally loaded model.
segments, info = model.transcribe("audio.wav", beam_size=5)

# Report the language reported by the model together with its probability.
language_report = "Detected language '%s' with probability %f" % (
    info.language,
    info.language_probability,
)
print(language_report)

# Print every segment as "[start -> end] text".
for seg in segments:
    segment_fields = (seg.start, seg.end, seg.text)
    print("[%.2fs -> %.2fs] %s" % segment_fields)

## 4. Log FasterWhisper model and deploy with KubeAI

In [None]:
# Log the converted model directory to the project as a "huggingface" model
# so it can be deployed.
model = project.log_model(
    "faster-whisper-ft",
    kind="huggingface",
    framework="FasterWhisper",
    source="./model/faster-whisper-ft/",
)

In [None]:
%pip install faster-whisper torch==2.8.0 transformers

In [None]:
from openai import OpenAI

# OpenAI-compatible endpoint of the KubeAI service.
KUBEAI_BASE_URL = "http://kubeai:80/openai/v1"
# Identifier of the deployed model; this value is specific to one deployment
# and has to be replaced with the id of your own deployment.
KUBEAI_MODEL_ID = "model-9b9cc6b85c16488c9d67800e635b7d3d"

# The API key is a placeholder value (presumably not checked by the endpoint).
client = OpenAI(base_url=KUBEAI_BASE_URL, api_key="ignore")

# Open the clip in a `with` block so the file handle is closed once the
# request has completed, even if the call raises.
with open("audio.wav", "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model=KUBEAI_MODEL_ID,
        file=audio_file,
    )

print(transcription.text)