## Prepare Env

### Import common/shared packages

In [None]:
import os
import shutil
import subprocess
import sys

### Configure logging

In [None]:
import logging

from colorama import Fore, Style

class ColorFormatter(logging.Formatter):
  """Formatter that surrounds each formatted record with a per-level colorama color code."""

  # colorama code appended after every message (and used as the prefix for
  # levels that have no entry in COLORS).
  RESET = Style.RESET_ALL

  # Color/style prefix for each standard logging level.
  COLORS = {
    logging.DEBUG: Style.DIM + Fore.CYAN,
    logging.INFO: Fore.GREEN,
    logging.WARNING: Fore.YELLOW,
    logging.ERROR: Fore.RED,
    logging.CRITICAL: Style.BRIGHT + Fore.RED,
  }

  def format(self, record):
    """Return the base-class formatting of `record`, prefixed by its level color and followed by RESET."""
    prefix = self.COLORS.get(record.levelno, self.RESET)
    body = super().format(record)
    return prefix + body + self.RESET

# Named logger used by the cells of this notebook.
logger = logging.getLogger("Whisper")
# Without an explicit level the logger inherits the root logger's level
# (WARNING by default), which would drop the INFO messages logged below.
logger.setLevel(logging.INFO)

# Reuse the root logger's first handler when the host environment already
# installed one; otherwise let basicConfig() create a default stream handler,
# because indexing an empty handler list raises IndexError.
if not logger.root.handlers:
  logging.basicConfig()
handler = logger.root.handlers[0]
handler.setLevel(logging.INFO)
handler.setFormatter(ColorFormatter("%(levelname)s - %(message)s"))

### Locating FFmpeg

In [None]:
# Look up the ffmpeg executable on PATH; the result is falsy when none is found.
ffmpeg_path = shutil.which('ffmpeg')
if not ffmpeg_path:
  logger.warning("FFmpeg is not found in the current environment's PATH!")
else:
  logger.info(f"Found FFmpeg at: {ffmpeg_path}")

### Checking Active Conda Environment

In [None]:
# CONDA_DEFAULT_ENV is read from the process environment; when it is missing
# no Conda environment is considered active.
conda_env = os.environ.get('CONDA_DEFAULT_ENV')
if conda_env is None:
  # Use the notebook's named logger (not the root `logging` module functions)
  # so this warning is emitted the same way as every other message.
  logger.warning('No Conda environment is activated.')
else:
  logger.info(f"Active conda environment: {conda_env}")

### Ensure onnxruntime-qnn is installed and onnxruntime is not

**FIXME**: For a reason that is not yet understood, onnxruntime and onnxruntime-qnn conflict with each other: the QNNExecutionProvider is not available when onnxruntime is installed. We therefore make sure that onnxruntime-qnn is installed and onnxruntime is not, and then confirm that the QNNExecutionProvider is available.

**TODO**: Investigate the root cause of the conflict.

In [None]:
from importlib.metadata import packages_distributions

# Mapping of importable top-level package name -> names of the distributions
# that provide it
installed_packages = packages_distributions()

# Every distribution that provides the `onnxruntime` import package. More than
# one can be installed side by side (e.g. onnxruntime and onnxruntime-qnn), so
# all of them are inspected instead of only the first entry; dict.fromkeys
# drops repeated names while keeping their order.
ort_dists = list(dict.fromkeys(installed_packages.get('onnxruntime', [])))
conflicting_dists = [dist for dist in ort_dists if dist != 'onnxruntime-qnn']

# Ensure onnxruntime-qnn is installed and other onnxruntimes are not
if not ort_dists:
  logger.warning("onnxruntime-qnn is not installed. Installing it now...")
  subprocess.check_call([sys.executable, "-m", "pip", "install", "onnxruntime-qnn==1.20.0"])
elif conflicting_dists:
  # Remove every onnxruntime flavor other than qnn
  for ort_dist in conflicting_dists:
    logger.info(f"{ort_dist} is installed. Uninstalling it to avoid conflicts...")
    subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", ort_dist])
  # Force a reinstall afterwards: the removed distribution provided the same
  # `onnxruntime` package, so its removal presumably deletes files that
  # onnxruntime-qnn needs as well.
  subprocess.check_call([sys.executable, "-m", "pip", "install", "--force-reinstall", "onnxruntime-qnn==1.20.0"])
else:
  logger.info("Found onnxruntime-qnn.")

# Query the execution providers exposed by the installed onnxruntime build and
# report whether QNNExecutionProvider is one of them.
try:
  from onnxruntime.capi import _pybind_state as C
  available_providers = C.get_available_providers()
  logger.info(f"Available ONNX EPs: {available_providers}")

  if 'QNNExecutionProvider' not in available_providers:
    logger.error("QNNExecutionProvider is not available😱")
  else:
    logger.info("QNNExecutionProvider is available😊")
except ImportError:
  # The import itself failed, so nothing could be queried.
  logger.error("Failed to import onnxruntime.capi. Reinstall onnxruntime-qnn if necessary.")

## Prepare Whisper Model (Whisper-Base-En) for QNPU

### Check QAI Hub config

In [None]:
from qai_hub.client import Client
from pprint import pformat

try:
  # Read and format the configuration before logging anything, so the header
  # below is only printed when there is a configuration to show.
  hub_config_text = pformat(Client().config, indent=2)
except Exception as exc:
  # Boundary catch: keep the notebook running, but report why the
  # configuration could not be read instead of discarding the cause.
  logger.error(
    "QAI Hub not found, please run `qai-hub configure --api_token QAI_API_TOKEN`.")
  logger.error(f"Reason: {type(exc).__name__}: {exc}")
else:
  logger.info("QAI Hub configuration:")
  logger.info(hub_config_text)

### Method 1: Utilizing Qualcomm AI Hub

In [None]:
from pathlib import Path

# The exported ONNX files are looked up under build/whisper_base_en, relative
# to the absolute form of the current working directory.
cwd = Path(os.path.abspath(''))
encoder_file = cwd.joinpath('build', 'whisper_base_en', 'WhisperEncoder.onnx')
decoder_file = cwd.joinpath('build', 'whisper_base_en', 'WhisperDecoder.onnx')
# The model counts as present only when both files exist.
model_exists = encoder_file.exists() and decoder_file.exists()

if not model_exists:
  logger.error("Can not locate Whisper-Base-En model😟")
else:
  logger.info("Found Whisper-Base-En model😊")

In [None]:
from qai_hub_models.models.common import TargetRuntime
from qai_hub_models.models.whisper_base_en.export import (
  ALL_COMPONENTS,
  export_model,
)

# Run the export only when model_exists is false; the result is printed as-is.
if not model_exists:
  export_options = dict(
    device='Snapdragon X Elite CRD',
    components=ALL_COMPONENTS,
    skip_profiling=True,
    skip_inferencing=True,
    output_dir=str(cwd.joinpath('build', 'whisper_base_en')),
    target_runtime=TargetRuntime.ONNX,
  )
  result = export_model(**export_options)
  print(result)

### [TODO] Method 2: Utilizing ONNX EPContext Generation Script

In [None]:
# TODO

# Rebind the encoder/decoder paths to the *_qnn_ctx.onnx files under
# build/whisper_base_en of the current working directory.
cwd = Path(os.path.abspath(''))
encoder_file = cwd.joinpath('build', 'whisper_base_en', 'graph_qst7vs0x_qnn_ctx.onnx')
decoder_file = cwd.joinpath('build', 'whisper_base_en', 'graph_sksc3z46_qnn_ctx.onnx')

## Demo

In [None]:
from timeit import default_timer as timer

### AIHub Model

In [None]:
from demo import load_aihub_model, load_demo_audio

# Load the whisper model and log how long the call took.
start_time = timer()
app = load_aihub_model()
load_seconds = round(timer() - start_time, 2)
logger.info(f"Model loading took: {load_seconds} seconds")

**Qualcomm Sample Audio**

In [None]:
from demo import load_demo_audio

# Transcribe the demo audio, timing only the transcribe call.
audio, sample_rate = load_demo_audio()
start_time = timer()
transcription = app.transcribe(audio, sample_rate)
transcribe_seconds = round(timer() - start_time, 2)

logger.info(f"Transcribing took: {transcribe_seconds} seconds")

# Summary: duration as len(audio) / sample_rate and whitespace-separated word count.
audio_seconds = round(len(audio) / sample_rate, 2)
word_count = len(transcription.split())
print(f"Audio length: {audio_seconds} seconds")
print(f"Word count: {word_count}")
print(f"Transcription:\n{transcription}")

**Mono MP3**

In [None]:
from audio2numpy import open_audio

# The MP3 is read from data/speech, three directory levels above the current
# working directory.
root = Path(os.path.abspath('../../..'))
data_dir = root.joinpath('data', 'speech')
audio_file = str(data_dir.joinpath('nce_04_47_mono.mp3'))

audio, sample_rate = open_audio(audio_file)

# Time only the transcribe call.
start_time = timer()
transcription = app.transcribe(audio, sample_rate)
transcribe_seconds = round(timer() - start_time, 2)

logger.info(f"Transcribing took: {transcribe_seconds} seconds")

# Summary: duration as len(audio) / sample_rate and whitespace-separated word count.
audio_seconds = round(len(audio) / sample_rate, 2)
word_count = len(transcription.split())
print(f"Audio length: {audio_seconds} seconds")
print(f"Word count: {word_count}")
print(f"Transcription:\n{transcription}")