diff --git a/python/WhisperSpeechDetection/Dockerfile b/python/WhisperSpeechDetection/Dockerfile new file mode 100644 index 00000000..a35c8ea5 --- /dev/null +++ b/python/WhisperSpeechDetection/Dockerfile @@ -0,0 +1,51 @@ +# syntax=docker/dockerfile:1.2 + +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. 
# +############################################################################# + +ARG BUILD_REGISTRY +ARG BUILD_TAG=latest +FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} + +ARG RUN_TESTS=false + +RUN pip install --no-cache-dir openai-whisper==20230314 'numpy<1.24,>=1.18' + +RUN python -c 'import whisper; whisper.load_model("base")' +RUN python -c 'import whisper; whisper.load_model("base.en")' +RUN python -c 'import whisper; whisper.load_model("tiny")' +RUN python -c 'from tiktoken_ext.openai_public import gpt2; gpt2()' + +RUN --mount=target=.,readwrite \ + install-component.sh; \ + if [ "${RUN_TESTS,,}" == true ]; then python tests/test_whisper_detection.py; fi + +LABEL org.label-schema.license="Apache 2.0" \ + org.label-schema.name="OpenMPF Whisper Speech Detection" \ + org.label-schema.schema-version="1.0" \ + org.label-schema.url="https://openmpf.github.io" \ + org.label-schema.vcs-url="https://github.com/openmpf/openmpf-components" \ + org.label-schema.vendor="MITRE" diff --git a/python/WhisperSpeechDetection/LICENSE b/python/WhisperSpeechDetection/LICENSE new file mode 100644 index 00000000..6c2e4117 --- /dev/null +++ b/python/WhisperSpeechDetection/LICENSE @@ -0,0 +1,31 @@ +/****************************************************************************** + * Copyright 2023 The MITRE Corporation * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. 
* + ******************************************************************************/ + + This project contains content developed by The MITRE Corporation. If this code + is used in a deployment or embedded within another project, it is requested + that you send an email to opensource@mitre.org in order to let us know where + this software is being used. + + + + This software makes use of source code derived from third party software: + + ------------------------------------------------------------------------------ + + Whisper is licensed under MIT License. + Copyright (c) 2022 OpenAI + + Project page: https://github.com/openai/whisper \ No newline at end of file diff --git a/python/WhisperSpeechDetection/README.md b/python/WhisperSpeechDetection/README.md new file mode 100644 index 00000000..40fa79e6 --- /dev/null +++ b/python/WhisperSpeechDetection/README.md @@ -0,0 +1,109 @@ +# Overview + +This repository contains source code and model data for the OpenMPF Whisper Speech Detection component. +This component uses the OpenAI Whisper model. + +# Introduction + +This component identifies the language spoken in audio and video clips. + +# Input Properties +- `WHISPER_MODEL_SIZE`: Size of the Whisper model. Whisper has `tiny`, `base`, `small`, `medium`, and `large` models available for multilingual models. English-only models are available in `tiny`, `base`, `small`, and `medium`. +- `WHISPER_MODEL_LANG`: Whisper has English-only models and multilingual models. Set to `en` for English-only models and `multi` for multilingual models. +- `WHISPER_MODE`: Determines whether Whisper will perform language detection, speech-to-text transcription, or speech translation. English-only models can only transcribe English audio. Set to `LANGUAGE_DETECTION` for spoken language detection, `TRANSCRIPTION` for speech-to-text transcription, and `SPEECH_TRANSLATION` for speech translation. 
+- `AUDIO_LANGUAGE`: Optional property that indicates the language to use for audio translation or transcription. If left as an empty string, Whisper will automatically detect a single language from the first 30 seconds of audio. + +# Output Properties +- `DETECTED_LANGUAGE`: Language with the highest confidence value. +- `DETECTED_LANGUAGE_CONFIDENCE`: The confidence value of the language with the highest confidence. +- `TRANSCRIPT`: Returns transcript of audio for transcription and translation runs. +- `TRANSLATED_AUDIO`: Returns the translated text for translated audio runs. + +# Behavior +Some quirks in Whisper's behavior when transcribing or translating audio with multiple languages have been observed. + +### Transcribe ### + +Size | Provided Language | Result for Spanish Part | Result for English Part +------|-------------------|-------------------------|------------------------- +base | Auto-detected | Correctly transcribed | Gibberish +large | Auto-detected | Correctly transcribed | Translated to Spanish +base | English | Translated to English | Correctly transcribed +large | English | Translated to English | Correctly transcribed + + +### Translate ### + +Size | Provided Language | Result for Spanish Part | Result for English Part +------|-------------------|-------------------------|------------------------ +base | Auto-detected | Correctly translated | Not included in output +base | English | Correctly translated | Transcribed +large | Auto-detected | Correctly translated | Mostly skipped +large | English | Correctly translated | Mostly skipped + + +See [whisper_behavior_notes.md](whisper_behavior_notes.md) for more details. + +# Language Identifiers +The following are the ISO 639-1 codes, the ISO 639-3 codes, and their corresponding languages which Whisper can translate to English. + +All translations are to English. 
+ +| ISO-639-1 | ISO-639-3 | Language | +| --- |---|------------------| +| `af` | `afr` | Afrikaans | +| `ar` | `ara` | Arabic | +| `hy` | `hye` | Armenian | +| `az` | `aze` | Azerbaijani | +| `be` | `bel` | Belarusian | +| `bs` | `bos` | Bosnian | +| `bg` | `bul` | Bulgarian | +| `ca` | `cat` | Catalan | +| `zh` | `zho` | Chinese | +| `hr` | `hrv` | Croatian | +| `cs` | `ces` | Czech | +| `da` | `dan` | Danish | +| `nl` | `nld` | Dutch | +| `en` | `eng` | English | +| `fi` | `fin` | Finnish | +| `fr` | `fra` | French | +| `gl` | `glg` | Galician | +| `de` | `deu` | German | +| `el` | `ell` | Greek | +| `he` | `heb` | Hebrew | +| `hi` | `hin` | Hindi | +| `hu` | `hun` | Hungarian | +| `is` | `isl` | Icelandic | +| `id` | `ind` | Indonesian | +| `it` | `ita` | Italian | +| `ja` | `jpn` | Japanese | +| `kn` | `kan` | Kannada | +| `kk` | `kaz` | Kazakh | +| `ko` | `kor` | Korean | +| `lv` | `lav` | Latvian | +| `lt` | `lit` | Lithuanian | +| `mk` | `mkd` | Macedonian | +| `ms` | `msa` | Malay | +| `mi` | `mri` | Maori | +| `mr` | `mar` | Marathi | +| `ne` | `nep` | Nepali | +| `no` | `nor` | Norwegian | +| `fa` | `fas` | Persian | +| `pl` | `pol` | Polish | +| `pt` | `por` | Portuguese | +| `ro` | `ron` | Romanian | +| `ru` | `rus` | Russian | +| `sr` | `srp` | Serbian | +| `sk` | `slk` | Slovak | +| `sl` | `slv` | Slovenian | +| `es` | `spa` | Spanish | +| `sw` | `swa` | Swahili | +| `sv` | `swe` | Swedish | +| `tl` | `tgl` | Tagalog | +| `ta` | `tam` | Tamil | +| `th` | `tha` | Thai | +| `tr` | `tur` | Turkish | +| `uk` | `ukr` | Ukrainian | +| `ur` | `urd` | Urdu | +| `vi` | `vie` | Vietnamese | +| `cy` | `cym` | Welsh | \ No newline at end of file diff --git a/python/WhisperSpeechDetection/plugin-files/descriptor/descriptor.json b/python/WhisperSpeechDetection/plugin-files/descriptor/descriptor.json new file mode 100644 index 00000000..1c0cae20 --- /dev/null +++ b/python/WhisperSpeechDetection/plugin-files/descriptor/descriptor.json @@ -0,0 +1,130 @@ +{ + 
"componentName": "WhisperSpeechDetection", + "componentVersion": "7.2", + "middlewareVersion": "7.2", + "sourceLanguage": "python", + "batchLibrary": "WhisperSpeechDetection", + "environmentVariables": [], + "algorithm": { + "name": "WHISPERSPEECH", + "description": "Uses OpenAI's Whisper model to perform language detection in speech.", + "actionType": "DETECTION", + "requiresCollection": { + "states": [] + }, + "providesCollection": { + "states": [ + "DETECTION", + "DETECTION_SPEECH", + "DETECTION_SPEECH_WHISPER" + ], + "properties": [ + { + "name": "WHISPER_MODEL_LANG", + "description": "Whisper has English-only models and multilingual models. Set to 'en' for English-only models and 'multi' for multilingual models.", + "type": "STRING", + "defaultValue": "multi" + }, + { + "name": "WHISPER_MODEL_SIZE", + "description": "Whisper models come in multiple sizes; 'tiny', 'base', 'small', 'medium', and 'large'. Multilingual models are available in all 5 sizes. English-only models are not available in 'large' size, but are available in the other four sizes.", + "type": "STRING", + "defaultValue": "base" + }, + { + "name": "WHISPER_MODE", + "description": "Set to 'LANGUAGE_DETECTION' for spoken language detection, 'TRANSCRIPTION' for speech-to-text transcription, and 'SPEECH_TRANSLATION' for speech translation.", + "type": "STRING", + "defaultValue": "LANGUAGE_DETECTION" + }, + { + "name": "AUDIO_LANGUAGE", + "description": "Optional property that indicates the language to use for audio translation or transcription. 
If left as an empty string, Whisper will automatically detect a single language from the first 30 seconds of audio.", + "type": "STRING", + "defaultValue": "" + } + ] + } + }, + "actions": [ + { + "name": "WHISPER SPEECH LANGUAGE DETECTION ACTION", + "description": "Uses OpenAI's Whisper model to perform language detection in speech.", + "algorithm": "WHISPERSPEECH", + "properties": [ + { + "name": "WHISPER_MODE", + "value": "LANGUAGE_DETECTION" + } + ] + }, + { + "name": "WHISPER SPEECH DETECTION ACTION", + "description": "Uses OpenAI's Whisper model to convert speech to text.", + "algorithm": "WHISPERSPEECH", + "properties": [ + { + "name": "WHISPER_MODE", + "value": "TRANSCRIPTION" + } + ] + }, + { + "name": "WHISPER SPEECH DETECTION WITH TRANSLATION ACTION", + "description": "Uses OpenAI's Whisper model to convert speech to text and translate it to English.", + "algorithm": "WHISPERSPEECH", + "properties": [ + { + "name": "WHISPER_MODE", + "value": "SPEECH_TRANSLATION" + } + ] + } + ], + "tasks": [ + { + "name": "WHISPER SPEECH LANGUAGE DETECTION TASK", + "description": "Uses OpenAI's Whisper model to perform language detection in speech.", + "actions": [ + "WHISPER SPEECH LANGUAGE DETECTION ACTION" + ] + }, + { + "name": "WHISPER SPEECH DETECTION TASK", + "description": "Uses OpenAI's Whisper model to convert speech to text.", + "actions": [ + "WHISPER SPEECH DETECTION ACTION" + ] + }, + { + "name": "WHISPER SPEECH DETECTION WITH TRANSLATION TASK", + "description": "Uses OpenAI's Whisper model to convert speech to text and translate it to English.", + "actions": [ + "WHISPER SPEECH DETECTION WITH TRANSLATION ACTION" + ] + } + ], + "pipelines": [ + { + "name": "WHISPER SPEECH LANGUAGE DETECTION PIPELINE", + "description": "Uses OpenAI's Whisper model to perform language detection in speech.", + "tasks": [ + "WHISPER SPEECH LANGUAGE DETECTION TASK" + ] + }, + { + "name": "WHISPER SPEECH DETECTION PIPELINE", + "description": "Uses OpenAI's Whisper model to 
convert speech to text.", + "tasks": [ + "WHISPER SPEECH DETECTION TASK" + ] + }, + { + "name": "WHISPER SPEECH DETECTION WITH TRANSLATION PIPELINE", + "description": "Uses OpenAI's Whisper model to convert speech to text and translate it to English.", + "tasks": [ + "WHISPER SPEECH DETECTION WITH TRANSLATION TASK" + ] + } + ] +} diff --git a/python/WhisperSpeechDetection/pyproject.toml b/python/WhisperSpeechDetection/pyproject.toml new file mode 100644 index 00000000..d2aa20ab --- /dev/null +++ b/python/WhisperSpeechDetection/pyproject.toml @@ -0,0 +1,29 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. 
# +############################################################################# + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" diff --git a/python/WhisperSpeechDetection/sample_whisper_speech_detector.py b/python/WhisperSpeechDetection/sample_whisper_speech_detector.py new file mode 100644 index 00000000..62db2cda --- /dev/null +++ b/python/WhisperSpeechDetection/sample_whisper_speech_detector.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 + +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. 
# +############################################################################# + +import sys + +from whisper_speech_detection_component import WhisperSpeechDetectionWrapper + + +def main(): + if len(sys.argv) != 3: + sys.exit(f'Usage {sys.argv[0]} <audio_file> <whisper_mode>') + + _, audio_file, whisper_mode = sys.argv + + + job_props = {"WHISPER_MODE": whisper_mode} + + + audio_tracks = WhisperSpeechDetectionWrapper().process_audio(audio_file, 0, 0, job_props) + print(audio_tracks) + detection_props = audio_tracks[0].detection_properties + + if whisper_mode == 0: + print('DETECTED LANGUAGE:', detection_props['DETECTED_LANGUAGE']) + print('DETECTED LANGUAGE CONFIDENCE:', audio_tracks[0].confidence) + elif whisper_mode == 1: + print('DECODED LANGUAGE:', detection_props['DECODED_LANGUAGE']) + print('TRANSCRIPT:', detection_props['TRANSCRIPT']) + elif whisper_mode == 2: + print('DECODED LANGUAGE:', detection_props['DECODED_LANGUAGE']) + print('TRANSCRIPT:', detection_props['TRANSCRIPT']) + print('TRANSLATED AUDIO', detection_props['TRANSLATED_AUDIO']) + + +if __name__ == '__main__': + main() diff --git a/python/WhisperSpeechDetection/setup.cfg b/python/WhisperSpeechDetection/setup.cfg new file mode 100644 index 00000000..e39b3611 --- /dev/null +++ b/python/WhisperSpeechDetection/setup.cfg @@ -0,0 +1,41 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. 
# +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +[metadata] +name = WhisperSpeechDetection +version = 7.2 + +[options] +packages = whisper_speech_detection_component +install_requires = + mpf_component_api>=7.2 + mpf_component_util>=7.2 + openai-whisper==20230314 + numpy<1.24,>=1.18 + +[options.entry_points] +mpf.exported_component = + component = whisper_speech_detection_component.whisper_speech_detection_component:WhisperSpeechDetectionComponent diff --git a/python/WhisperSpeechDetection/tests/data/NOTICE b/python/WhisperSpeechDetection/tests/data/NOTICE new file mode 100644 index 00000000..9ea65fc1 --- /dev/null +++ b/python/WhisperSpeechDetection/tests/data/NOTICE @@ -0,0 +1,48 @@ +# bilingual.mp3 +Copyright 2016 Spanish in Texas Project +Creative Commons Attribution 3.0 Unported License: https://creativecommons.org/licenses/by/3.0 +https://www.youtube.com/watch?v=eBTWD7Zh-AA + +# left.wav +Sphinx-4 Speech Recognition System +Copyright 1999-2015 Carnegie Mellon University. All Rights Reserved. http://cmusphinx.sourceforge.net/ +Source code: https://github.com/cmusphinx/sphinx4 + +Copyright 1999-2015 Carnegie Mellon University. +Portions Copyright 2002-2008 Sun Microsystems, Inc. +Portions Copyright 2002-2008 Mitsubishi Electric Research Laboratories. +Portions Copyright 2013-2015 Alpha Cephei, Inc. + +All Rights Reserved. Use is subject to license terms. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Original authors' names are not deleted. + +4. The authors' names are not used to endorse or promote products derived from this software without specific prior written permission. + +This work was supported in part by funding from the Defense Advanced +Research Projects Agency and the National Science Foundation of the +United States of America, the CMU Sphinx Speech Consortium, and +Sun Microsystems, Inc. + +CARNEGIE MELLON UNIVERSITY, SUN MICROSYSTEMS, INC., MITSUBISHI +ELECTRONIC RESEARCH LABORATORIES AND THE CONTRIBUTORS TO THIS WORK +DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL +CARNEGIE MELLON UNIVERSITY, SUN MICROSYSTEMS, INC., MITSUBISHI +ELECTRONIC RESEARCH LABORATORIES NOR THE CONTRIBUTORS BE LIABLE FOR +ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +# left.avi +Created by combining left.wav with public domain footage. 
\ No newline at end of file diff --git a/python/WhisperSpeechDetection/tests/data/bilingual.mp3 b/python/WhisperSpeechDetection/tests/data/bilingual.mp3 new file mode 100644 index 00000000..e9f099b3 Binary files /dev/null and b/python/WhisperSpeechDetection/tests/data/bilingual.mp3 differ diff --git a/python/WhisperSpeechDetection/tests/data/left.avi b/python/WhisperSpeechDetection/tests/data/left.avi new file mode 100644 index 00000000..6d7c1911 Binary files /dev/null and b/python/WhisperSpeechDetection/tests/data/left.avi differ diff --git a/python/WhisperSpeechDetection/tests/data/left.wav b/python/WhisperSpeechDetection/tests/data/left.wav new file mode 100644 index 00000000..f91e31f9 Binary files /dev/null and b/python/WhisperSpeechDetection/tests/data/left.wav differ diff --git a/python/WhisperSpeechDetection/tests/test_whisper_detection.py b/python/WhisperSpeechDetection/tests/test_whisper_detection.py new file mode 100644 index 00000000..e389a878 --- /dev/null +++ b/python/WhisperSpeechDetection/tests/test_whisper_detection.py @@ -0,0 +1,180 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. 
# +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +import unittest +import os + +import mpf_component_api as mpf +import warnings +from whisper_speech_detection_component import WhisperSpeechDetectionComponent + + +class TestWhisperSpeechDetection(unittest.TestCase): + def setUp(self): + warnings.simplefilter('ignore', category=ResourceWarning) + warnings.simplefilter('ignore', category=DeprecationWarning) + + @staticmethod + def _get_test_file(filename): + return os.path.join( + os.path.dirname(__file__), + 'data', + filename + ) + + def test_audio_job(self): + job = mpf.AudioJob('test', self._get_test_file('left.wav'), 0, -1, {}, {}) + comp = WhisperSpeechDetectionComponent() + result = comp.get_detections_from_audio(job) + + self.assertEqual(1, len(result)) + self.assertEqual('en', result[0].detection_properties['DETECTED_LANGUAGE']) + self.assertEqual('eng', result[0].detection_properties['ISO_LANGUAGE']) + + def test_video_job(self): + media_properties = dict(FPS='24') + + job = mpf.VideoJob('test', self._get_test_file('left.avi'), 0, -1, {}, media_properties) + comp = WhisperSpeechDetectionComponent() + result = comp.get_detections_from_video(job) + + self.assertEqual(1, len(result)) + self.assertEqual('en', result[0].detection_properties['DETECTED_LANGUAGE']) + self.assertEqual('eng', result[0].detection_properties['ISO_LANGUAGE']) + + def test_load_different_models_lang_detection(self): + job = mpf.AudioJob('test', self._get_test_file('left.wav'), 0, -1, {}, {}) + comp = 
WhisperSpeechDetectionComponent() + + self.assertFalse(comp.wrapper.initialized) + + result = comp.get_detections_from_audio(job) + + self.assertEqual(1, len(result)) + self.assertEqual('en', result[0].detection_properties['DETECTED_LANGUAGE']) + self.assertEqual('eng', result[0].detection_properties['ISO_LANGUAGE']) + + self.assertEqual("multi", comp.wrapper.lang) + self.assertEqual("base", comp.wrapper.size) + self.assertTrue(comp.wrapper.initialized) + + job_props = dict(WHISPER_MODEL_SIZE='tiny', WHISPER_MODEL_LANG='multi') + job = mpf.AudioJob('test', self._get_test_file('left.wav'), 0, -1, job_props, {}) + + result = comp.get_detections_from_audio(job) + + self.assertEqual("multi", comp.wrapper.lang) + self.assertEqual("tiny", comp.wrapper.size) + self.assertTrue(comp.wrapper.initialized) + + self.assertEqual(1, len(result)) + self.assertEqual('en', result[0].detection_properties['DETECTED_LANGUAGE']) + + def test_transcribe(self): + job_props = dict(WHISPER_MODE="TRANSCRIPTION") + job = mpf.AudioJob('test', self._get_test_file('left.wav'), 0, -1, job_props, {}) + + comp = WhisperSpeechDetectionComponent() + result = comp.get_detections_from_audio(job) + + expected_text = "There's three left on the left side, the one closest to us." + + self.assertEqual(1, len(result)) + self.assertEqual('en', result[0].detection_properties['DECODED_LANGUAGE']) + self.assertEqual('eng', result[0].detection_properties['ISO_LANGUAGE']) + self.assertEqual(expected_text, result[0].detection_properties["TRANSCRIPT"]) + + def test_transcribe_given_language(self): + expected_text = ( + "Me comunico con diversas personas en inglés y en español o mezclando ambas lenguas. " + "Hay tantas razones. Yo lo hago por obviamente solidaridad, porque me reciben en esa comunidad. " + "Como crecién el lado mexicano no acostumbraba en ese pasado. 
" + "Luego, al cruzar la frontera metafórica porque no existe derecho, me di cuenta, hablan " + "diferente, salpican verdad su conversación con palabras en inglés y porque no. " + "Entonces no es fácil hacerlo porque he tratado de hacerlo y lo aprecio y lo entiendo " + "mucho más por la experiencia que tenido todos estos. Y lo hago para tratar de pertenecer, " + "para no ser diferente que me consideren, como parte de esa comunidad." + ) + + job_props = dict(WHISPER_MODE="TRANSCRIPTION") + job = mpf.AudioJob('test', self._get_test_file('bilingual.mp3'), 0, -1, job_props, {}) + + comp = WhisperSpeechDetectionComponent() + + result = comp.get_detections_from_audio(job) + + self.assertEqual(1, len(result)) + self.assertEqual('es', result[0].detection_properties['DECODED_LANGUAGE']) + self.assertEqual('spa', result[0].detection_properties['ISO_LANGUAGE']) + + # Results for the English portion of the audio are non-deterministic + self.assertTrue(expected_text in result[0].detection_properties["TRANSCRIPT"]) + + job_props = dict(WHISPER_MODE="TRANSCRIPTION", AUDIO_LANGUAGE='es') + job = mpf.AudioJob('test', self._get_test_file('bilingual.mp3'), 0, -1, job_props, {}) + + result = comp.get_detections_from_audio(job) + + self.assertEqual(1, len(result)) + + # Results for the English portion of the audio are non-deterministic + self.assertTrue(expected_text in result[0].detection_properties["TRANSCRIPT"]) + + def test_translation(self): + expected_text = ( + 'I communicate with different people in English and Spanish or mixing both languages. ' + 'There are so many reasons. I do it because obviously solidarity, because they receive ' + 'me in that community. As the Mexican people believe, it is not used in that past. Then, ' + 'when crossing the border, metaphorically, because there is no right, I realize, talking ' + 'different, they get out of the truth, their conversation, with words in English. And why ' + 'not? So it is not easy to do it, because I tried to do it. 
And I appreciate it, and I ' + 'understand it much more, because of the experience I had all these years. And I do it to ' + 'try to be, to not be different than I consider myself to be. As part of that community.' + ) + + job_props = dict(WHISPER_MODE="SPEECH_TRANSLATION") + job = mpf.AudioJob('test', self._get_test_file('bilingual.mp3'), 0, -1, job_props, {}) + + comp = WhisperSpeechDetectionComponent() + + result = comp.get_detections_from_audio(job) + + self.assertEqual(1, len(result)) + self.assertEqual('es', result[0].detection_properties['DECODED_LANGUAGE']) + self.assertEqual('spa', result[0].detection_properties['ISO_LANGUAGE']) + self.assertTrue(expected_text in result[0].detection_properties["TRANSLATED_AUDIO"]) + + job_props = dict(WHISPER_MODE="SPEECH_TRANSLATION", AUDIO_LANGUAGE='es') + job = mpf.AudioJob('test', self._get_test_file('bilingual.mp3'), 0, -1, job_props, {}) + + result = comp.get_detections_from_audio(job) + + self.assertEqual(1, len(result)) + self.assertTrue(expected_text in result[0].detection_properties["TRANSLATED_AUDIO"]) + + +if __name__ == '__main__': + unittest.main(verbosity=2) diff --git a/python/WhisperSpeechDetection/whisper_behavior_notes.md b/python/WhisperSpeechDetection/whisper_behavior_notes.md new file mode 100644 index 00000000..3badea5e --- /dev/null +++ b/python/WhisperSpeechDetection/whisper_behavior_notes.md @@ -0,0 +1,194 @@ +# Whisper Multi-Lingual Audio Investigation + +When set to transcription mode, sometimes Whisper would also translate +the audio + +Translating when it's supposed to be transcribing seems to happen more +with larger models and when the audio language is specified, where the +audio gets translated to the specified language. Spanish+English audio +where the model is told the language is in Spanish will have the English +audio translated to Spanish and vice-versa. These translations are very +similar to the translation output when the model is set to actually +translate. 
+ +Example: First half of audio is in Spanish, **second half in English** + +Default settings (base size multilingual model, model auto-detects +language) output: + +- **Bolded text** is English audio, returns different results each + run + +- Detected language is Spanish + +- Me comunico con diversas personas en inglés y en español o + mezclando ambas lenguas. Hay tantas razones. Yo lo hago por + obviamente solidaridad, porque me reciben en esa comunidad. Como + crecién el lado mexicano no acostumbraba en ese pasado. Luego, al + cruzar la frontera metafórica porque no existe derecho, me di + cuenta, hablan diferente, salpican verdad su conversación con + palabras en inglés y porque no. Entonces no es fácil hacerlo porque + he tratado de hacerlo y lo aprecio y lo entiendo mucho más por la + experiencia que he tenido todos estos años. Y lo hago para tratar de + pertenecer, para no ser diferente que me consideren, como parte de + esa comunidad. **Gracias por crear este estilo y ver cómo se trabaja + Speaking inont不錯. O sea, el flour departador de la creación имеет + su** + + +Large multilingual model, auto-detect language output: + +- **Bolded text** is English audio, which has been translated to + Spanish. Returns the same/almost the same each run + +- Detected language is Spanish + +- Me comunico con diversas personas en inglés y en español o + mezclando ambas lenguas. Hay tantas razones. Yo lo hago por + obviamente solidaridad, porque me reciben en esa comunidad. Como + crecí en el lado mexicano, no acostumbraba en ese pasado. Luego, al + cruzar la frontera metafórica, porque no existe de hecho, me di + cuenta que hablan diferente. Salpican su conversación con palabras + en inglés. ¿Y por qué no? Entonces, no es fácil hacerlo, porque he + tratado de hacerlo. Lo aprecio y lo entiendo mucho más por la + experiencia que he tenido todos estos años. Y lo hago para tratar de + pertenecer, para no ser diferente, que me consideren como parte de + esa comunidad. 
**Lo hago todo el tiempo. En la escuela, con mis + colegas. A veces, cuando estoy enseñando, no mezclo tanto. Trato de + hablar más español en mis clases, porque estoy enseñando en español. + Si trabajo con gente que son amigas de mí, que hablan español, yo le + digo la palabra en la lengua que sea más fácil.** + + + +Say we wanted to transcribe the English portion of the audio, so we tell +the model the audio is in English + +Base multilingual model, English audio output: + +- **Bolded text** is Spanish audio, and it's been translated to + English + +- **I communicate with different people in English and Spanish or in + English. There are so many reasons. I do it because I obviously + solidarity because I receive in that community. As I grew up in + Mexico, I didn\'t use that language. Then, when crossing the border, + I was afraid because there was no right. I realized that speaking + different people, they really get their conversation with English + words. I didn\'t do it because I didn\'t do it. I didn\'t try to do + it because I was afraid. I appreciated it and I understand it much + more because of the experience I had in all these years. I do it to + try to keep it from being different, not to be considered as part of + that community.** I do all the time. At school with my colleagues. + Usually when I\'m teaching, I don\'t mix as much. I try to speak + more Spanish in my Spanish classes because we\'re teaching in + Spanish. I will mix if I\'m working with people that are friends of + mine, that are bilinguals, say the word in the language that comes + easiest. + + +Large multilingual model, English audio output: + +- **Bolded text** is Spanish audio that has been translated to + English + +- **I communicate with different people in English and Spanish or + mixing both languages. There are so many reasons. I do it for + solidarity, because they receive me in that community. As I grew up + in the Mexican side, I wasn\'t used to that past. 
Then, when I + crossed the metaphorical border, because it doesn\'t exist, I + realized that they speak differently. They speak in English, and why + not? So, it\'s not easy to do it, because I\'ve tried to do it. I + appreciate it and understand it more because of the experience I\'ve + had all these years. I do it to try to belong, not to be different.** + I do it all the time, at school with my colleagues. Usually when + I\'m teaching, I don\'t mix as much. I try to speak more Spanish in + my Spanish classes, because we\'re teaching in Spanish. I will mix + if I\'m working with people that are friends of mine, that are + bilingual, I\'ll say the word in the language that comes easiest. + + +If we tell the model to translate instead of transcribe, the +translations are very similar to the ones we've seen above + +Base multilingual model, auto-detect language translation results: + +- Detected language is Spanish + +- Only translated Spanish portion of the audio + +- Telling the model the audio is in Spanish returns the same results + +- I communicate with different people in English and Spanish or + mixing both languages. There are so many reasons. I do it because + obviously solidarity, because they receive me in that community. As + the Mexican people believe, it is not used in that past. Then, when + crossing the border, metaphorically, because there is no right, I + realize, talking different, they get out of the truth, their + conversation, with words in English. Why not? It is not easy to do + it because I try to do it. I appreciate it and I understand it much + more because of the experience I had all these years. I do it to try + to keep it from being, not to be different, and I consider it to be + a community. + + +Base multilingual model, English audio translation results: + +- **Bolded text** is Spanish audio + +- **I communicate with different people in English and Spanish or in + English. There are so many reasons. 
I do it because I obviously + solidarity because I receive in that community. As I grew up in + Mexico, I didn\'t use that language. Then, when crossing the border, + I was afraid because there was no right. I realized that speaking + different people, they really get their conversation with speaking + English. I didn\'t do it because I didn\'t want to do it. I tried to + do it because I was afraid. I appreciated it and I understand it + more because of the experience I had in all these years. I do it to + try to keep it from being different, not to be considered as part of + that community.** I do all the time. At school with my colleagues. + Usually when I\'m teaching, I don\'t mix as much. I try to speak + more Spanish in my Spanish classes because we\'re teaching in + Spanish. I will mix if I\'m working with people that are friends of + mine that are bilingual. Say the word in the language that comes + easiest. + + +Large multilingual model, auto-detect language translation results: + +- **Bolded text** is English audio. It's the last sentence in the English + audio so the English portion of the audio has been mostly skipped + over/ignored + +- I communicate with different people in English and Spanish or + mixing both languages. There are so many reasons. I do it for + solidarity, because they receive me in that community. As I grew up + in the Mexican side, I was not used to that past. Then, when I + crossed the metaphorical border, because it does not exist, I + realized that they speak differently. They speak in English, and why + not? So it is not easy to do it, because I have tried to do it. I + appreciate it and understand it much more because of the experience + I have had all these years. I do it to try to belong, not to be + different, to be considered as part of that community. **I will say + the word in the language that comes easiest.** + + +Large multilingual model, English audio translation results: + +- **Bolded text** is Spanish. 
Again seems to only want to translate + Spanish portion of audio but caught the last sentence of the English + audio as well + +- **I communicate with different people in English and Spanish or + mixing both languages. There are so many reasons. I do it for + solidarity, because they receive me in that community. As I grew up + in the Mexican side, I was not used to that past. Then, when I + crossed the metaphorical border, because it does not exist, I + realized that they speak differently. They speak in English, and why + not? So it is not easy to do it, because I have tried to do it. I + appreciate it and understand it much more because of the experience + I have had all these years. I do it to try to belong, not to be + different, to be considered as part of that community.** I think it is + the language that comes easiest. + + diff --git a/python/WhisperSpeechDetection/whisper_speech_detection_component/__init__.py b/python/WhisperSpeechDetection/whisper_speech_detection_component/__init__.py new file mode 100644 index 00000000..eee59e99 --- /dev/null +++ b/python/WhisperSpeechDetection/whisper_speech_detection_component/__init__.py @@ -0,0 +1,27 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. 
# +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +############################################################################# + +from .whisper_speech_detection_component import WhisperSpeechDetectionComponent, WhisperSpeechDetectionWrapper diff --git a/python/WhisperSpeechDetection/whisper_speech_detection_component/whisper_speech_detection_component.py b/python/WhisperSpeechDetection/whisper_speech_detection_component/whisper_speech_detection_component.py new file mode 100644 index 00000000..cbd67dbe --- /dev/null +++ b/python/WhisperSpeechDetection/whisper_speech_detection_component/whisper_speech_detection_component.py @@ -0,0 +1,321 @@ +############################################################################# +# NOTICE # +# # +# This software (or technical data) was produced for the U.S. Government # +# under contract, and is subject to the Rights in Data-General Clause # +# 52.227-14, Alt. IV (DEC 2007). # +# # +# Copyright 2023 The MITRE Corporation. All Rights Reserved. # +############################################################################# + +############################################################################# +# Copyright 2023 The MITRE Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at                                   #
#                                                                           #
#    http://www.apache.org/licenses/LICENSE-2.0                             #
#                                                                           #
# Unless required by applicable law or agreed to in writing, software       #
# distributed under the License is distributed on an "AS IS" BASIS,         #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
# See the License for the specific language governing permissions and       #
# limitations under the License.                                            #
#############################################################################

import logging
import warnings
from typing import Sequence

import whisper

import mpf_component_api as mpf
import mpf_component_util as mpf_util

logger = logging.getLogger('WhisperDetectionComponent')

# Whisper always warns when forced onto CPU; the fallback is expected here.
warnings.filterwarnings('ignore', 'FP16 is not supported on CPU; using FP32 instead', UserWarning)
warnings.filterwarnings('ignore', category=ResourceWarning, module='multilingual.tiktoken')


class WhisperSpeechDetectionComponent:
    """OpenMPF component wrapping OpenAI Whisper for language detection,
    transcription, and speech-to-English translation of audio and video jobs.
    """

    detection_type = 'SPEECH'

    def __init__(self):
        logger.info('Creating instance of WhisperSpeechDetectionComponent')
        # One wrapper instance is reused across jobs so the loaded model is cached.
        self.wrapper = WhisperSpeechDetectionWrapper()
        logger.info('WhisperSpeechDetectionComponent created')

    def get_detections_from_video(self, job: mpf.VideoJob) -> Sequence[mpf.VideoTrack]:
        """Process the audio stream of a video job.

        Converts the job's frame range to a start/stop time in milliseconds,
        runs the audio pipeline, then wraps each audio track in a
        :class:`mpf.VideoTrack` with a single placeholder (zero-sized)
        frame location at frame 0.

        :param job: Video job whose media properties must include FPS;
            FRAME_COUNT and DURATION are used when available.
        :return: One video track per audio track produced by the wrapper.
        :raises: Re-raises any exception from audio processing or
            track conversion after logging it.
        """
        logger.info('Received video job')

        media_properties = job.media_properties
        media_frame_count = int(media_properties.get('FRAME_COUNT', -1))
        media_duration = float(media_properties.get('DURATION', -1))

        # Frames-per-millisecond; FPS is a required media property for video.
        fpms = float(media_properties['FPS']) / 1000.0
        start_time = int(job.start_frame / fpms)

        # Prefer an explicit sub-range stop frame; otherwise fall back to the
        # media duration, then to a stop time derived from the frame count.
        if 0 < job.stop_frame < media_frame_count - 1:
            stop_time = int(job.stop_frame / fpms)
        elif media_duration > 0:
            stop_time = int(media_duration)
        elif media_frame_count > 0:
            stop_time = int(media_frame_count / fpms)
        else:
            # Unknown length: let the wrapper process to the end of the file.
            stop_time = None

        try:
            audio_tracks = self.wrapper.process_audio(
                target_file=job.data_uri,
                start_time=start_time,
                stop_time=stop_time,
                job_properties=job.job_properties
            )
        except Exception as e:
            # Log with traceback, then pass the exception up to the framework.
            logger.exception('Exception raised while processing audio: %s', e)
            raise

        try:
            # Convert audio tracks to video tracks with placeholder frame locs.
            video_tracks = []
            for track in audio_tracks:
                video_track = mpf.VideoTrack(
                    start_frame=0,
                    stop_frame=-1,
                    confidence=track.confidence,
                    detection_properties=track.detection_properties
                )
                # Zero-sized location: speech has no spatial extent in a frame.
                video_track.frame_locations[0] = mpf.ImageLocation(
                    x_left_upper=0,
                    y_left_upper=0,
                    width=0,
                    height=0,
                    confidence=track.confidence,
                    detection_properties=track.detection_properties
                )
                video_tracks.append(video_track)
        except Exception as e:
            logger.exception('Exception raised while converting to video track: %s', e)
            raise

        logger.info('Processing complete. Found %d tracks.', len(video_tracks))
        return video_tracks

    def get_detections_from_audio(self, job: mpf.AudioJob) -> Sequence[mpf.AudioTrack]:
        """Process an audio job and return the wrapper's audio tracks.

        :param job: Audio job; start/stop times are forwarded unchanged.
        :raises: Re-raises any exception from audio processing after logging.
        """
        logger.info("Received audio job")

        try:
            audio_tracks = self.wrapper.process_audio(
                target_file=job.data_uri,
                start_time=job.start_time,
                stop_time=job.stop_time,
                job_properties=job.job_properties
            )
        except Exception as e:
            logger.exception("Exception raised while processing audio: %s", e)
            raise

        logger.info('Processing complete. Found %d tracks.', len(audio_tracks))
        return audio_tracks


class WhisperSpeechDetectionWrapper:
    """Loads and caches a Whisper model and runs one of three modes:
    LANGUAGE_DETECTION, TRANSCRIPTION, or SPEECH_TRANSLATION.
    """

    # Maps Whisper's ISO 639-1 language codes to ISO 639-3 codes.
    # Class-level constant: shared by all instances, built once at import time.
    iso_map = {
        'af': 'afr',
        'ar': 'ara',
        'hy': 'hye',
        'az': 'aze',
        'be': 'bel',
        'bs': 'bos',
        'bg': 'bul',
        'ca': 'cat',
        'zh': 'zho',
        'cs': 'ces',
        'da': 'dan',
        'nl': 'nld',
        'en': 'eng',
        'et': 'est',
        'fi': 'fin',
        'fr': 'fra',
        'de': 'deu',
        'el': 'ell',
        'he': 'heb',
        'hi': 'hin',
        'hr': 'hrv',
        'hu': 'hun',
        'id': 'ind',
        'gl': 'glg',
        'it': 'ita',
        'is': 'isl',
        'ja': 'jpn',
        'kn': 'kan',
        'kk': 'kaz',
        'ko': 'kor',
        'lv': 'lav',
        'lt': 'lit',
        'mr': 'mar',
        'mi': 'mri',
        'mk': 'mkd',
        'ms': 'msa',
        'ne': 'nep',
        'no': 'nor',
        'fa': 'fas',
        'pl': 'pol',
        'pt': 'por',
        'ro': 'ron',
        'ru': 'rus',
        'sr': 'srp',
        'sk': 'slk',
        'sl': 'slv',
        'es': 'spa',
        'sw': 'swa',
        'sv': 'swe',
        'ta': 'tam',
        'tl': 'tgl',
        'th': 'tha',
        'tr': 'tur',
        'uk': 'ukr',
        'ur': 'urd',
        'vi': 'vie',
        'cy': 'cym'
    }

    def __init__(self):
        # Model is loaded lazily on the first job so construction stays cheap.
        self.model = None
        self.initialized = False
        self.size = None
        self.lang = None

    def process_audio(self, target_file, start_time, stop_time, job_properties):
        """Run the configured Whisper mode against *target_file*.

        :param target_file: Path/URI of the media to process.
        :param start_time: Track start time in milliseconds (copied onto tracks).
        :param stop_time: Track stop time in milliseconds, or None if unknown.
        :param job_properties: Job properties; reads WHISPER_MODEL_SIZE,
            WHISPER_MODEL_LANG, WHISPER_MODE, and (indirectly) AUDIO_LANGUAGE.
        :return: List of :class:`mpf.AudioTrack`.
        :raises mpf.DetectionException: INVALID_PROPERTY for unsupported
            model/mode combinations or an unrecognized WHISPER_MODE.
        """
        model_size = mpf_util.get_property(job_properties, 'WHISPER_MODEL_SIZE', "base")
        model_lang = mpf_util.get_property(job_properties, 'WHISPER_MODEL_LANG', "multi")
        mode = mpf_util.get_property(job_properties, "WHISPER_MODE", "LANGUAGE_DETECTION")

        if model_lang == "en" and model_size == "large":
            raise mpf.DetectionError.INVALID_PROPERTY.exception("Whisper does not have a large English model available.")

        # (Re)load the model on first use or when the requested size/lang changed.
        if not self.initialized or self.size != model_size or self.lang != model_lang:
            self._load_model(model_size, model_lang)
            self.initialized = True

        audio_tracks = []

        if mode == "LANGUAGE_DETECTION":
            if model_lang != "multi":
                raise mpf.DetectionError.INVALID_PROPERTY.exception("Whisper does not support language detection "
                                                                    "using English models. Please use the multilingual "
                                                                    "models.")

            # Whisper detects language from the first 30 seconds of audio.
            audio = whisper.load_audio(target_file)
            audio = whisper.pad_or_trim(audio)

            mel = whisper.log_mel_spectrogram(audio).to(self.model.device)
            _, probs = self.model.detect_language(mel)

            detected_language = max(probs, key=probs.get)
            detected_lang_conf = probs[detected_language]
            logger.info(f"Detected language: {detected_language}")

            iso_639_3 = self.iso_map.get(detected_language, 'UNKNOWN')

            audio_tracks.append(mpf.AudioTrack(
                start_time=start_time,
                stop_time=stop_time,
                confidence=detected_lang_conf,
                detection_properties=dict(
                    DETECTED_LANGUAGE=detected_language,
                    ISO_LANGUAGE=iso_639_3
                )
            ))

            logger.debug('Completed process audio')

        elif mode == "TRANSCRIPTION":
            properties = self._transcribe_text(target_file, job_properties)

            audio_tracks.append(mpf.AudioTrack(
                start_time=start_time,
                stop_time=stop_time,
                detection_properties=properties
            ))

        elif mode == "SPEECH_TRANSLATION":
            # Transcribe first so the track also carries the original-language
            # transcript and language properties, then translate to English.
            properties = self._transcribe_text(target_file, job_properties)
            result = self.model.transcribe(target_file, task="translate")
            properties["TRANSLATED_AUDIO"] = result['text'].strip()

            audio_tracks.append(mpf.AudioTrack(
                start_time=start_time,
                stop_time=stop_time,
                detection_properties=properties
            ))

        else:
            # Previously an unknown mode silently produced zero tracks; fail
            # loudly instead so misconfigured pipelines are caught.
            raise mpf.DetectionError.INVALID_PROPERTY.exception(
                'Unrecognized WHISPER_MODE: ' + mode)

        return audio_tracks

    def _load_model(self, model_size, model_lang):
        """Load the Whisper checkpoint for *model_size*/*model_lang* and cache it."""
        self.size = model_size
        self.lang = model_lang

        # English-only checkpoints are named "<size>.en"; multilingual are "<size>".
        if self.lang == "en":
            model_string = self.size + "." + "en"
        else:
            model_string = self.size

        logger.info(f'Loading the "{model_string}" model...')
        self.model = whisper.load_model(model_string)

    def _transcribe_text(self, target_file, job_properties):
        """Transcribe *target_file*, honoring an optional AUDIO_LANGUAGE hint.

        When AUDIO_LANGUAGE is empty, Whisper auto-detects the language and the
        detected code is reported; otherwise the supplied code is both passed to
        Whisper and reported.

        :return: dict with DECODED_LANGUAGE, ISO_LANGUAGE, and TRANSCRIPT keys.
        """
        language = mpf_util.get_property(job_properties, 'AUDIO_LANGUAGE', "")

        if language == "":
            result = self.model.transcribe(target_file)
            decoded_language = result['language']
        else:
            result = self.model.transcribe(target_file, language=language)
            decoded_language = language

        return dict(
            DECODED_LANGUAGE=decoded_language,
            ISO_LANGUAGE=self.iso_map.get(decoded_language, 'UNKNOWN'),
            TRANSCRIPT=result['text'].strip()
        )