From b9089c53cb82daaaceb9643840add95411767903 Mon Sep 17 00:00:00 2001
From: Christopher Hertel <mail@christopher-hertel.de>
Date: Thu, 13 Mar 2025 23:26:10 +0100
Subject: [PATCH] feat: add support for whisper on openai

---
 README.md                                 |  3 ++
 examples/audio-transcript-whisper.php     | 22 ++++++++++
 src/Bridge/OpenAI/PlatformFactory.php     |  4 ++
 src/Bridge/OpenAI/Whisper.php             | 31 ++++++++++++++
 src/Bridge/OpenAI/Whisper/File.php        | 18 ++++++++
 src/Bridge/OpenAI/Whisper/ModelClient.php | 52 +++++++++++++++++++++++
 6 files changed, 130 insertions(+)
 create mode 100755 examples/audio-transcript-whisper.php
 create mode 100644 src/Bridge/OpenAI/Whisper.php
 create mode 100644 src/Bridge/OpenAI/Whisper/File.php
 create mode 100644 src/Bridge/OpenAI/Whisper/ModelClient.php

diff --git a/README.md b/README.md
index 52f4d9bd..764e1be2 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,9 @@ $embeddings = new Embeddings();
 * Embeddings Models
   * [OpenAI's Text Embeddings](https://platform.openai.com/docs/guides/embeddings/embedding-models) with [OpenAI](https://platform.openai.com/docs/overview) and [Azure](https://learn.microsoft.com/azure/ai-services/openai/concepts/models) as Platform
   * [Voyage's Embeddings](https://docs.voyageai.com/docs/embeddings) with [Voyage](https://www.voyageai.com/) as Platform
+* Other Models
+  * [OpenAI's Dall·E](https://platform.openai.com/docs/guides/image-generation) with [OpenAI](https://platform.openai.com/docs/overview) as Platform
+  * [OpenAI's Whisper](https://platform.openai.com/docs/guides/speech-to-text) with [OpenAI](https://platform.openai.com/docs/overview) as Platform
 
 See [issue #28](https://github.com/php-llm/llm-chain/issues/28) for planned support of other models and platforms.
 
diff --git a/examples/audio-transcript-whisper.php b/examples/audio-transcript-whisper.php
new file mode 100755
index 00000000..8e1adaa8
--- /dev/null
+++ b/examples/audio-transcript-whisper.php
@@ -0,0 +1,27 @@
+<?php
+
+use PhpLlm\LlmChain\Bridge\OpenAI\PlatformFactory;
+use PhpLlm\LlmChain\Bridge\OpenAI\Whisper;
+use PhpLlm\LlmChain\Bridge\OpenAI\Whisper\File;
+use Symfony\Component\Dotenv\Dotenv;
+
+// Resolve the project root once; the autoloader, the .env file and the audio fixture all live below it.
+$projectDir = dirname(__DIR__);
+
+require_once $projectDir.'/vendor/autoload.php';
+(new Dotenv())->loadEnv($projectDir.'/.env');
+
+// Without an API key the platform cannot be created, so stop with a non-zero exit code.
+$apiKey = $_ENV['OPENAI_API_KEY'] ?? null;
+if (empty($apiKey)) {
+    echo 'Please set the OPENAI_API_KEY environment variable.'.PHP_EOL;
+    exit(1);
+}
+
+// Hand the fixture audio file to the Whisper model and print the content of the response.
+$response = PlatformFactory::create($apiKey)->request(
+    new Whisper(),
+    new File($projectDir.'/tests/Fixture/audio.mp3'),
+);
+
+echo $response->getContent().PHP_EOL;
diff --git a/src/Bridge/OpenAI/PlatformFactory.php b/src/Bridge/OpenAI/PlatformFactory.php
index 410ca73e..7453bf4b 100644
--- a/src/Bridge/OpenAI/PlatformFactory.php
+++ b/src/Bridge/OpenAI/PlatformFactory.php
@@ -9,6 +9,7 @@
 use PhpLlm\LlmChain\Bridge\OpenAI\Embeddings\ResponseConverter as EmbeddingsResponseConverter;
 use PhpLlm\LlmChain\Bridge\OpenAI\GPT\ModelClient as GPTModelClient;
 use PhpLlm\LlmChain\Bridge\OpenAI\GPT\ResponseConverter as GPTResponseConverter;
+use PhpLlm\LlmChain\Bridge\OpenAI\Whisper\ModelClient as WhisperModelClient;
 use PhpLlm\LlmChain\Platform;
 use Symfony\Component\HttpClient\EventSourceHttpClient;
 use Symfony\Contracts\HttpClient\HttpClientInterface;
@@ -23,17 +24,20 @@ public static function create(
         $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient);
 
         $dallEModelClient = new DallEModelClient($httpClient, $apiKey);
+        $whisperModelClient = new WhisperModelClient($httpClient, $apiKey);
 
         return new Platform(
             [
                 new GPTModelClient($httpClient, $apiKey),
                 new EmbeddingsModelClient($httpClient, $apiKey),
                 $dallEModelClient,
+                $whisperModelClient,
             ],
             [
                 new GPTResponseConverter(),
                 new EmbeddingsResponseConverter(),
                 $dallEModelClient,
+                $whisperModelClient,
             ],
         );
     }
diff --git a/src/Bridge/OpenAI/Whisper.php b/src/Bridge/OpenAI/Whisper.php
new file mode 100644
index 00000000..3b2c2f19
--- /dev/null
+++ b/src/Bridge/OpenAI/Whisper.php
@@ -0,0 +1,44 @@
+<?php
+
+declare(strict_types=1);
+
+namespace PhpLlm\LlmChain\Bridge\OpenAI;
+
+use PhpLlm\LlmChain\Model\Model;
+
+/**
+ * Immutable model descriptor for OpenAI's Whisper: a version identifier plus an options array.
+ */
+final readonly class Whisper implements Model
+{
+    /** Version identifier used when no other version is passed to the constructor. */
+    public const WHISPER_1 = 'whisper-1';
+
+    /**
+     * @param string               $version Model version identifier, defaults to self::WHISPER_1
+     * @param array<string, mixed> $options Options stored on the model and handed out unchanged by getOptions()
+     */
+    public function __construct(
+        private string $version = self::WHISPER_1,
+        private array $options = [],
+    ) {
+    }
+
+    /**
+     * Returns the version identifier given at construction time.
+     */
+    public function getVersion(): string
+    {
+        return $this->version;
+    }
+
+    /**
+     * Returns the options given at construction time.
+     *
+     * @return array<string, mixed>
+     */
+    public function getOptions(): array
+    {
+        return $this->options;
+    }
+}
diff --git a/src/Bridge/OpenAI/Whisper/File.php b/src/Bridge/OpenAI/Whisper/File.php
new file mode 100644
index 00000000..25fafdd7
--- /dev/null
+++ b/src/Bridge/OpenAI/Whisper/File.php
@@ -0,0 +1,28 @@
+<?php
+
+declare(strict_types=1);
+
+namespace PhpLlm\LlmChain\Bridge\OpenAI\Whisper;
+
+use PhpLlm\LlmChain\Exception\InvalidArgumentException;
+
+/**
+ * Value object holding the path of a file that was a readable regular file when the object was created.
+ */
+final readonly class File
+{
+    /**
+     * @param string $path Path to an existing, readable regular file
+     *
+     * @throws InvalidArgumentException When $path is not a regular file or cannot be read
+     */
+    public function __construct(
+        public string $path,
+    ) {
+        // Metadata checks only: reading the whole file just to probe it would load the entire
+        // payload into memory, and is_readable() alone also accepts directories.
+        if (!is_file($path) || !is_readable($path)) {
+            throw new InvalidArgumentException(sprintf('The file "%s" does not exist or is not readable.', $path));
+        }
+    }
+}
diff --git a/src/Bridge/OpenAI/Whisper/ModelClient.php b/src/Bridge/OpenAI/Whisper/ModelClient.php
new file mode 100644
index 00000000..304674f8
--- /dev/null
+++ b/src/Bridge/OpenAI/Whisper/ModelClient.php
@@ -0,0 +1,84 @@
+<?php
+
+declare(strict_types=1);
+
+namespace PhpLlm\LlmChain\Bridge\OpenAI\Whisper;
+
+use PhpLlm\LlmChain\Bridge\OpenAI\Whisper;
+use PhpLlm\LlmChain\Exception\InvalidArgumentException;
+use PhpLlm\LlmChain\Model\Model;
+use PhpLlm\LlmChain\Model\Response\ResponseInterface as LlmResponse;
+use PhpLlm\LlmChain\Model\Response\TextResponse;
+use PhpLlm\LlmChain\Platform\ModelClient as PlatformResponseFactory;
+use PhpLlm\LlmChain\Platform\ResponseConverter as PlatformResponseConverter;
+use Symfony\Contracts\HttpClient\HttpClientInterface;
+use Symfony\Contracts\HttpClient\ResponseInterface;
+use Webmozart\Assert\Assert;
+
+/**
+ * Sends audio files to OpenAI's transcription endpoint and wraps the decoded answer in a TextResponse.
+ *
+ * The class implements both the model client and the response converter contract for Whisper models.
+ */
+final readonly class ModelClient implements PlatformResponseFactory, PlatformResponseConverter
+{
+    /**
+     * @param HttpClientInterface $httpClient HTTP client used for the API call
+     * @param string              $apiKey     Non-empty API key, sent as bearer token
+     */
+    public function __construct(
+        private HttpClientInterface $httpClient,
+        #[\SensitiveParameter]
+        private string $apiKey,
+    ) {
+        Assert::stringNotEmpty($apiKey, 'The API key must not be empty.');
+    }
+
+    /**
+     * Only the combination of a Whisper model and a File input is handled by this client.
+     */
+    public function supports(Model $model, object|array|string $input): bool
+    {
+        return $model instanceof Whisper && $input instanceof File;
+    }
+
+    /**
+     * Posts the given file to the transcription endpoint with a multipart/form-data content type.
+     *
+     * @param array<string, mixed> $options Per-call options; they take precedence over the model's own options
+     *
+     * @throws InvalidArgumentException When the file can no longer be opened for reading
+     */
+    public function request(Model $model, object|array|string $input, array $options = []): ResponseInterface
+    {
+        assert($input instanceof File);
+
+        // The file may have vanished since File was constructed; fail loudly instead of uploading `false`.
+        // Binary mode keeps the audio bytes untouched on platforms that distinguish text and binary streams.
+        $handle = fopen($input->path, 'rb');
+        if (false === $handle) {
+            throw new InvalidArgumentException(sprintf('The file "%s" could not be opened for reading.', $input->path));
+        }
+
+        return $this->httpClient->request('POST', 'https://api.openai.com/v1/audio/transcriptions', [
+            'auth_bearer' => $this->apiKey,
+            'headers' => ['Content-Type' => 'multipart/form-data'],
+            // Merge order: model defaults first, then per-call overrides, then the two mandatory fields,
+            // which user-supplied options must never replace.
+            'body' => array_merge($model->getOptions(), $options, [
+                'model' => $model->getVersion(),
+                'file' => $handle,
+            ]),
+        ]);
+    }
+
+    /**
+     * Wraps the "text" field of the decoded API response in a TextResponse.
+     */
+    public function convert(ResponseInterface $response, array $options = []): LlmResponse
+    {
+        $data = $response->toArray();
+
+        return new TextResponse($data['text']);
+    }
+}