From b9089c53cb82daaaceb9643840add95411767903 Mon Sep 17 00:00:00 2001 From: Christopher Hertel Date: Thu, 13 Mar 2025 23:26:10 +0100 Subject: [PATCH] feat: add support for whisper on openai --- README.md | 3 ++ examples/audio-transcript-whisper.php | 22 ++++++++++ src/Bridge/OpenAI/PlatformFactory.php | 4 ++ src/Bridge/OpenAI/Whisper.php | 31 ++++++++++++++ src/Bridge/OpenAI/Whisper/File.php | 18 ++++++++ src/Bridge/OpenAI/Whisper/ModelClient.php | 52 +++++++++++++++++++++++ 6 files changed, 130 insertions(+) create mode 100755 examples/audio-transcript-whisper.php create mode 100644 src/Bridge/OpenAI/Whisper.php create mode 100644 src/Bridge/OpenAI/Whisper/File.php create mode 100644 src/Bridge/OpenAI/Whisper/ModelClient.php diff --git a/README.md b/README.md index 52f4d9bd..764e1be2 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,9 @@ $embeddings = new Embeddings(); * Embeddings Models * [OpenAI's Text Embeddings](https://platform.openai.com/docs/guides/embeddings/embedding-models) with [OpenAI](https://platform.openai.com/docs/overview) and [Azure](https://learn.microsoft.com/azure/ai-services/openai/concepts/models) as Platform * [Voyage's Embeddings](https://docs.voyageai.com/docs/embeddings) with [Voyage](https://www.voyageai.com/) as Platform +* Other Models + * [OpenAI's Dall·E](https://platform.openai.com/docs/guides/image-generation) with [OpenAI](https://platform.openai.com/docs/overview) as Platform + * [OpenAI's Whisper](https://platform.openai.com/docs/guides/speech-to-text) with [OpenAI](https://platform.openai.com/docs/overview) as Platform See [issue #28](https://github.com/php-llm/llm-chain/issues/28) for planned support of other models and platforms. 
diff --git a/examples/audio-transcript-whisper.php b/examples/audio-transcript-whisper.php new file mode 100755 index 00000000..8e1adaa8 --- /dev/null +++ b/examples/audio-transcript-whisper.php @@ -0,0 +1,22 @@ +loadEnv(dirname(__DIR__).'/.env'); + +if (empty($_ENV['OPENAI_API_KEY'])) { + echo 'Please set the OPENAI_API_KEY environment variable.'.PHP_EOL; + exit(1); +} + +$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']); +$model = new Whisper(); +$file = new File(dirname(__DIR__).'/tests/Fixture/audio.mp3'); + +$response = $platform->request($model, $file); + +echo $response->getContent().PHP_EOL; diff --git a/src/Bridge/OpenAI/PlatformFactory.php b/src/Bridge/OpenAI/PlatformFactory.php index 410ca73e..7453bf4b 100644 --- a/src/Bridge/OpenAI/PlatformFactory.php +++ b/src/Bridge/OpenAI/PlatformFactory.php @@ -9,6 +9,7 @@ use PhpLlm\LlmChain\Bridge\OpenAI\Embeddings\ResponseConverter as EmbeddingsResponseConverter; use PhpLlm\LlmChain\Bridge\OpenAI\GPT\ModelClient as GPTModelClient; use PhpLlm\LlmChain\Bridge\OpenAI\GPT\ResponseConverter as GPTResponseConverter; +use PhpLlm\LlmChain\Bridge\OpenAI\Whisper\ModelClient as WhisperModelClient; use PhpLlm\LlmChain\Platform; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -23,17 +24,20 @@ public static function create( $httpClient = $httpClient instanceof EventSourceHttpClient ? 
$httpClient : new EventSourceHttpClient($httpClient); $dallEModelClient = new DallEModelClient($httpClient, $apiKey); + $whisperModelClient = new WhisperModelClient($httpClient, $apiKey); return new Platform( [ new GPTModelClient($httpClient, $apiKey), new EmbeddingsModelClient($httpClient, $apiKey), $dallEModelClient, + $whisperModelClient, ], [ new GPTResponseConverter(), new EmbeddingsResponseConverter(), $dallEModelClient, + $whisperModelClient, ], ); } diff --git a/src/Bridge/OpenAI/Whisper.php b/src/Bridge/OpenAI/Whisper.php new file mode 100644 index 00000000..3b2c2f19 --- /dev/null +++ b/src/Bridge/OpenAI/Whisper.php @@ -0,0 +1,31 @@ + $options + */ + public function __construct( + private string $version = self::WHISPER_1, + private array $options = [], + ) { + } + + public function getVersion(): string + { + return $this->version; + } + + public function getOptions(): array + { + return $this->options; + } +} diff --git a/src/Bridge/OpenAI/Whisper/File.php b/src/Bridge/OpenAI/Whisper/File.php new file mode 100644 index 00000000..25fafdd7 --- /dev/null +++ b/src/Bridge/OpenAI/Whisper/File.php @@ -0,0 +1,18 @@ +httpClient->request('POST', 'https://api.openai.com/v1/audio/transcriptions', [ + 'auth_bearer' => $this->apiKey, + 'headers' => ['Content-Type' => 'multipart/form-data'], + 'body' => array_merge($options, $model->getOptions(), [ + 'model' => $model->getVersion(), + 'file' => fopen($input->path, 'r'), + ]), + ]); + } + + public function convert(ResponseInterface $response, array $options = []): LlmResponse + { + $data = $response->toArray(); + + return new TextResponse($data['text']); + } +}