diff --git a/README.md b/README.md index dac2d09c..a4a6f313 100644 --- a/README.md +++ b/README.md @@ -391,6 +391,7 @@ $response = $client->audio()->transcribe([ 'model' => 'whisper-1', 'file' => fopen('audio.mp3', 'r'), 'response_format' => 'verbose_json', + 'timestamp_granularities' => ['segment', 'word'] ]); $response->task; // 'transcribe' @@ -412,6 +413,12 @@ foreach ($response->segments as $segment) { $segment->transient; // false } +foreach ($response->words as $word) { + $word->word; // 'Hello' + $word->start; // 0.31 + $word->end; // 0.92 +} + $response->toArray(); // ['task' => 'transcribe', ...] ``` diff --git a/src/Resources/Audio.php b/src/Resources/Audio.php index 6dfef09d..57d535e3 100644 --- a/src/Resources/Audio.php +++ b/src/Resources/Audio.php @@ -56,7 +56,7 @@ public function transcribe(array $parameters): TranscriptionResponse { $payload = Payload::upload('audio/transcriptions', $parameters); - /** @var Response, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, text: string}> $response */ + /** @var Response, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, words: array, text: string}> $response */ $response = $this->transporter->requestObject($payload); return TranscriptionResponse::from($response->data(), $response->meta()); diff --git a/src/Responses/Audio/TranscriptionResponse.php b/src/Responses/Audio/TranscriptionResponse.php index eac0fedd..e1602f1d 100644 --- a/src/Responses/Audio/TranscriptionResponse.php +++ b/src/Responses/Audio/TranscriptionResponse.php @@ -12,12 +12,12 @@ use OpenAI\Testing\Responses\Concerns\Fakeable; /** - * @implements ResponseContract, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, text: string}> + * @implements ResponseContract, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, words: array, text: string}> */ final class TranscriptionResponse implements ResponseContract, ResponseHasMetaInformationContract { /** - * @use ArrayAccessible, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, text: string}> + * @use ArrayAccessible, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, words: array, text: string}> */ use ArrayAccessible; @@ -26,12 +26,14 @@ final class TranscriptionResponse implements ResponseContract, ResponseHasMetaIn /** * @param array $segments + * @param array $words */ private function __construct( public readonly ?string $task, public readonly ?string $language, public readonly ?float $duration, public readonly array $segments, + public readonly array $words, public readonly string $text, private readonly MetaInformation $meta, ) { @@ -40,7 +42,7 @@ private function __construct( /** * Acts as static factory, and returns a new Response instance. * - * @param array{task: ?string, language: ?string, duration: ?float, segments: array, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, text: string}|string $attributes + * @param array{task: ?string, language: ?string, duration: ?float, segments: array, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, words: array, text: string}|string $attributes */ public static function from(array|string $attributes, MetaInformation $meta): self { @@ -52,11 +54,16 @@ public static function from(array|string $attributes, MetaInformation $meta): se $result ), $attributes['segments']) : []; + $words = isset($attributes['words']) ? array_map(fn (array $result): TranscriptionResponseWord => TranscriptionResponseWord::from( + $result + ), $attributes['words']) : []; + return new self( $attributes['task'] ?? null, $attributes['language'] ?? null, $attributes['duration'] ?? null, $segments, + $words, $attributes['text'], $meta, ); @@ -75,6 +82,10 @@ public function toArray(): array static fn (TranscriptionResponseSegment $result): array => $result->toArray(), $this->segments, ), + 'words' => array_map( + static fn (TranscriptionResponseWord $result): array => $result->toArray(), + $this->words, + ), 'text' => $this->text, ]; } diff --git a/src/Responses/Audio/TranscriptionResponseWord.php b/src/Responses/Audio/TranscriptionResponseWord.php new file mode 100644 index 00000000..12f3e3a8 --- /dev/null +++ b/src/Responses/Audio/TranscriptionResponseWord.php @@ -0,0 +1,52 @@ + + */ +final class TranscriptionResponseWord implements ResponseContract +{ + /** + * @use ArrayAccessible + */ + use ArrayAccessible; + + private function __construct( + public readonly string $word, + public readonly float $start, + public readonly float $end, + ) { + } + + /** + * Acts as static factory, and returns a new Response instance. + * + * @param array{word: string, start: float, end: float} $attributes + */ + public static function from(array $attributes): self + { + return new self( + $attributes['word'], + $attributes['start'], + $attributes['end'], + ); + } + + /** + * {@inheritDoc} + */ + public function toArray(): array + { + return [ + 'word' => $this->word, + 'start' => $this->start, + 'end' => $this->end, + ]; + } +} diff --git a/src/ValueObjects/Transporter/Payload.php b/src/ValueObjects/Transporter/Payload.php index 4fcb4c5a..d5aa2110 100644 --- a/src/ValueObjects/Transporter/Payload.php +++ b/src/ValueObjects/Transporter/Payload.php @@ -164,7 +164,7 @@ public function toRequest(BaseUri $baseUri, Headers $headers, QueryParams $query if ($this->contentType === ContentType::MULTIPART) { $streamBuilder = new MultipartStreamBuilder($psr17Factory); - /** @var array $parameters */ + /** @var array> $parameters */ $parameters = $this->parameters; foreach ($parameters as $key => $value) { @@ -172,6 +172,14 @@ public function toRequest(BaseUri $baseUri, Headers $headers, QueryParams $query $value = (string) $value; } + if (is_array($value)) { + foreach ($value as $nestedValue) { + $streamBuilder->addResource($key.'[]', $nestedValue); + } + + continue; + } + $streamBuilder->addResource($key, $value); } diff --git a/tests/Fixtures/Audio.php b/tests/Fixtures/Audio.php index a5ad82e1..96771698 100644 --- a/tests/Fixtures/Audio.php +++ b/tests/Fixtures/Audio.php @@ -33,6 +33,28 @@ function audioTranscriptionVerboseJson(): array 'transient' => false, ], ], + 'words' => [ + [ + 'word' => 'Hello', + 'start' => 0.31999999284744, + 'end' => 0.9200000166893, + ], + [ + 'word' => 'how', + 'start' => 1.0, + 'end' => 1.5599999427795, + ], + [ + 'word' => 'are', + 'start' => 1.5599999427795, + 'end' => 1.8799999952316, + ], + [ + 'word' => 'you', + 'start' => 1.8799999952316, + 'end' => 2.1600000858307, + ], + ], 'text' => 'Hello, how are you?', ]; } diff --git a/tests/Responses/Audio/TranscriptionResponse.php b/tests/Responses/Audio/TranscriptionResponse.php index 3f01da08..be21176f 100644 --- a/tests/Responses/Audio/TranscriptionResponse.php +++ b/tests/Responses/Audio/TranscriptionResponse.php @@ -2,6 +2,7 @@ use OpenAI\Responses\Audio\TranscriptionResponse; use OpenAI\Responses\Audio\TranscriptionResponseSegment; +use OpenAI\Responses\Audio\TranscriptionResponseWord; use OpenAI\Responses\Meta\MetaInformation; test('from json', function () { @@ -13,6 +14,7 @@ ->language->toBeNull() ->duration->toBeNull() ->segments->toBeEmpty() + ->words->toBeEmpty() ->text->toBe('Hello, how are you?') ->meta()->toBeInstanceOf(MetaInformation::class); }); @@ -28,6 +30,9 @@ ->segments->toBeArray() ->segments->toHaveCount(1) ->segments->each->toBeInstanceOf(TranscriptionResponseSegment::class) + ->words->toBeArray() + ->words->toHaveCount(4) + ->words->each->toBeInstanceOf(TranscriptionResponseWord::class) ->text->toBe('Hello, how are you?') ->meta()->toBeInstanceOf(MetaInformation::class); }); @@ -41,6 +46,7 @@ ->language->toBeNull() ->duration->toBeNull() ->segments->toBeEmpty() + ->words->toBeEmpty() ->text->toBe('Hello, how are you?') ->meta()->toBeInstanceOf(MetaInformation::class); }); @@ -54,6 +60,7 @@ ->language->toBeNull() ->duration->toBeNull() ->segments->toBeEmpty() + ->words->toBeEmpty() ->text->toBe(<<<'SRT' 1 00:00:00,000 --> 00:00:04,000 @@ -73,6 +80,7 @@ ->language->toBeNull() ->duration->toBeNull() ->segments->toBeEmpty() + ->words->toBeEmpty() ->text->toBe(<<<'VTT' WEBVTT diff --git a/tests/Responses/Audio/TranscriptionResponseWord.php b/tests/Responses/Audio/TranscriptionResponseWord.php new file mode 100644 index 00000000..10c13ae8 --- /dev/null +++ b/tests/Responses/Audio/TranscriptionResponseWord.php @@ -0,0 +1,20 @@ +toBeInstanceOf(TranscriptionResponseWord::class) + ->word->toBe('Hello') + ->start->toBe(0.31999999284744) + ->end->toBe(0.9200000166893); +}); + +test('to array', function () { + $result = TranscriptionResponseWord::from(audioTranscriptionVerboseJson()['words'][0]); + + expect($result->toArray()) + ->toBe(audioTranscriptionVerboseJson()['words'][0]); +});