From e621538c419483406e910d917fe6fe7cf3270806 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guillermo=20Lengemann=20Garc=C3=A9s?=
Date: Wed, 27 Aug 2025 11:32:17 -0500
Subject: [PATCH] [Platform][OpenAI] Add file input normalizer

Add a FileNormalizer to the OpenAI bridge. For Gpt models it serializes a
Message\Content\File as

    ['type' => 'file', 'file' => ['filename' => ..., 'file_data' => ...]]

where "filename" comes from the new File::getFilename() (basename of the
file path, or null when the File has no path) and "file_data" is the
result of File::asDataUrl().

Add OpenAiContract::create(), which passes AudioNormalizer and
FileNormalizer to Contract::create() ahead of any caller-supplied
normalizers, and make PlatformFactory use it as the default contract
instead of Contract::create(new AudioNormalizer()).

Add an example script (examples/openai/pdf-input-binary.php) and a unit
test for the normalizer.
---
Review notes (this section is not part of the commit message):

 * Every new PHP file starts with "<?php" and the standard Symfony license
   comment; the declared hunk lengths (+1,34 / +1,49 / +1,31 / +1,77)
   only add up with that header present.
 * FileNormalizer::normalize(): the @return array shape now reads
   "filename: string|null, file_data: string" to match what
   getFilename() and asDataUrl() are used for. The blob id on that file's
   "index" line was computed before this docblock correction.
 * The "From:" header and the @author tags carry no e-mail address;
   add it before running git am, which may refuse the patch otherwise.
 * TODO(review): DocumentNormalizerTest declares
   #[CoversClass(MessageBagNormalizer::class)] for the Gemini bridge class,
   but no test in the file exercises it - looks like a copy/paste
   leftover; confirm and drop it in a follow-up.
 * TODO(review): a File without a path is normalized with
   'filename' => null; confirm the OpenAI API accepts that.

 examples/openai/pdf-input-binary.php          | 34 ++++++++
 .../Bridge/OpenAi/Contract/FileNormalizer.php | 49 ++++++++++++
 .../Bridge/OpenAi/Contract/OpenAiContract.php | 31 ++++++++
 .../src/Bridge/OpenAi/PlatformFactory.php     |  4 +-
 src/platform/src/Message/Content/File.php     |  5 ++
 .../Contract/DocumentNormalizerTest.php       | 77 +++++++++++++++++++
 6 files changed, 198 insertions(+), 2 deletions(-)
 create mode 100644 examples/openai/pdf-input-binary.php
 create mode 100644 src/platform/src/Bridge/OpenAi/Contract/FileNormalizer.php
 create mode 100644 src/platform/src/Bridge/OpenAi/Contract/OpenAiContract.php
 create mode 100644 src/platform/tests/Bridge/OpenAi/Contract/DocumentNormalizerTest.php

diff --git a/examples/openai/pdf-input-binary.php b/examples/openai/pdf-input-binary.php
new file mode 100644
index 000000000..1f9cefc5f
--- /dev/null
+++ b/examples/openai/pdf-input-binary.php
@@ -0,0 +1,34 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use Symfony\AI\Agent\Agent;
+use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
+use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
+use Symfony\AI\Platform\Message\Content\File;
+use Symfony\AI\Platform\Message\Message;
+use Symfony\AI\Platform\Message\MessageBag;
+
+require_once dirname(__DIR__).'/bootstrap.php';
+
+$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
+$model = new Gpt(Gpt::GPT_4O_MINI);
+
+$agent = new Agent($platform, $model, logger: logger());
+$messages = new MessageBag(
+    Message::ofUser(
+        'What is this document about?',
+        // Note: You can use either `File::fromFile` or `Document::fromFile` here.
+        File::fromFile(dirname(__DIR__, 2).'/fixtures/document.pdf'),
+    ),
+);
+$result = $agent->call($messages);
+
+echo $result->getContent().\PHP_EOL;
diff --git a/src/platform/src/Bridge/OpenAi/Contract/FileNormalizer.php b/src/platform/src/Bridge/OpenAi/Contract/FileNormalizer.php
new file mode 100644
index 000000000..769315fc6
--- /dev/null
+++ b/src/platform/src/Bridge/OpenAi/Contract/FileNormalizer.php
@@ -0,0 +1,49 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Bridge\OpenAi\Contract;
+
+use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
+use Symfony\AI\Platform\Contract\Normalizer\ModelContractNormalizer;
+use Symfony\AI\Platform\Message\Content\File;
+use Symfony\AI\Platform\Model;
+
+/**
+ * @author Guillermo Lengemann
+ */
+class FileNormalizer extends ModelContractNormalizer
+{
+    /**
+     * @param File $data
+     *
+     * @return array{type: 'file', file: array{filename: string|null, file_data: string}}
+     */
+    public function normalize(mixed $data, ?string $format = null, array $context = []): array
+    {
+        return [
+            'type' => 'file',
+            'file' => [
+                'filename' => $data->getFilename(),
+                'file_data' => $data->asDataUrl(),
+            ],
+        ];
+    }
+
+    protected function supportedDataClass(): string
+    {
+        return File::class;
+    }
+
+    protected function supportsModel(Model $model): bool
+    {
+        return $model instanceof Gpt;
+    }
+}
diff --git a/src/platform/src/Bridge/OpenAi/Contract/OpenAiContract.php b/src/platform/src/Bridge/OpenAi/Contract/OpenAiContract.php
new file mode 100644
index 000000000..bc0acae02
--- /dev/null
+++ b/src/platform/src/Bridge/OpenAi/Contract/OpenAiContract.php
@@ -0,0 +1,31 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Bridge\OpenAi\Contract;
+
+use Symfony\AI\Platform\Bridge\OpenAi\Whisper\AudioNormalizer;
+use Symfony\AI\Platform\Contract;
+use Symfony\Component\Serializer\Normalizer\NormalizerInterface;
+
+/**
+ * @author Guillermo Lengemann
+ */
+final readonly class OpenAiContract extends Contract
+{
+    public static function create(NormalizerInterface ...$normalizer): Contract
+    {
+        return parent::create(
+            new AudioNormalizer(),
+            new FileNormalizer(),
+            ...$normalizer
+        );
+    }
+}
diff --git a/src/platform/src/Bridge/OpenAi/PlatformFactory.php b/src/platform/src/Bridge/OpenAi/PlatformFactory.php
index c8f97df4d..9d06c5661 100644
--- a/src/platform/src/Bridge/OpenAi/PlatformFactory.php
+++ b/src/platform/src/Bridge/OpenAi/PlatformFactory.php
@@ -11,7 +11,7 @@
 
 namespace Symfony\AI\Platform\Bridge\OpenAi;
 
-use Symfony\AI\Platform\Bridge\OpenAi\Whisper\AudioNormalizer;
+use Symfony\AI\Platform\Bridge\OpenAi\Contract\OpenAiContract;
 use Symfony\AI\Platform\Bridge\OpenAi\Whisper\ModelClient as WhisperModelClient;
 use Symfony\AI\Platform\Bridge\OpenAi\Whisper\ResultConverter as WhisperResponseConverter;
 use Symfony\AI\Platform\Contract;
@@ -45,7 +45,7 @@ public static function create(
                 new DallE\ResultConverter(),
                 new WhisperResponseConverter(),
             ],
-            $contract ?? Contract::create(new AudioNormalizer()),
+            $contract ?? OpenAiContract::create(),
         );
     }
 }
diff --git a/src/platform/src/Message/Content/File.php b/src/platform/src/Message/Content/File.php
index eecf14d0a..73f0faa0b 100644
--- a/src/platform/src/Message/Content/File.php
+++ b/src/platform/src/Message/Content/File.php
@@ -89,4 +89,9 @@ public function asResource()
 
         return fopen($this->path, 'r');
     }
+
+    public function getFilename(): ?string
+    {
+        return null === $this->path ? null : basename($this->path);
+    }
 }
diff --git a/src/platform/tests/Bridge/OpenAi/Contract/DocumentNormalizerTest.php b/src/platform/tests/Bridge/OpenAi/Contract/DocumentNormalizerTest.php
new file mode 100644
index 000000000..ae54bf852
--- /dev/null
+++ b/src/platform/tests/Bridge/OpenAi/Contract/DocumentNormalizerTest.php
@@ -0,0 +1,77 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Tests\Bridge\OpenAi\Contract;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Medium;
+use PHPUnit\Framework\TestCase;
+use Symfony\AI\Platform\Bridge\Gemini\Contract\MessageBagNormalizer;
+use Symfony\AI\Platform\Bridge\OpenAi\Contract\FileNormalizer;
+use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
+use Symfony\AI\Platform\Contract;
+use Symfony\AI\Platform\Message\Content\Document;
+use Symfony\AI\Platform\Message\Content\File;
+
+#[Medium]
+#[CoversClass(FileNormalizer::class)]
+#[CoversClass(MessageBagNormalizer::class)]
+final class DocumentNormalizerTest extends TestCase
+{
+    public function testSupportsNormalization()
+    {
+        $normalizer = new FileNormalizer();
+
+        $this->assertTrue($normalizer->supportsNormalization(new Document('some content', 'application/pdf'), context: [
+            Contract::CONTEXT_MODEL => new Gpt(),
+        ]));
+        $this->assertTrue($normalizer->supportsNormalization(new File('some content', 'application/pdf'), context: [
+            Contract::CONTEXT_MODEL => new Gpt(),
+        ]));
+        $this->assertFalse($normalizer->supportsNormalization('not a document'));
+    }
+
+    public function testGetSupportedTypes()
+    {
+        $normalizer = new FileNormalizer();
+
+        $expected = [
+            File::class => true,
+        ];
+
+        $this->assertSame($expected, $normalizer->getSupportedTypes(null));
+    }
+
+    #[DataProvider('normalizeDataProvider')]
+    public function testNormalize(File $file, array $expected)
+    {
+        $normalizer = new FileNormalizer();
+
+        $normalized = $normalizer->normalize($file);
+
+        $this->assertEquals($expected, $normalized);
+    }
+
+    public static function normalizeDataProvider(): iterable
+    {
+        yield 'document from file' => [
+            File::fromFile(\dirname(__DIR__, 6).'/fixtures/document.pdf'),
+            [
+                'type' => 'file',
+                'file' => [
+                    'filename' => 'document.pdf',
+                    'file_data' => 'data:application/pdf;base64,'.base64_encode(file_get_contents(\dirname(__DIR__, 6).'/fixtures/document.pdf')),
+                ],
+            ],
+        ];
+    }
+}