From 0c32ab8e1c8e6a8098677a59adbfc1e615875349 Mon Sep 17 00:00:00 2001 From: Sebastian De Deyne Date: Mon, 12 Feb 2018 10:04:33 +0100 Subject: [PATCH] Generator rewrite (#44) * Wip * Wip * Wip * Wip * Wip * Apply fixes from StyleCI * Wip * Remove async * Wip * Wip * Wip * Apply fixes from StyleCI --- LICENSE.md | 2 +- composer.json | 1 + generator/Console/GenerateCommand.php | 19 ++-- generator/Definitions.php | 51 +++++++++ generator/PackageGenerator.php | 10 +- generator/Parser/DefinitionParser.php | 32 ++++++ generator/Parser/RdfaParser.php | 138 ----------------------- generator/Parser/Tasks/ParseProperty.php | 69 ++++++++++++ generator/Parser/Tasks/ParseType.php | 33 ++++++ generator/Parser/Tasks/Task.php | 46 ++++++++ generator/Property.php | 8 ++ generator/TypeCollection.php | 29 +++-- generator/Writer/Filesystem.php | 6 +- generator/temp/.gitignore | 2 + 14 files changed, 278 insertions(+), 168 deletions(-) create mode 100644 generator/Definitions.php create mode 100644 generator/Parser/DefinitionParser.php delete mode 100644 generator/Parser/RdfaParser.php create mode 100644 generator/Parser/Tasks/ParseProperty.php create mode 100644 generator/Parser/Tasks/ParseType.php create mode 100644 generator/Parser/Tasks/Task.php create mode 100644 generator/temp/.gitignore diff --git a/LICENSE.md b/LICENSE.md index 0b32f5f9f..da40f367e 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # The MIT License (MIT) -Copyright (c) Spatie bvba +Copyright (c) 2017 Spatie bvba > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal diff --git a/composer.json b/composer.json index 0d0e75c0c..92357373a 100644 --- a/composer.json +++ b/composer.json @@ -20,6 +20,7 @@ }, "require-dev": { "graham-campbell/analyzer": "^1.1", + "larapack/dd": "^1.1", "league/flysystem": "^1.0", "phpunit/phpunit": "^6.0", "symfony/console": "^3.2", diff --git 
a/generator/Console/GenerateCommand.php b/generator/Console/GenerateCommand.php index d850a125e..ccbb5389e 100644 --- a/generator/Console/GenerateCommand.php +++ b/generator/Console/GenerateCommand.php @@ -2,22 +2,21 @@ namespace Spatie\SchemaOrg\Generator\Console; +use Spatie\SchemaOrg\Generator\Definitions; use Symfony\Component\Console\Command\Command; use Spatie\SchemaOrg\Generator\PackageGenerator; -use Symfony\Component\Console\Input\InputArgument; +use Symfony\Component\Console\Input\InputOption; use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Output\OutputInterface; class GenerateCommand extends Command { - const SOURCE = 'https://raw.githubusercontent.com/schemaorg/schemaorg/sdo-callisto/data/schema.rdfa'; - protected function configure() { $this ->setName('generate') ->setDescription('Generate the package code from the schema.org docs') - ->addArgument('source', InputArgument::OPTIONAL, 'Path to the RDFa source file', static::SOURCE); + ->addOption('local', 'l', InputOption::VALUE_NONE, 'Use a cached version of the source'); } /** @@ -32,13 +31,15 @@ protected function execute(InputInterface $input, OutputInterface $output) $generator = new PackageGenerator(); - $generator->generate( - file_get_contents($input->getArgument('source')) - ); + $definitions = new Definitions([ + 'core' => 'https://raw.githubusercontent.com/schemaorg/schemaorg/sdo-callisto/data/schema.rdfa', + ]); - $output->writeln('Fresh package generated! Linting...'); + if (! 
$input->getOption('local')) { + $definitions->preload(); + } - exec('find src -exec php -l {} \; | grep "!(No syntax)"'); + $generator->generate($definitions); $output->writeln('Done!'); diff --git a/generator/Definitions.php b/generator/Definitions.php new file mode 100644 index 000000000..66d8def71 --- /dev/null +++ b/generator/Definitions.php @@ -0,0 +1,51 @@ +sources = $sources; + } + + public function preload() + { + foreach ($this->sources as $sourceId => $sourcePath) { + $this->loadSource($sourceId, false); + } + } + + public function query(string $selector): Crawler + { + return (new Crawler($this->loadSource('core')))->filter($selector); + } + + protected function loadSource(string $sourceId, bool $fromCache = true): string + { + if (! isset($this->sources[$sourceId])) { + throw new RuntimeError("Source `{$sourceId}` doesn't exist"); + } + + $cachePath = $this->tempDir.'/'.$sourceId.'.rdfa'; + + if ($fromCache && file_exists($cachePath)) { + return file_get_contents($cachePath); + } + + $rdfa = file_get_contents($this->sources[$sourceId]); + + file_put_contents($cachePath, $rdfa); + + return $rdfa; + } +} diff --git a/generator/PackageGenerator.php b/generator/PackageGenerator.php index 62b85b4b8..3a2ce70f3 100644 --- a/generator/PackageGenerator.php +++ b/generator/PackageGenerator.php @@ -2,16 +2,14 @@ namespace Spatie\SchemaOrg\Generator; -use Spatie\SchemaOrg\Generator\Parser\RdfaParser; use Spatie\SchemaOrg\Generator\Writer\Filesystem; +use Spatie\SchemaOrg\Generator\Parser\DefinitionParser; class PackageGenerator { - public function generate(string $rdfa) + public function generate(Definitions $definitions) { - $types = (new RdfaParser($rdfa))->parse(); - - $types->sort(); + $types = (new DefinitionParser())->parse($definitions); $filesystem = new Filesystem(__DIR__.'/..'); @@ -23,6 +21,6 @@ public function generate(string $rdfa) $filesystem->createType($type); }); - $filesystem->createFactory($types); + $filesystem->createBuilderClass($types); } } 
diff --git a/generator/Parser/DefinitionParser.php b/generator/Parser/DefinitionParser.php new file mode 100644 index 000000000..5846626a5 --- /dev/null +++ b/generator/Parser/DefinitionParser.php @@ -0,0 +1,32 @@ +query('[typeof="rdfs:Class"]') + ->each(function (Crawler $crawler) { + return call_user_func(ParseType::fromCrawler($crawler)); + }); + + $properties = $definitions + ->query('[typeof="rdf:Property"]') + ->each(function (Crawler $crawler) { + return call_user_func(ParseProperty::fromCrawler($crawler)); + }); + + return new TypeCollection( + array_filter($types), array_filter($properties) + ); + } +} diff --git a/generator/Parser/RdfaParser.php b/generator/Parser/RdfaParser.php deleted file mode 100644 index 88c7d11b7..000000000 --- a/generator/Parser/RdfaParser.php +++ /dev/null @@ -1,138 +0,0 @@ -crawler = new Crawler($rdfa); - $this->types = new TypeCollection(); - } - - public function parse(): TypeCollection - { - $this->parseTypes(); - $this->parseProperties(); - - return $this->types; - } - - protected function parseTypes() - { - $this->crawler - ->filter('[typeof="rdfs:Class"]') - ->each(function (Crawler $node) { - $type = new Type(); - - $type->name = $this->getText($node, '[property="rdfs:label"]'); - - if (in_array($type->name, ['', 'DataType', 'Float', 'Integer', 'URL'])) { - return; - } - - $type->description = $this->getText($node, '[property="rdfs:comment"]'); - $type->parent = $this->getText($node, '[property="rdfs:subClassOf"]') ?: 'BaseType'; - - if (strpos($type->parent, ':') !== false) { - return; - } - - $type->resource = $this->getAttribute($node, 'resource'); - - $this->types->push($type); - }); - } - - protected function parseProperties() - { - $this->crawler - ->filter('[typeof="rdf:Property"]') - ->each(function (Crawler $node) { - $property = new Property(); - - $property->name = $this->getText($node, '[property="rdfs:label"]'); - - if (empty($property->name)) { - return; - } - - $property->description = $this->getText($node, 
'[property="rdfs:comment"]'); - $property->resource = $this->getAttribute($node, 'resource'); - - $node - ->filter('[property="http://schema.org/domainIncludes"]') - ->each(function (Crawler $domain) use ($property) { - $this->types->addPropertyToType($property, $this->getText($domain)); - }); - - $node - ->filter('[property="http://schema.org/rangeIncludes"]') - ->each(function (Crawler $range) use ($property) { - $property->addRanges( - $this->castRangesToTypes($this->getText($range)) - ); - }); - }); - } - - protected function getText(Crawler $node, string $selector = null): string - { - if ($selector) { - $node = $node->filter($selector)->first(); - } - - if ($node->count() === 0) { - return ''; - } - - return trim($node->text()); - } - - protected function getAttribute(Crawler $node, string $attribute): string - { - if ($node->count() === 0) { - return ''; - } - - return $node->attr($attribute); - } - - protected function castRangesToTypes(string $range) - { - switch ($range) { - case 'Boolean': - return ['bool']; - case 'False': - return ['false']; - case 'True': - return ['true']; - case 'Date': - case 'Time': - case 'DateTime': - return ['\DateTimeInterface']; - case 'Text': - case 'URL': - return ['string']; - case 'Number': - return ['float', 'int']; - case 'Float': - return ['float']; - case 'Integer': - return ['int']; - default: - return [$range]; - } - } -} diff --git a/generator/Parser/Tasks/ParseProperty.php b/generator/Parser/Tasks/ParseProperty.php new file mode 100644 index 000000000..4b193f7fe --- /dev/null +++ b/generator/Parser/Tasks/ParseProperty.php @@ -0,0 +1,69 @@ +definition); + + $property = new Property(); + + $property->name = $this->getText($node, '[property="rdfs:label"]'); + + if (empty($property->name)) { + return null; + } + + $property->description = $this->getText($node, '[property="rdfs:comment"]'); + + $property->resource = $this->getAttribute($node, 'resource'); + + $node + 
->filter('[property="http://schema.org/domainIncludes"]') + ->each(function (Crawler $domain) use ($property) { + $property->addType($this->getText($domain)); + }); + + $node + ->filter('[property="http://schema.org/rangeIncludes"]') + ->each(function (Crawler $range) use ($property) { + $property->addRanges( + $this->castRangesToTypes($this->getText($range)) + ); + }); + + return $property; + } + + private function castRangesToTypes(string $range): array + { + switch ($range) { + case 'Boolean': + return ['bool']; + case 'False': + return ['false']; + case 'True': + return ['true']; + case 'Date': + case 'Time': + case 'DateTime': + return ['\DateTimeInterface']; + case 'Text': + case 'URL': + return ['string']; + case 'Number': + return ['float', 'int']; + case 'Float': + return ['float']; + case 'Integer': + return ['int']; + default: + return [$range]; + } + } +} diff --git a/generator/Parser/Tasks/ParseType.php b/generator/Parser/Tasks/ParseType.php new file mode 100644 index 000000000..3ef517b52 --- /dev/null +++ b/generator/Parser/Tasks/ParseType.php @@ -0,0 +1,33 @@ +definition); + + $type = new Type(); + + $type->name = $this->getText($node, '[property="rdfs:label"]'); + + if (in_array($type->name, ['', 'DataType', 'Float', 'Integer', 'URL'])) { + return null; + } + + $type->description = $this->getText($node, '[property="rdfs:comment"]'); + $type->parent = $this->getText($node, '[property="rdfs:subClassOf"]') ?: 'BaseType'; + + if (strpos($type->parent, ':') !== false) { + return null; + } + + $type->resource = $this->getAttribute($node, 'resource'); + + return $type; + } +} diff --git a/generator/Parser/Tasks/Task.php b/generator/Parser/Tasks/Task.php new file mode 100644 index 000000000..1ecfef968 --- /dev/null +++ b/generator/Parser/Tasks/Task.php @@ -0,0 +1,46 @@ +definition = $definition; + } + + public static function fromCrawler(Crawler $crawler): self + { + $node = $crawler->getNode(0); + $html = $node->ownerDocument->saveHTML($node); + + return 
new static($html); + } + + protected function getText(Crawler $node, string $selector = null): string + { + if ($selector) { + $node = $node->filter($selector)->first(); + } + + if ($node->count() === 0) { + return ''; + } + + return trim($node->text()); + } + + protected function getAttribute(Crawler $node, string $attribute): string + { + if ($node->count() === 0) { + return ''; + } + + return $node->filter("[{$attribute}]")->attr($attribute) ?? ''; + } +} diff --git a/generator/Property.php b/generator/Property.php index bfe5d5d4a..287e8f2ac 100644 --- a/generator/Property.php +++ b/generator/Property.php @@ -13,9 +13,17 @@ class Property /** @var string */ public $resource; + /** @var array */ + public $types = []; + /** @var array */ public $ranges = []; + public function addType(string $type) + { + $this->types[] = $type; + } + public function addRanges(array $ranges) { foreach ($ranges as $range) { diff --git a/generator/TypeCollection.php b/generator/TypeCollection.php index e2f8da5ae..1b9164535 100644 --- a/generator/TypeCollection.php +++ b/generator/TypeCollection.php @@ -5,25 +5,32 @@ class TypeCollection { /** @var array */ - protected $types = []; + private $types = []; - public function push(Type $type) + public function __construct(array $types, array $properties) { - $this->types[$type->name] = $type; - } + $typeNames = array_map(function (Type $type) { + return $type->name; + }, $types); + + $this->types = array_combine($typeNames, $types); - public function sort() - { ksort($this->types); + + foreach ($properties as $property) { + $this->addProperty($property); + } } - public function addPropertyToType(Property $property, string $type) + private function addProperty(Property $property) { - if (! isset($this->types[$type])) { - return; - } + foreach ($property->types as $type) { + if (! 
isset($this->types[$type])) { + continue; + } - $this->types[$type]->addProperty($property); + $this->types[$type]->addProperty($property); + } } public function each($callable) diff --git a/generator/Writer/Filesystem.php b/generator/Writer/Filesystem.php index a23ba2d6e..09fce05d7 100644 --- a/generator/Writer/Filesystem.php +++ b/generator/Writer/Filesystem.php @@ -21,7 +21,7 @@ public function __construct(string $root) $this->flysystem = new Flysystem($adapter); $this->typeTemplate = new Template('Type.php.twig'); - $this->factoryTemplate = new Template('Schema.php.twig'); + $this->builderClassTemplate = new Template('Schema.php.twig'); } public function clear() @@ -54,11 +54,11 @@ public function createType(Type $type) ); } - public function createFactory(TypeCollection $types) + public function createBuilderClass(TypeCollection $types) { $this->flysystem->put( 'src/Schema.php', - $this->factoryTemplate->render(['types' => $types->toArray()]) + $this->builderClassTemplate->render(['types' => $types->toArray()]) ); } } diff --git a/generator/temp/.gitignore b/generator/temp/.gitignore new file mode 100644 index 000000000..d6b7ef32c --- /dev/null +++ b/generator/temp/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore