diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 69ade427..9e6af5d3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -42,3 +42,29 @@ jobs: - name: Tests run: composer test + + phpstan: + name: PHPStan Static Analysis + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install PHP + uses: shivammathur/setup-php@v2 + with: + php-version: 8.4 + + - name: Cache PHP dependencies + uses: actions/cache@v4 + with: + path: vendor + key: ${{ runner.os }}-php-8.4-composer-${{ hashFiles('**/composer.json') }} + restore-keys: ${{ runner.os }}-php-8.4-composer- + + - name: Install dependencies + run: composer install + + - name: Run PHPStan + run: composer phpstan diff --git a/composer.json b/composer.json index be3fdf91..993f65e5 100644 --- a/composer.json +++ b/composer.json @@ -42,7 +42,9 @@ "nyholm/psr7": "^1.2", "oscarotero/php-cs-fixer-config": "^1.0", "brick/varexporter": "^0.3.1", - "symfony/css-selector": "^5.0" + "symfony/css-selector": "^5.0", + "phpstan/phpstan": "^2.1", + "phpstan/phpstan-strict-rules": "^2.0" }, "suggest": { "symfony/css-selector": "If you want to get elements using css selectors" @@ -64,6 +66,7 @@ "demo": "php -S localhost:8888 demo/index.php", "test": "phpunit", "cs-fix": "php-cs-fixer fix", + "phpstan": "phpstan --memory-limit=-1", "update-resources": [ "php scripts/update-oembed.php", "php scripts/update-suffix.php" diff --git a/phpstan.dist.neon b/phpstan.dist.neon new file mode 100644 index 00000000..e989bd1a --- /dev/null +++ b/phpstan.dist.neon @@ -0,0 +1,15 @@ +includes: + - vendor/phpstan/phpstan-strict-rules/rules.neon + +parameters: + level: max + paths: + - src +# - tests + excludePaths: + - tests/cache + - tests/fixtures + checkMissingCallableSignature: true + checkUninitializedProperties: true + checkTooWideReturnTypesInProtectedAndPublicMethods: true + checkImplicitMixed: true diff --git a/src/Adapters/Archive/Api.php b/src/Adapters/Archive/Api.php index aa105148..b4820eaa 100644 --- a/src/Adapters/Archive/Api.php +++ b/src/Adapters/Archive/Api.php @@ -9,6 +9,9 @@ class Api { use HttpApiTrait; + /** + * @return array + */ protected function fetchData(): array { $this->endpoint = $this->extractor->getUri()->withQuery('output=json'); diff --git a/src/Adapters/Archive/Detectors/AuthorName.php b/src/Adapters/Archive/Detectors/AuthorName.php index ea467255..3aef8b7b 100644 --- a/src/Adapters/Archive/Detectors/AuthorName.php +++ b/src/Adapters/Archive/Detectors/AuthorName.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\Archive\Detectors; +use Embed\Adapters\Archive\Extractor; use Embed\Detectors\AuthorName as Detector; class AuthorName extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->str('metadata', 'creator') - ?: parent::detect(); + $result = $api->str('metadata', 'creator'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/Archive/Detectors/Description.php b/src/Adapters/Archive/Detectors/Description.php index d3c3af1a..baffbba4 100644 --- a/src/Adapters/Archive/Detectors/Description.php +++ b/src/Adapters/Archive/Detectors/Description.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\Archive\Detectors; +use Embed\Adapters\Archive\Extractor; use Embed\Detectors\Description as Detector; class Description extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->str('metadata', 'extract') - ?: parent::detect(); + $result = $api->str('metadata', 'extract'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/Archive/Detectors/PublishedTime.php b/src/Adapters/Archive/Detectors/PublishedTime.php index 47e36d2d..0d298d94 100644 --- a/src/Adapters/Archive/Detectors/PublishedTime.php +++ b/src/Adapters/Archive/Detectors/PublishedTime.php @@ -4,17 +4,25 @@ namespace Embed\Adapters\Archive\Detectors; use DateTime; +use Embed\Adapters\Archive\Extractor; use Embed\Detectors\PublishedTime as Detector; class PublishedTime extends Detector { public function detect(): ?DateTime { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->time('metadata', 'publicdate') - ?: $api->time('metadata', 'addeddate') - ?: $api->time('metadata', 'date') - ?: parent::detect(); + $fields = ['publicdate', 'addeddate', 'date']; + foreach ($fields as $field) { + $result = $api->time('metadata', $field); + if ($result !== null) { + return $result; + } + } + + return parent::detect(); } } diff --git a/src/Adapters/Archive/Detectors/Title.php b/src/Adapters/Archive/Detectors/Title.php index 4ba1dca9..328ed05a 100644 --- a/src/Adapters/Archive/Detectors/Title.php +++ b/src/Adapters/Archive/Detectors/Title.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\Archive\Detectors; +use Embed\Adapters\Archive\Extractor; use Embed\Detectors\Title as Detector; class Title extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->str('metadata', 'title') - ?: parent::detect(); + $result = $api->str('metadata', 'title'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/Archive/Extractor.php b/src/Adapters/Archive/Extractor.php index ab941e0f..ae99596f 100644 --- a/src/Adapters/Archive/Extractor.php +++ b/src/Adapters/Archive/Extractor.php @@ -4,20 +4,35 @@ namespace Embed\Adapters\Archive; use Embed\Extractor as Base; +use Embed\Http\Crawler; +use Psr\Http\Message\RequestInterface; +use Psr\Http\Message\ResponseInterface; +use Psr\Http\Message\UriInterface; class Extractor extends Base { private Api $api; + public function __construct( + UriInterface $uri, + RequestInterface $request, + ResponseInterface $response, + Crawler $crawler + ) { + parent::__construct($uri, $request, $response, $crawler); + $this->api = new Api($this); + } + public function getApi(): Api { return $this->api; } + /** + * @return array + */ public function createCustomDetectors(): array { - $this->api = new Api($this); - return [ 'title' => new Detectors\Title($this), 'description' => new Detectors\Description($this), diff --git a/src/Adapters/Bandcamp/Extractor.php b/src/Adapters/Bandcamp/Extractor.php index f4a97417..4375d9c4 100644 --- a/src/Adapters/Bandcamp/Extractor.php +++ b/src/Adapters/Bandcamp/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/Adapters/CadenaSer/Detectors/Code.php b/src/Adapters/CadenaSer/Detectors/Code.php index d279e7b2..0715ef2a 100644 --- a/src/Adapters/CadenaSer/Detectors/Code.php +++ b/src/Adapters/CadenaSer/Detectors/Code.php @@ -13,8 +13,8 @@ class Code extends Detector { public function detect(): ?EmbedCode { - return parent::detect() - ?: $this->fallback(); + $result = parent::detect(); + return $result !== null ? $result : $this->fallback(); } private function fallback(): ?EmbedCode diff --git a/src/Adapters/CadenaSer/Extractor.php b/src/Adapters/CadenaSer/Extractor.php index aa237776..63d1b635 100644 --- a/src/Adapters/CadenaSer/Extractor.php +++ b/src/Adapters/CadenaSer/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/Adapters/Facebook/Detectors/Title.php b/src/Adapters/Facebook/Detectors/Title.php index b73a53ff..8d051131 100644 --- a/src/Adapters/Facebook/Detectors/Title.php +++ b/src/Adapters/Facebook/Detectors/Title.php @@ -15,7 +15,7 @@ public function detect(): ?string $document = $this->extractor->getDocument(); $oembed = $this->extractor->getOEmbed(); - return $oembed->str('title') - ?: $document->select('.//head/title')->str(); + $result = $oembed->str('title'); + return $result !== null ? $result : $document->select('.//head/title')->str(); } } diff --git a/src/Adapters/Facebook/Extractor.php b/src/Adapters/Facebook/Extractor.php index 5b4cb701..9d24eeb1 100644 --- a/src/Adapters/Facebook/Extractor.php +++ b/src/Adapters/Facebook/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { $this->oembed = new OEmbed($this); diff --git a/src/Adapters/Facebook/OEmbed.php b/src/Adapters/Facebook/OEmbed.php index 84d57726..075b0bdf 100644 --- a/src/Adapters/Facebook/OEmbed.php +++ b/src/Adapters/Facebook/OEmbed.php @@ -16,14 +16,14 @@ protected function detectEndpoint(): ?UriInterface { $token = $this->extractor->getSetting('facebook:token'); - if (!$token) { + if (!is_string($token) || $token === '') { return null; } $uri = $this->extractor->getUri(); if (strpos($uri->getPath(), 'login') !== false) { parse_str($uri->getQuery(), $params); - if (!empty($params['next'])) { + if (isset($params['next']) && is_string($params['next']) && $params['next'] !== '' && $params['next'] !== '0') { $uri = $this->extractor->getCrawler()->createUri($params['next']); } } diff --git a/src/Adapters/Flickr/Detectors/Code.php b/src/Adapters/Flickr/Detectors/Code.php index 1dfe50ae..273a0d61 100644 --- a/src/Adapters/Flickr/Detectors/Code.php +++ b/src/Adapters/Flickr/Detectors/Code.php @@ -13,8 +13,12 @@ class Code extends Detector { public function detect(): ?EmbedCode { - return parent::detect() - ?: $this->fallback(); + $result = parent::detect(); + if ($result !== null) { + return $result; + } + + return $this->fallback(); } private function fallback(): ?EmbedCode diff --git a/src/Adapters/Flickr/Extractor.php b/src/Adapters/Flickr/Extractor.php index fe18c9d8..263b8733 100644 --- a/src/Adapters/Flickr/Extractor.php +++ b/src/Adapters/Flickr/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/Adapters/Gist/Api.php b/src/Adapters/Gist/Api.php index a5f10044..b8262d24 100644 --- a/src/Adapters/Gist/Api.php +++ b/src/Adapters/Gist/Api.php @@ -9,6 +9,9 @@ class Api { use HttpApiTrait; + /** + * @return array + */ protected function fetchData(): array { $uri = $this->extractor->getUri(); diff --git a/src/Adapters/Gist/Detectors/AuthorName.php b/src/Adapters/Gist/Detectors/AuthorName.php index b31aea6a..214910f3 100644 --- a/src/Adapters/Gist/Detectors/AuthorName.php +++ b/src/Adapters/Gist/Detectors/AuthorName.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\Gist\Detectors; +use Embed\Adapters\Gist\Extractor; use Embed\Detectors\AuthorName as Detector; class AuthorName extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->str('owner') - ?: parent::detect(); + $result = $api->str('owner'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/Gist/Detectors/AuthorUrl.php b/src/Adapters/Gist/Detectors/AuthorUrl.php index 1241429e..0060136a 100644 --- a/src/Adapters/Gist/Detectors/AuthorUrl.php +++ b/src/Adapters/Gist/Detectors/AuthorUrl.php @@ -3,6 +3,7 @@ namespace Embed\Adapters\Gist\Detectors; +use Embed\Adapters\Gist\Extractor; use Embed\Detectors\AuthorUrl as Detector; use Psr\Http\Message\UriInterface; @@ -10,11 +11,15 @@ class AuthorUrl extends Detector { public function detect(): ?UriInterface { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); $owner = $api->str('owner'); - if ($owner) { - return $this->extractor->getCrawler()->createUri("https://github.com/{$owner}"); + // Exclude empty string and '0' to maintain original truthy check behavior + // The string '0' is not a valid GitHub username and should not generate a URL + if (is_string($owner) && $owner !== '' && $owner !== '0') { + return $extractor->getCrawler()->createUri("https://github.com/{$owner}"); } return parent::detect(); diff --git a/src/Adapters/Gist/Detectors/Code.php b/src/Adapters/Gist/Detectors/Code.php index 23960ee1..b7df106d 100644 --- a/src/Adapters/Gist/Detectors/Code.php +++ b/src/Adapters/Gist/Detectors/Code.php @@ -3,6 +3,7 @@ namespace Embed\Adapters\Gist\Detectors; +use Embed\Adapters\Gist\Extractor; use Embed\Detectors\Code as Detector; use Embed\EmbedCode; use function Embed\html; @@ -11,21 +12,25 @@ class Code extends Detector { public function detect(): ?EmbedCode { - return parent::detect() - ?: $this->fallback(); + $parentResult = parent::detect(); + return $parentResult !== null ? $parentResult : $this->fallback(); } private function fallback(): ?EmbedCode { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); $code = $api->html('div'); $stylesheet = $api->str('stylesheet'); - if ($code && $stylesheet) { + if ($code !== null && $stylesheet !== null) { return new EmbedCode( html('link', ['rel' => 'stylesheet', 'href' => $stylesheet]).$code ); } + + return null; } } diff --git a/src/Adapters/Gist/Detectors/PublishedTime.php b/src/Adapters/Gist/Detectors/PublishedTime.php index 1487524b..d49c5da2 100644 --- a/src/Adapters/Gist/Detectors/PublishedTime.php +++ b/src/Adapters/Gist/Detectors/PublishedTime.php @@ -4,15 +4,18 @@ namespace Embed\Adapters\Gist\Detectors; use DateTime; +use Embed\Adapters\Gist\Extractor; use Embed\Detectors\PublishedTime as Detector; class PublishedTime extends Detector { public function detect(): ?DateTime { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->time('created_at') - ?: parent::detect(); + $result = $api->time('created_at'); + return $result !== null ? $result : parent::detect(); } } diff --git a/src/Adapters/Gist/Extractor.php b/src/Adapters/Gist/Extractor.php index f9ac088c..369ad17c 100644 --- a/src/Adapters/Gist/Extractor.php +++ b/src/Adapters/Gist/Extractor.php @@ -4,20 +4,35 @@ namespace Embed\Adapters\Gist; use Embed\Extractor as Base; +use Embed\Http\Crawler; +use Psr\Http\Message\RequestInterface; +use Psr\Http\Message\ResponseInterface; +use Psr\Http\Message\UriInterface; class Extractor extends Base { private Api $api; + public function __construct( + UriInterface $uri, + RequestInterface $request, + ResponseInterface $response, + Crawler $crawler + ) { + parent::__construct($uri, $request, $response, $crawler); + $this->api = new Api($this); + } + public function getApi(): Api { return $this->api; } + /** + * @return array + */ public function createCustomDetectors(): array { - $this->api = new Api($this); - return [ 'authorName' => new Detectors\AuthorName($this), 'authorUrl' => new Detectors\AuthorUrl($this), diff --git a/src/Adapters/Github/Detectors/Code.php b/src/Adapters/Github/Detectors/Code.php index 350e15c4..e3adc4d7 100644 --- a/src/Adapters/Github/Detectors/Code.php +++ b/src/Adapters/Github/Detectors/Code.php @@ -12,8 +12,8 @@ class Code extends Detector { public function detect(): ?EmbedCode { - return parent::detect() - ?: $this->fallback(); + $result = parent::detect(); + return $result !== null ? $result : $this->fallback(); } private function fallback(): ?EmbedCode diff --git a/src/Adapters/Github/Extractor.php b/src/Adapters/Github/Extractor.php index 0be93580..19763888 100644 --- a/src/Adapters/Github/Extractor.php +++ b/src/Adapters/Github/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/Adapters/Ideone/Detectors/Code.php b/src/Adapters/Ideone/Detectors/Code.php index 0238981a..e14c0273 100644 --- a/src/Adapters/Ideone/Detectors/Code.php +++ b/src/Adapters/Ideone/Detectors/Code.php @@ -11,16 +11,16 @@ class Code extends Detector { public function detect(): ?EmbedCode { - return parent::detect() - ?: $this->fallback(); + $result = parent::detect(); + return $result !== null ? $result : $this->fallback(); } private function fallback(): ?EmbedCode { $uri = $this->extractor->getUri(); - $id = explode('/', $uri->getPath())[1]; + $id = explode('/', $uri->getPath())[1] ?? ''; - if (empty($id)) { + if ($id === '' || $id === '0') { return null; } diff --git a/src/Adapters/Ideone/Extractor.php b/src/Adapters/Ideone/Extractor.php index aa7132fc..1581c0a3 100644 --- a/src/Adapters/Ideone/Extractor.php +++ b/src/Adapters/Ideone/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/Adapters/ImageShack/Api.php b/src/Adapters/ImageShack/Api.php index a5bc3ec0..8046a42c 100644 --- a/src/Adapters/ImageShack/Api.php +++ b/src/Adapters/ImageShack/Api.php @@ -11,6 +11,9 @@ class Api { use HttpApiTrait; + /** + * @return array + */ protected function fetchData(): array { $uri = $this->extractor->getUri(); @@ -25,12 +28,19 @@ protected function fetchData(): array $id = getDirectory($uri->getPath(), 1); - if (empty($id)) { + if ($id === null || $id === '' || $id === '0') { return []; } $this->endpoint = $this->extractor->getCrawler()->createUri("https://api.imageshack.com/v2/images/{$id}"); $data = $this->fetchJSON($this->endpoint); - return $data['result'] ?? []; + + if (isset($data['result']) && is_array($data['result'])) { + /** @var array */ + $result = $data['result']; + return $result; + } + + return []; } } diff --git a/src/Adapters/ImageShack/Detectors/AuthorName.php b/src/Adapters/ImageShack/Detectors/AuthorName.php index 52c4ff5f..f77bbbc8 100644 --- a/src/Adapters/ImageShack/Detectors/AuthorName.php +++ b/src/Adapters/ImageShack/Detectors/AuthorName.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\ImageShack\Detectors; +use Embed\Adapters\ImageShack\Extractor; use Embed\Detectors\AuthorName as Detector; class AuthorName extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->str('owner', 'username') - ?: parent::detect(); + $result = $api->str('owner', 'username'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/ImageShack/Detectors/AuthorUrl.php b/src/Adapters/ImageShack/Detectors/AuthorUrl.php index 1578da5d..598e319a 100644 --- a/src/Adapters/ImageShack/Detectors/AuthorUrl.php +++ b/src/Adapters/ImageShack/Detectors/AuthorUrl.php @@ -3,6 +3,7 @@ namespace Embed\Adapters\ImageShack\Detectors; +use Embed\Adapters\ImageShack\Extractor; use Embed\Detectors\AuthorUrl as Detector; use Psr\Http\Message\UriInterface; @@ -10,11 +11,15 @@ class AuthorUrl extends Detector { public function detect(): ?UriInterface { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); $owner = $api->str('owner', 'username'); - if ($owner) { - return $this->extractor->getCrawler()->createUri("https://imageshack.com/{$owner}"); + // Exclude empty string and '0' to maintain original truthy check behavior + // The string '0' is not a valid username and should not generate a URL + if (is_string($owner) && $owner !== '' && $owner !== '0') { + return $extractor->getCrawler()->createUri("https://imageshack.com/{$owner}"); } return parent::detect(); diff --git a/src/Adapters/ImageShack/Detectors/Description.php b/src/Adapters/ImageShack/Detectors/Description.php index a30638b6..ecd7af69 100644 --- a/src/Adapters/ImageShack/Detectors/Description.php +++ b/src/Adapters/ImageShack/Detectors/Description.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\ImageShack\Detectors; +use Embed\Adapters\ImageShack\Extractor; use Embed\Detectors\Description as Detector; class Description extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->str('description') - ?: parent::detect(); + $result = $api->str('description'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/ImageShack/Detectors/Image.php b/src/Adapters/ImageShack/Detectors/Image.php index 102b7619..c11e8910 100644 --- a/src/Adapters/ImageShack/Detectors/Image.php +++ b/src/Adapters/ImageShack/Detectors/Image.php @@ -3,6 +3,7 @@ namespace Embed\Adapters\ImageShack\Detectors; +use Embed\Adapters\ImageShack\Extractor; use Embed\Detectors\Image as Detector; use Psr\Http\Message\UriInterface; @@ -10,9 +11,11 @@ class Image extends Detector { public function detect(): ?UriInterface { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->url('direct_link') - ?: parent::detect(); + $result = $api->url('direct_link'); + return $result !== null ? $result : parent::detect(); } } diff --git a/src/Adapters/ImageShack/Detectors/PublishedTime.php b/src/Adapters/ImageShack/Detectors/PublishedTime.php index 969804e9..8224860e 100644 --- a/src/Adapters/ImageShack/Detectors/PublishedTime.php +++ b/src/Adapters/ImageShack/Detectors/PublishedTime.php @@ -4,15 +4,18 @@ namespace Embed\Adapters\ImageShack\Detectors; use DateTime; +use Embed\Adapters\ImageShack\Extractor; use Embed\Detectors\PublishedTime as Detector; class PublishedTime extends Detector { public function detect(): ?DateTime { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->time('creation_date') - ?: parent::detect(); + $result = $api->time('creation_date'); + return $result !== null ? $result : parent::detect(); } } diff --git a/src/Adapters/ImageShack/Detectors/Title.php b/src/Adapters/ImageShack/Detectors/Title.php index 6ea32d13..4d74f0cd 100644 --- a/src/Adapters/ImageShack/Detectors/Title.php +++ b/src/Adapters/ImageShack/Detectors/Title.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\ImageShack\Detectors; +use Embed\Adapters\ImageShack\Extractor; use Embed\Detectors\Title as Detector; class Title extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->str('title') - ?: parent::detect(); + $result = $api->str('title'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/ImageShack/Extractor.php b/src/Adapters/ImageShack/Extractor.php index c865c7e1..327bf9b0 100644 --- a/src/Adapters/ImageShack/Extractor.php +++ b/src/Adapters/ImageShack/Extractor.php @@ -4,20 +4,35 @@ namespace Embed\Adapters\ImageShack; use Embed\Extractor as Base; +use Embed\Http\Crawler; +use Psr\Http\Message\RequestInterface; +use Psr\Http\Message\ResponseInterface; +use Psr\Http\Message\UriInterface; class Extractor extends Base { private Api $api; + public function __construct( + UriInterface $uri, + RequestInterface $request, + ResponseInterface $response, + Crawler $crawler + ) { + parent::__construct($uri, $request, $response, $crawler); + $this->api = new Api($this); + } + public function getApi(): Api { return $this->api; } + /** + * @return array + */ public function createCustomDetectors(): array { - $this->api = new Api($this); - return [ 'authorName' => new Detectors\AuthorName($this), 'authorUrl' => new Detectors\AuthorUrl($this), diff --git a/src/Adapters/Instagram/OEmbed.php b/src/Adapters/Instagram/OEmbed.php index 427a7ed4..a0c7bd20 100644 --- a/src/Adapters/Instagram/OEmbed.php +++ b/src/Adapters/Instagram/OEmbed.php @@ -14,7 +14,7 @@ protected function detectEndpoint(): ?UriInterface { $token = $this->extractor->getSetting('instagram:token'); - if (!$token) { + if (!is_string($token) || $token === '') { return null; } diff --git a/src/Adapters/Pinterest/Detectors/Code.php b/src/Adapters/Pinterest/Detectors/Code.php index 4d38724e..b033ec70 100644 --- a/src/Adapters/Pinterest/Detectors/Code.php +++ b/src/Adapters/Pinterest/Detectors/Code.php @@ -12,8 +12,12 @@ class Code extends Detector { public function detect(): ?EmbedCode { - return parent::detect() - ?: $this->fallback(); + $result = parent::detect(); + if ($result !== null) { + return $result; + } + + return $this->fallback(); } private function fallback(): ?EmbedCode diff --git a/src/Adapters/Pinterest/Extractor.php b/src/Adapters/Pinterest/Extractor.php index 5b5c40fa..39aa79dd 100644 --- a/src/Adapters/Pinterest/Extractor.php +++ b/src/Adapters/Pinterest/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/Adapters/Sassmeister/Detectors/Code.php b/src/Adapters/Sassmeister/Detectors/Code.php index 7ad83746..f9b39b67 100644 --- a/src/Adapters/Sassmeister/Detectors/Code.php +++ b/src/Adapters/Sassmeister/Detectors/Code.php @@ -12,8 +12,8 @@ class Code extends Detector { public function detect(): ?EmbedCode { - return parent::detect() - ?: $this->fallback(); + $result = parent::detect(); + return $result !== null ? $result : $this->fallback(); } private function fallback(): ?EmbedCode diff --git a/src/Adapters/Sassmeister/Extractor.php b/src/Adapters/Sassmeister/Extractor.php index e36e3dc6..718e79e0 100644 --- a/src/Adapters/Sassmeister/Extractor.php +++ b/src/Adapters/Sassmeister/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/Adapters/Slides/Detectors/Code.php b/src/Adapters/Slides/Detectors/Code.php index 5ae51422..8e6ffb91 100644 --- a/src/Adapters/Slides/Detectors/Code.php +++ b/src/Adapters/Slides/Detectors/Code.php @@ -10,10 +10,10 @@ class Code extends Detector { - public function detect(): ?EmbedCode + public function detect(): EmbedCode { - return parent::detect() - ?: $this->fallback(); + $result = parent::detect(); + return $result !== null ? $result : $this->fallback(); } private function fallback(): EmbedCode diff --git a/src/Adapters/Slides/Extractor.php b/src/Adapters/Slides/Extractor.php index 96900794..949cc7b7 100644 --- a/src/Adapters/Slides/Extractor.php +++ b/src/Adapters/Slides/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/Adapters/Snipplr/Detectors/Code.php b/src/Adapters/Snipplr/Detectors/Code.php index aadbb1d9..2663039f 100644 --- a/src/Adapters/Snipplr/Detectors/Code.php +++ b/src/Adapters/Snipplr/Detectors/Code.php @@ -12,8 +12,8 @@ class Code extends Detector { public function detect(): ?EmbedCode { - return parent::detect() - ?: $this->fallback(); + $result = parent::detect(); + return $result !== null ? $result : $this->fallback(); } private function fallback(): ?EmbedCode diff --git a/src/Adapters/Snipplr/Extractor.php b/src/Adapters/Snipplr/Extractor.php index a0a73086..06ab210b 100644 --- a/src/Adapters/Snipplr/Extractor.php +++ b/src/Adapters/Snipplr/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/Adapters/Twitch/Detectors/Code.php b/src/Adapters/Twitch/Detectors/Code.php index 1f333bcf..2780dc4b 100644 --- a/src/Adapters/Twitch/Detectors/Code.php +++ b/src/Adapters/Twitch/Detectors/Code.php @@ -11,8 +11,8 @@ class Code extends Detector { public function detect(): ?EmbedCode { - return parent::detect() - ?: $this->fallback(); + $result = parent::detect(); + return $result !== null ? $result : $this->fallback(); } private function fallback(): ?EmbedCode @@ -20,15 +20,17 @@ private function fallback(): ?EmbedCode $path = $this->extractor->getUri()->getPath(); $parent = $this->extractor->getSetting('twitch:parent'); - if ($id = self::getVideoId($path)) { - $code = $parent + $id = self::getVideoId($path); + if ($id !== null) { + $code = $parent !== null ? self::generateIframeCode(['id' => $id, 'parent' => $parent]) : self::generateJsCode('video', $id); return new EmbedCode($code, 620, 378); } - if ($id = self::getChannelId($path)) { - $code = $parent + $id = self::getChannelId($path); + if ($id !== null) { + $code = $parent !== null ? self::generateIframeCode(['channel' => $id, 'parent' => $parent]) : self::generateJsCode('channel', $id); return new EmbedCode($code, 620, 378); @@ -39,7 +41,7 @@ private function fallback(): ?EmbedCode private static function getVideoId(string $path): ?string { - if (preg_match('#^/videos/(\d+)$#', $path, $matches)) { + if (preg_match('#^/videos/(\d+)$#', $path, $matches) === 1) { return $matches[1]; } @@ -48,13 +50,16 @@ private static function getVideoId(string $path): ?string private static function getChannelId(string $path): ?string { - if (preg_match('#^/(\w+)$#', $path, $matches)) { + if (preg_match('#^/(\w+)$#', $path, $matches) === 1) { return $matches[1]; } return null; } + /** + * @param array $params + */ private static function generateIframeCode(array $params): string { $query = http_build_query(['autoplay' => 'false'] + $params); @@ -69,7 +74,7 @@ private static function generateIframeCode(array $params): string ]); } - private static function generateJsCode($key, $value) + private static function generateJsCode(string $key, string $value): string { return << diff --git a/src/Adapters/Twitch/Extractor.php b/src/Adapters/Twitch/Extractor.php index a36d27f3..990f62b2 100644 --- a/src/Adapters/Twitch/Extractor.php +++ b/src/Adapters/Twitch/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/Adapters/Twitter/Api.php b/src/Adapters/Twitter/Api.php index a03be6d1..e60caaae 100644 --- a/src/Adapters/Twitter/Api.php +++ b/src/Adapters/Twitter/Api.php @@ -10,23 +10,26 @@ class Api { use HttpApiTrait; + /** + * @return array + */ protected function fetchData(): array { $token = $this->extractor->getSetting('twitter:token'); - if (!$token) { + if (!is_string($token) || $token === '') { return []; } - + $uri = $this->extractor->getUri(); $id = getDirectory($uri->getPath(), 2); - if (empty($id)) { + if ($id === null || $id === '' || $id === '0') { return []; } - $this->extractor->getCrawler()->addDefaultHeaders(array('Authorization' => "Bearer $token")); + $this->extractor->getCrawler()->addDefaultHeaders(array('Authorization' => "Bearer {$token}")); $this->endpoint = $this->extractor->getCrawler()->createUri("https://api.twitter.com/2/tweets/{$id}?expansions=author_id,attachments.media_keys&tweet.fields=created_at&media.fields=preview_image_url,url&user.fields=id,name"); return $this->fetchJSON($this->endpoint); diff --git a/src/Adapters/Twitter/Detectors/AuthorName.php b/src/Adapters/Twitter/Detectors/AuthorName.php index 5409ad4b..d9c050dc 100644 --- a/src/Adapters/Twitter/Detectors/AuthorName.php +++ b/src/Adapters/Twitter/Detectors/AuthorName.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\Twitter\Detectors; +use Embed\Adapters\Twitter\Extractor; use Embed\Detectors\AuthorName as Detector; class AuthorName extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); - - return $api->str('includes', 'users', '0', 'name') - ?: parent::detect(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); + + $result = $api->str('includes', 'users', '0', 'name'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/Twitter/Detectors/AuthorUrl.php b/src/Adapters/Twitter/Detectors/AuthorUrl.php index 23a11d7b..c30c47dd 100644 --- a/src/Adapters/Twitter/Detectors/AuthorUrl.php +++ b/src/Adapters/Twitter/Detectors/AuthorUrl.php @@ -3,6 +3,7 @@ namespace Embed\Adapters\Twitter\Detectors; +use Embed\Adapters\Twitter\Extractor; use Embed\Detectors\AuthorUrl as Detector; use Psr\Http\Message\UriInterface; @@ -10,11 +11,15 @@ class AuthorUrl extends Detector { public function detect(): ?UriInterface { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); $username = $api->str('includes', 'users', '0', 'username'); - if ($username) { - return $this->extractor->getCrawler()->createUri("https://twitter.com/{$username}"); + // Exclude empty string and '0' to maintain original truthy check behavior + // The string '0' is not a valid Twitter username and should not generate a URL + if (is_string($username) && $username !== '' && $username !== '0') { + return $extractor->getCrawler()->createUri("https://twitter.com/{$username}"); } return parent::detect(); diff --git a/src/Adapters/Twitter/Detectors/Description.php b/src/Adapters/Twitter/Detectors/Description.php index 2b19afad..d61db345 100644 --- a/src/Adapters/Twitter/Detectors/Description.php +++ b/src/Adapters/Twitter/Detectors/Description.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\Twitter\Detectors; +use Embed\Adapters\Twitter\Extractor; use Embed\Detectors\Description as Detector; class Description extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->str('data', 'text') - ?: parent::detect(); + $result = $api->str('data', 'text'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/Twitter/Detectors/Image.php b/src/Adapters/Twitter/Detectors/Image.php index 90344335..75fad8e0 100644 --- a/src/Adapters/Twitter/Detectors/Image.php +++ b/src/Adapters/Twitter/Detectors/Image.php @@ -3,6 +3,7 @@ namespace Embed\Adapters\Twitter\Detectors; +use Embed\Adapters\Twitter\Extractor; use Embed\Detectors\Image as Detector; use Psr\Http\Message\UriInterface; @@ -10,16 +11,18 @@ class Image extends Detector { public function detect(): ?UriInterface { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); $preview = $api->url('includes', 'media', '0', 'preview_image_url'); - - if ($preview) { + + if ($preview !== null) { return $preview; } $regular = $api->url('includes', 'media', '0', 'url'); - if ($regular) { + if ($regular !== null) { return $regular; } diff --git a/src/Adapters/Twitter/Detectors/PublishedTime.php b/src/Adapters/Twitter/Detectors/PublishedTime.php index 73672988..49cef116 100644 --- a/src/Adapters/Twitter/Detectors/PublishedTime.php +++ b/src/Adapters/Twitter/Detectors/PublishedTime.php @@ -4,15 +4,18 @@ namespace Embed\Adapters\Twitter\Detectors; use DateTime; +use Embed\Adapters\Twitter\Extractor; use Embed\Detectors\PublishedTime as Detector; class PublishedTime extends Detector { public function detect(): ?DateTime { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->time('data', 'created_at') - ?: parent::detect(); + $result = $api->time('data', 'created_at'); + return $result !== null ? $result : parent::detect(); } } diff --git a/src/Adapters/Twitter/Detectors/Title.php b/src/Adapters/Twitter/Detectors/Title.php index 58c770bb..36e8c127 100644 --- a/src/Adapters/Twitter/Detectors/Title.php +++ b/src/Adapters/Twitter/Detectors/Title.php @@ -3,16 +3,19 @@ namespace Embed\Adapters\Twitter\Detectors; +use Embed\Adapters\Twitter\Extractor; use Embed\Detectors\Title as Detector; class Title extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); $name = $api->str('includes', 'users', '0', 'name'); - if ($name) { + if ($name !== null) { return "Tweet by $name"; } diff --git a/src/Adapters/Twitter/Extractor.php b/src/Adapters/Twitter/Extractor.php index 2cb2c459..0908b5ac 100644 --- a/src/Adapters/Twitter/Extractor.php +++ b/src/Adapters/Twitter/Extractor.php @@ -4,20 +4,35 @@ namespace Embed\Adapters\Twitter; use Embed\Extractor as Base; +use Embed\Http\Crawler; +use Psr\Http\Message\RequestInterface; +use Psr\Http\Message\ResponseInterface; +use Psr\Http\Message\UriInterface; class Extractor extends Base { private Api $api; + public function __construct( + UriInterface $uri, + RequestInterface $request, + ResponseInterface $response, + Crawler $crawler + ) { + parent::__construct($uri, $request, $response, $crawler); + $this->api = new Api($this); + } + public function getApi(): Api { return $this->api; } + /** + * @return array + */ public function createCustomDetectors(): array { - $this->api = new Api($this); - return [ 'authorName' => new Detectors\AuthorName($this), 'authorUrl' => new Detectors\AuthorUrl($this), diff --git a/src/Adapters/Wikipedia/Api.php b/src/Adapters/Wikipedia/Api.php index 36b5233b..4ddc3025 100644 --- a/src/Adapters/Wikipedia/Api.php +++ b/src/Adapters/Wikipedia/Api.php @@ -11,6 +11,9 @@ class Api { use HttpApiTrait; + /** + * @return array + */ protected function fetchData(): array { $uri = $this->extractor->getUri(); @@ -33,8 +36,17 @@ protected function fetchData(): array ])); $data = $this->fetchJSON($this->endpoint); - $pages = $data['query']['pages'] ?? null; - return $pages ? current($pages) : null; + if (isset($data['query']) && is_array($data['query']) && isset($data['query']['pages']) && is_array($data['query']['pages'])) { + $pages = $data['query']['pages']; + $result = current($pages); + if (is_array($result)) { + /** @var array */ + $typedResult = $result; + return $typedResult; + } + } + + return []; } } diff --git a/src/Adapters/Wikipedia/Detectors/Description.php b/src/Adapters/Wikipedia/Detectors/Description.php index dc281dc0..fdb18dd3 100644 --- a/src/Adapters/Wikipedia/Detectors/Description.php +++ b/src/Adapters/Wikipedia/Detectors/Description.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\Wikipedia\Detectors; +use Embed\Adapters\Wikipedia\Extractor; use Embed\Detectors\Description as Detector; class Description extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->str('extract') - ?: parent::detect(); + $result = $api->str('extract'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/Wikipedia/Detectors/Title.php b/src/Adapters/Wikipedia/Detectors/Title.php index 0b531335..bfe001af 100644 --- a/src/Adapters/Wikipedia/Detectors/Title.php +++ b/src/Adapters/Wikipedia/Detectors/Title.php @@ -3,15 +3,18 @@ namespace Embed\Adapters\Wikipedia\Detectors; +use Embed\Adapters\Wikipedia\Extractor; use Embed\Detectors\Title as Detector; class Title extends Detector { public function detect(): ?string { - $api = $this->extractor->getApi(); + /** @var Extractor $extractor */ + $extractor = $this->extractor; + $api = $extractor->getApi(); - return $api->str('title') - ?: parent::detect(); + $result = $api->str('title'); + return (is_string($result) && trim($result) !== '') ? $result : parent::detect(); } } diff --git a/src/Adapters/Wikipedia/Extractor.php b/src/Adapters/Wikipedia/Extractor.php index 6cc0e1f2..75afb5ea 100644 --- a/src/Adapters/Wikipedia/Extractor.php +++ b/src/Adapters/Wikipedia/Extractor.php @@ -4,20 +4,35 @@ namespace Embed\Adapters\Wikipedia; use Embed\Extractor as Base; +use Embed\Http\Crawler; +use Psr\Http\Message\RequestInterface; +use Psr\Http\Message\ResponseInterface; +use Psr\Http\Message\UriInterface; class Extractor extends Base { private Api $api; + public function __construct( + UriInterface $uri, + RequestInterface $request, + ResponseInterface $response, + Crawler $crawler + ) { + parent::__construct($uri, $request, $response, $crawler); + $this->api = new Api($this); + } + public function getApi(): Api { return $this->api; } + /** + * @return array + */ public function createCustomDetectors(): array { - $this->api = new Api($this); - return [ 'title' => new Detectors\Title($this), 'description' => new Detectors\Description($this), diff --git a/src/Adapters/Youtube/Detectors/Feeds.php b/src/Adapters/Youtube/Detectors/Feeds.php index aac95531..0bc9559f 100644 --- a/src/Adapters/Youtube/Detectors/Feeds.php +++ b/src/Adapters/Youtube/Detectors/Feeds.php @@ -15,10 +15,13 @@ class Feeds extends Detector */ public function detect(): array { - return parent::detect() - ?: $this->fallback(); + $result = parent::detect(); + return $result !== [] ? $result : $this->fallback(); } + /** + * @return UriInterface[] + */ private function fallback(): array { $uri = $this->extractor->getUri(); diff --git a/src/Adapters/Youtube/Extractor.php b/src/Adapters/Youtube/Extractor.php index ce299d28..ea43c122 100644 --- a/src/Adapters/Youtube/Extractor.php +++ b/src/Adapters/Youtube/Extractor.php @@ -7,6 +7,9 @@ class Extractor extends Base { + /** + * @return array + */ public function createCustomDetectors(): array { return [ diff --git a/src/ApiTrait.php b/src/ApiTrait.php index 3dcb781b..696b5a72 100644 --- a/src/ApiTrait.php +++ b/src/ApiTrait.php @@ -10,28 +10,35 @@ trait ApiTrait { protected Extractor $extractor; - private array $data; + /** @var array */ + private array $data = []; public function __construct(Extractor $extractor) { $this->extractor = $extractor; } + /** + * @return array + */ public function all(): array { - if (!isset($this->data)) { + if ($this->data === []) { $this->data = $this->fetchData(); } return $this->data; } + /** + * @return mixed + */ public function get(string ...$keys) { $data = $this->all(); foreach ($keys as $key) { - if (!isset($data[$key])) { + if (!is_array($data) || !isset($data[$key])) { return null; } @@ -49,13 +56,22 @@ public function str(string ...$keys): ?string $value = array_shift($value); } - return $value ? clean((string) $value) : null; + if (is_string($value)) { + return clean($value); + } elseif (is_scalar($value)) { + return clean((string) $value); + } + + return null; } + /** + * @return string[] + */ public function strAll(string ...$keys): array { $all = (array) $this->get(...$keys); - return array_filter(array_map(fn ($value) => clean($value), $all)); + return array_filter(array_map(fn ($value) => is_string($value) ? clean($value) : null, $all), fn ($value) => $value !== null); } public function html(string ...$keys): ?string @@ -66,7 +82,13 @@ public function html(string ...$keys): ?string $value = array_shift($value); } - return $value ? clean((string) $value, true) : null; + if (is_string($value)) { + return clean($value, true); + } elseif (is_scalar($value)) { + return clean((string) $value, true); + } + + return null; } public function int(string ...$keys): ?int @@ -85,7 +107,7 @@ public function url(string ...$keys): ?UriInterface $url = $this->str(...$keys); try { - return $url ? $this->extractor->resolveUri($url) : null; + return $url !== null ? $this->extractor->resolveUri($url) : null; } catch (Throwable $error) { return null; } @@ -94,13 +116,13 @@ public function url(string ...$keys): ?UriInterface public function time(string ...$keys): ?DateTime { $time = $this->str(...$keys); - $datetime = $time ? date_create($time) : null; + $datetime = $time !== null ? date_create($time) : null; - if (!$datetime && $time && ctype_digit($time)) { + if ($datetime === false && $time !== null && ctype_digit($time)) { $datetime = date_create_from_format('U', $time); } - return ($datetime && $datetime->getTimestamp() > 0) ? $datetime : null; + return ($datetime !== false && $datetime !== null && $datetime->getTimestamp() > 0) ? $datetime : null; } abstract protected function fetchData(): array; diff --git a/src/Detectors/AuthorName.php b/src/Detectors/AuthorName.php index 17433c41..3d1dc67b 100644 --- a/src/Detectors/AuthorName.php +++ b/src/Detectors/AuthorName.php @@ -10,15 +10,19 @@ public function detect(): ?string $oembed = $this->extractor->getOEmbed(); $metas = $this->extractor->getMetas(); - return $oembed->str('author_name') - ?: $metas->str( - 'article:author', - 'book:author', - 'sailthru.author', - 'lp.article:author', - 'twitter:creator', - 'dcterms.creator', - 'author' - ); + $result = $oembed->str('author_name'); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + return $metas->str( + 'article:author', + 'book:author', + 'sailthru.author', + 'lp.article:author', + 'twitter:creator', + 'dcterms.creator', + 'author' + ); } } diff --git a/src/Detectors/AuthorUrl.php b/src/Detectors/AuthorUrl.php index fe1b564c..c11f2af1 100644 --- a/src/Detectors/AuthorUrl.php +++ b/src/Detectors/AuthorUrl.php @@ -11,8 +11,8 @@ public function detect(): ?UriInterface { $oembed = $this->extractor->getOEmbed(); - return $oembed->url('author_url') - ?: $this->detectFromTwitter(); + $result = $oembed->url('author_url'); + return $result !== null ? $result : $this->detectFromTwitter(); } private function detectFromTwitter(): ?UriInterface @@ -22,7 +22,7 @@ private function detectFromTwitter(): ?UriInterface $user = $metas->str('twitter:creator'); - return $user + return $user !== null ? $crawler->createUri(sprintf('https://twitter.com/%s', ltrim($user, '@'))) : null; } diff --git a/src/Detectors/Cms.php b/src/Detectors/Cms.php index c43f4d8f..027de530 100644 --- a/src/Detectors/Cms.php +++ b/src/Detectors/Cms.php @@ -12,9 +12,9 @@ class Cms extends Detector public function detect(): ?string { - $cms = self::detectFromHost($this->extractor->url->getHost()); + $cms = self::detectFromHost($this->extractor->getUri()->getHost()); - if ($cms) { + if ($cms !== null) { return $cms; } @@ -22,7 +22,8 @@ public function detect(): ?string $generators = $document->select('.//meta', ['name' => 'generator'])->strAll('content'); foreach ($generators as $generator) { - if ($cms = self::detectFromGenerator($generator)) { + $cms = self::detectFromGenerator($generator); + if ($cms !== null) { return $cms; } } diff --git a/src/Detectors/Code.php b/src/Detectors/Code.php index a7b91600..883ee85a 100644 --- a/src/Detectors/Code.php +++ b/src/Detectors/Code.php @@ -10,10 +10,22 @@ class Code extends Detector { public function detect(): ?EmbedCode { - return $this->detectFromEmbed() - ?: $this->detectFromOpenGraph() - ?: $this->detectFromTwitter() - ?: $this->detectFromContentType(); + $result = $this->detectFromEmbed(); + if ($result !== null) { + return $result; + } + + $result = $this->detectFromOpenGraph(); + if ($result !== null) { + return $result; + } + + $result = $this->detectFromTwitter(); + if ($result !== null) { + return $result; + } + + return $this->detectFromContentType(); } private function detectFromEmbed(): ?EmbedCode @@ -21,7 +33,7 @@ private function detectFromEmbed(): ?EmbedCode $oembed = $this->extractor->getOEmbed(); $html = $oembed->html('html'); - if (!$html) { + if ($html === null) { return null; } @@ -38,11 +50,12 @@ private function detectFromOpenGraph(): ?EmbedCode $url = $metas->url('og:video:secure_url', 'og:video:url', 'og:video'); - if (!$url) { + if ($url === null) { return null; } - if (!($type = pathinfo($url->getPath(), PATHINFO_EXTENSION))) { + $type = pathinfo($url->getPath(), PATHINFO_EXTENSION); + if ($type === '') { $type = $metas->str('og:video_type'); } @@ -87,7 +100,7 @@ private function detectFromTwitter(): ?EmbedCode $url = $metas->url('twitter:player'); - if (!$url) { + if ($url === null) { return null; } @@ -105,14 +118,14 @@ private function detectFromTwitter(): ?EmbedCode return new EmbedCode($code, $width, $height); } - private function detectFromContentType() + private function detectFromContentType(): ?EmbedCode { if (!$this->extractor->getResponse()->hasHeader('content-type')) { return null; } $contentType = $this->extractor->getResponse()->getHeader('content-type')[0]; - $isBinary = !preg_match('/(text|html|json)/', strtolower($contentType)); + $isBinary = preg_match('/(text|html|json)/', strtolower($contentType)) !== 1; if (!$isBinary) { return null; } diff --git a/src/Detectors/Description.php b/src/Detectors/Description.php index 90892d11..0fffdee2 100644 --- a/src/Detectors/Description.php +++ b/src/Detectors/Description.php @@ -11,18 +11,26 @@ public function detect(): ?string $metas = $this->extractor->getMetas(); $ld = $this->extractor->getLinkedData(); - return $oembed->str('description') - ?: $metas->str( - 'og:description', - 'twitter:description', - 'lp:description', - 'description', - 'article:description', - 'dcterms.description', - 'sailthru.description', - 'excerpt', - 'article.summary' - ) - ?: $ld->str('description'); + $result = $oembed->str('description'); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + $result = $metas->str( + 'og:description', + 'twitter:description', + 'lp:description', + 'description', + 'article:description', + 'dcterms.description', + 'sailthru.description', + 'excerpt', + 'article.summary' + ); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + return $ld->str('description'); } } diff --git a/src/Detectors/Detector.php b/src/Detectors/Detector.php index 1d6bfb00..ca67a7ba 100644 --- a/src/Detectors/Detector.php +++ b/src/Detectors/Detector.php @@ -8,16 +8,20 @@ abstract class Detector { protected Extractor $extractor; - private array $cache; + /** @var array */ + private array $cache = []; public function __construct(Extractor $extractor) { $this->extractor = $extractor; } + /** + * @return mixed + */ public function get() { - if (!isset($this->cache)) { + if (!isset($this->cache['cached'])) { $this->cache = [ 'cached' => true, 'value' => $this->detect(), @@ -27,5 +31,8 @@ public function get() return $this->cache['value']; } + /** + * @return mixed + */ abstract public function detect(); } diff --git a/src/Detectors/Favicon.php b/src/Detectors/Favicon.php index 93a0a283..dbeca1ab 100644 --- a/src/Detectors/Favicon.php +++ b/src/Detectors/Favicon.php @@ -11,8 +11,16 @@ public function detect(): UriInterface { $document = $this->extractor->getDocument(); - return $document->link('shortcut icon') - ?: $document->link('icon') - ?: $this->extractor->getUri()->withPath('/favicon.ico')->withQuery(''); + $result = $document->link('shortcut icon'); + if ($result !== null) { + return $result; + } + + $result = $document->link('icon'); + if ($result !== null) { + return $result; + } + + return $this->extractor->getUri()->withPath('/favicon.ico')->withQuery(''); } } diff --git a/src/Detectors/Feeds.php b/src/Detectors/Feeds.php index dab87f09..8660726a 100644 --- a/src/Detectors/Feeds.php +++ b/src/Detectors/Feeds.php @@ -5,7 +5,8 @@ class Feeds extends Detector { - private static $types = [ + /** @var string[] */ + private static array $types = [ 'application/atom+xml', 'application/json', 'application/rdf+xml', @@ -25,7 +26,7 @@ public function detect(): array foreach (self::$types as $type) { $href = $document->link('alternate', ['type' => $type]); - if ($href) { + if ($href !== null) { $feeds[] = $href; } } diff --git a/src/Detectors/Icon.php b/src/Detectors/Icon.php index 0d114f03..29f24d44 100644 --- a/src/Detectors/Icon.php +++ b/src/Detectors/Icon.php @@ -11,10 +11,26 @@ public function detect(): ?UriInterface { $document = $this->extractor->getDocument(); - return $document->link('apple-touch-icon-precomposed') - ?: $document->link('apple-touch-icon') - ?: $document->link('icon', ['sizes' => '144x144']) - ?: $document->link('icon', ['sizes' => '96x96']) - ?: $document->link('icon', ['sizes' => '48x48']); + $result = $document->link('apple-touch-icon-precomposed'); + if ($result !== null) { + return $result; + } + + $result = $document->link('apple-touch-icon'); + if ($result !== null) { + return $result; + } + + $result = $document->link('icon', ['sizes' => '144x144']); + if ($result !== null) { + return $result; + } + + $result = $document->link('icon', ['sizes' => '96x96']); + if ($result !== null) { + return $result; + } + + return $document->link('icon', ['sizes' => '48x48']); } } diff --git a/src/Detectors/Image.php b/src/Detectors/Image.php index 04562f26..d7ffa4f2 100644 --- a/src/Detectors/Image.php +++ b/src/Detectors/Image.php @@ -14,16 +14,40 @@ public function detect(): ?UriInterface $metas = $this->extractor->getMetas(); $ld = $this->extractor->getLinkedData(); - return $oembed->url('image') - ?: $oembed->url('thumbnail') - ?: $oembed->url('thumbnail_url') - ?: $metas->url('og:image', 'og:image:url', 'og:image:secure_url', 'twitter:image', 'twitter:image:src', 'lp:image') - ?: $document->link('image_src') - ?: $ld->url('image.url') - ?: $this->detectFromContentType(); + $result = $oembed->url('image'); + if ($result !== null) { + return $result; + } + + $result = $oembed->url('thumbnail'); + if ($result !== null) { + return $result; + } + + $result = $oembed->url('thumbnail_url'); + if ($result !== null) { + return $result; + } + + $result = $metas->url('og:image', 'og:image:url', 'og:image:secure_url', 'twitter:image', 'twitter:image:src', 'lp:image'); + if ($result !== null) { + return $result; + } + + $result = $document->link('image_src'); + if ($result !== null) { + return $result; + } + + $result = $ld->url('image.url'); + if ($result !== null) { + return $result; + } + + return $this->detectFromContentType(); } - private function detectFromContentType() + private function detectFromContentType(): ?\Psr\Http\Message\UriInterface { if (!$this->extractor->getResponse()->hasHeader('content-type')) { return null; @@ -34,5 +58,7 @@ private function detectFromContentType() if (strpos($contentType, 'image/') === 0) { return $this->extractor->getUri(); } + + return null; } } diff --git a/src/Detectors/Keywords.php b/src/Detectors/Keywords.php index 000a1e06..a4ce5b9b 100644 --- a/src/Detectors/Keywords.php +++ b/src/Detectors/Keywords.php @@ -5,6 +5,9 @@ class Keywords extends Detector { + /** + * @return string[] + */ public function detect(): array { $tags = []; @@ -24,25 +27,30 @@ public function detect(): array foreach ($types as $type) { $value = $metas->strAll($type); - if ($value) { + if ($value !== []) { $tags = array_merge($tags, self::toArray($value)); } } $value = $ld->strAll('keywords'); - if ($value) { + if ($value !== []) { $tags = array_merge($tags, self::toArray($value)); } + /** @var array */ $tags = array_map('mb_strtolower', $tags); $tags = array_unique($tags); - $tags = array_filter($tags); + $tags = array_filter($tags, fn ($value) => $value !== '' && $value !== '0'); $tags = array_values($tags); return $tags; } + /** + * @param string[] $keywords + * @return string[] + */ private static function toArray(array $keywords): array { $all = []; @@ -52,7 +60,7 @@ private static function toArray(array $keywords): array $tags = array_map('trim', $tags); $tags = array_filter( $tags, - fn ($value) => !empty($value) && substr($value, -3) !== '...' + fn ($value) => $value !== '' && $value !== '0' && substr($value, -3) !== '...' ); $all = array_merge($all, $tags); diff --git a/src/Detectors/Language.php b/src/Detectors/Language.php index e328260b..ed667db3 100644 --- a/src/Detectors/Language.php +++ b/src/Detectors/Language.php @@ -11,10 +11,26 @@ public function detect(): ?string $metas = $this->extractor->getMetas(); $ld = $this->extractor->getLinkedData(); - return $document->select('/html')->str('lang') - ?: $document->select('/html')->str('xml:lang') - ?: $metas->str('language', 'lang', 'og:locale', 'dc:language') - ?: $document->select('.//meta', ['http-equiv' => 'content-language'])->str('content') - ?: $ld->str('inLanguage'); + $result = $document->select('/html')->str('lang'); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + $result = $document->select('/html')->str('xml:lang'); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + $result = $metas->str('language', 'lang', 'og:locale', 'dc:language'); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + $result = $document->select('.//meta', ['http-equiv' => 'content-language'])->str('content'); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + return $ld->str('inLanguage'); } } diff --git a/src/Detectors/Languages.php b/src/Detectors/Languages.php index 6fbe4e33..eb765a6a 100644 --- a/src/Detectors/Languages.php +++ b/src/Detectors/Languages.php @@ -8,7 +8,7 @@ class Languages extends Detector { /** - * @return \Psr\Http\Message\UriInterface[] + * @return array */ public function detect(): array { @@ -16,6 +16,10 @@ public function detect(): array $languages = []; foreach ($document->select('.//link[@hreflang]')->nodes() as $node) { + if (!$node instanceof \DOMElement) { + continue; + } + $language = $node->getAttribute('hreflang'); $href = $node->getAttribute('href'); diff --git a/src/Detectors/License.php b/src/Detectors/License.php index 5afddbf4..3c74d29f 100644 --- a/src/Detectors/License.php +++ b/src/Detectors/License.php @@ -10,7 +10,7 @@ public function detect(): ?string $oembed = $this->extractor->getOEmbed(); $metas = $this->extractor->getMetas(); - return $oembed->str('license_url') - ?: $metas->str('copyright'); + $license = $oembed->str('license_url'); + return $license !== null ? $license : $metas->str('copyright'); } } diff --git a/src/Detectors/ProviderName.php b/src/Detectors/ProviderName.php index e92f2fc0..4ae7aa84 100644 --- a/src/Detectors/ProviderName.php +++ b/src/Detectors/ProviderName.php @@ -5,6 +5,7 @@ class ProviderName extends Detector { + /** @var string[] */ private static array $suffixes; public function detect(): string @@ -12,14 +13,22 @@ public function detect(): string $oembed = $this->extractor->getOEmbed(); $metas = $this->extractor->getMetas(); - return $oembed->str('provider_name') - ?: $metas->str( - 'og:site_name', - 'dcterms.publisher', - 'publisher', - 'article:publisher' - ) - ?: ucfirst($this->fallback()); + $result = $oembed->str('provider_name'); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + $result = $metas->str( + 'og:site_name', + 'dcterms.publisher', + 'publisher', + 'article:publisher' + ); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + return ucfirst($this->fallback()); } private function fallback(): string @@ -45,10 +54,15 @@ private function fallback(): string } } + /** + * @return string[] + */ private static function getSuffixes(): array { if (!isset(self::$suffixes)) { - self::$suffixes = require dirname(__DIR__).'/resources/suffix.php'; + /** @var string[] */ + $suffixes = require dirname(__DIR__).'/resources/suffix.php'; + self::$suffixes = $suffixes; } return self::$suffixes; diff --git a/src/Detectors/ProviderUrl.php b/src/Detectors/ProviderUrl.php index 9ca9ab6d..7396fd51 100644 --- a/src/Detectors/ProviderUrl.php +++ b/src/Detectors/ProviderUrl.php @@ -12,9 +12,17 @@ public function detect(): UriInterface $oembed = $this->extractor->getOEmbed(); $metas = $this->extractor->getMetas(); - return $oembed->url('provider_url') - ?: $metas->url('og:website') - ?: $this->fallback(); + $result = $oembed->url('provider_url'); + if ($result !== null) { + return $result; + } + + $result = $metas->url('og:website'); + if ($result !== null) { + return $result; + } + + return $this->fallback(); } private function fallback(): UriInterface diff --git a/src/Detectors/PublishedTime.php b/src/Detectors/PublishedTime.php index f168120f..5b9c097d 100644 --- a/src/Detectors/PublishedTime.php +++ b/src/Detectors/PublishedTime.php @@ -13,34 +13,50 @@ public function detect(): ?DateTime $metas = $this->extractor->getMetas(); $ld = $this->extractor->getLinkedData(); - return $oembed->time('pubdate') - ?: $metas->time( - 'article:published_time', - 'created', - 'date', - 'datepublished', - 'music:release_date', - 'video:release_date', - 'newsrepublic:publish_date' - ) - ?: $ld->time( - 'pagePublished', - 'datePublished' - ) - ?: $this->detectFromPath() - ?: $metas->time( - 'pagerender', - 'pub_date', - 'publication-date', - 'lp.article:published_time', - 'lp.article:modified_time', - 'publish-date', - 'rc.datecreation', - 'timestamp', - 'sailthru.date', - 'article:modified_time', - 'dcterms.date' - ); + $result = $oembed->time('pubdate'); + if ($result !== null) { + return $result; + } + + $result = $metas->time( + 'article:published_time', + 'created', + 'date', + 'datepublished', + 'music:release_date', + 'video:release_date', + 'newsrepublic:publish_date' + ); + if ($result !== null) { + return $result; + } + + $result = $ld->time( + 'pagePublished', + 'datePublished' + ); + if ($result !== null) { + return $result; + } + + $result = $this->detectFromPath(); + if ($result !== null) { + return $result; + } + + return $metas->time( + 'pagerender', + 'pub_date', + 'publication-date', + 'lp.article:published_time', + 'lp.article:modified_time', + 'publish-date', + 'rc.datecreation', + 'timestamp', + 'sailthru.date', + 'article:modified_time', + 'dcterms.date' + ); } /** @@ -51,8 +67,9 @@ private function detectFromPath(): ?DateTime { $path = $this->extractor->getUri()->getPath(); - if (preg_match('#/(19|20)\d{2}/[0-1]?\d/[0-3]?\d/#', $path, $matches)) { - return date_create_from_format('/Y/m/d/', $matches[0]) ?: null; + if (preg_match('#/(19|20)\d{2}/[0-1]?\d/[0-3]?\d/#', $path, $matches) === 1) { + $date = date_create_from_format('/Y/m/d/', $matches[0]); + return $date !== false ? $date : null; } return null; diff --git a/src/Detectors/Redirect.php b/src/Detectors/Redirect.php index 79edee0d..717bcf96 100644 --- a/src/Detectors/Redirect.php +++ b/src/Detectors/Redirect.php @@ -12,12 +12,12 @@ public function detect(): ?UriInterface $document = $this->extractor->getDocument(); $value = $document->select('.//meta', ['http-equiv' => 'refresh'])->str('content'); - return $value ? $this->extract($value) : null; + return $value !== null ? $this->extract($value) : null; } private function extract(string $value): ?UriInterface { - if (preg_match('/url=(.+)$/i', $value, $match)) { + if (preg_match('/url=(.+)$/i', $value, $match) === 1) { return $this->extractor->resolveUri(trim($match[1], '\'"')); } diff --git a/src/Detectors/Title.php b/src/Detectors/Title.php index 352bff03..cda77ba5 100644 --- a/src/Detectors/Title.php +++ b/src/Detectors/Title.php @@ -11,17 +11,25 @@ public function detect(): ?string $document = $this->extractor->getDocument(); $metas = $this->extractor->getMetas(); - return $oembed->str('title') - ?: $metas->str( - 'og:title', - 'twitter:title', - 'lp:title', - 'dcterms.title', - 'article:title', - 'headline', - 'article.headline', - 'parsely-title' - ) - ?: $document->select('.//head/title')->str(); + $result = $oembed->str('title'); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + $result = $metas->str( + 'og:title', + 'twitter:title', + 'lp:title', + 'dcterms.title', + 'article:title', + 'headline', + 'article.headline', + 'parsely-title' + ); + if (is_string($result) && trim($result) !== '') { + return $result; + } + + return $document->select('.//head/title')->str(); } } diff --git a/src/Detectors/Url.php b/src/Detectors/Url.php index 358dbfd8..41f5acc2 100644 --- a/src/Detectors/Url.php +++ b/src/Detectors/Url.php @@ -11,8 +11,16 @@ public function detect(): UriInterface { $oembed = $this->extractor->getOEmbed(); - return $oembed->url('url') - ?: $oembed->url('web_page') - ?: $this->extractor->getUri(); + $result = $oembed->url('url'); + if ($result !== null) { + return $result; + } + + $result = $oembed->url('web_page'); + if ($result !== null) { + return $result; + } + + return $this->extractor->getUri(); } } diff --git a/src/Document.php b/src/Document.php index ddb8364c..02f3b1e0 100644 --- a/src/Document.php +++ b/src/Document.php @@ -29,18 +29,18 @@ public function __construct(Extractor $extractor) $encoding = null; $contentType = $extractor->getResponse()->getHeaderLine('content-type'); preg_match('/charset=(?:"|\')?(.*?)(?=$|\s|;|"|\'|>)/i', $contentType, $match); - if (!empty($match[1])) { + if (isset($match[1]) && $match[1] !== '' && $match[1] !== '0') { $encoding = trim($match[1], ','); $encoding = $this->getValidEncoding($encoding); } - if (is_null($encoding) && !empty($html)) { + if (is_null($encoding) && $html !== '') { preg_match('/charset=(?:"|\')?(.*?)(?=$|\s|;|"|\'|>)/i', $html, $match); - if (!empty($match[1])) { + if (isset($match[1]) && $match[1] !== '' && $match[1] !== '0') { $encoding = trim($match[1], ','); $encoding = $this->getValidEncoding($encoding); } } - $this->document = !empty($html) ? Parser::parse($html, $encoding) : new DOMDocument(); + $this->document = $html !== '' ? Parser::parse($html, $encoding) : new DOMDocument(); $this->initXPath(); } @@ -60,21 +60,18 @@ private function getValidEncoding(?string $encoding): ?string { if (PHP_VERSION_ID < 80000) { // PHP 7.4: Check return value (false = invalid encoding) - // Need to check empty() first to avoid Warning + // Need to check null/empty first to avoid Warning // TODO: Remove this entire branch when PHP 7.4 support is dropped - if (empty($encoding)) { + if ($encoding === null || $encoding === '') { return null; } - $ret = mb_encoding_aliases($encoding); - if ($ret === false) { - return null; - } else { - return $encoding; - } + $ret = @mb_encoding_aliases($encoding); + /** @phpstan-ignore function.alreadyNarrowedType (PHP 7.4 returns false for invalid encoding, PHP 8.0+ returns array) */ + return is_array($ret) ? $encoding : null; } else { // PHP 8.0+: ValueError exception is thrown for invalid/empty encoding try { - mb_encoding_aliases($encoding ?? ''); + $aliases = mb_encoding_aliases($encoding ?? ''); // If mb_encoding_aliases succeeds, return the input value as is. Some encodings do not have aliases. return $encoding; } catch (\ValueError $exception) { @@ -83,7 +80,7 @@ private function getValidEncoding(?string $encoding): ?string } } - private function initXPath() + private function initXPath(): void { $this->xpath = new DOMXPath($this->document); $this->xpath->registerNamespace('php', 'http://php.net/xpath'); @@ -98,10 +95,16 @@ public function __clone() public function remove(string $query): void { - $nodes = iterator_to_array($this->xpath->query($query), false); + $result = $this->xpath->query($query); + if ($result === false) { + return; + } + $nodes = iterator_to_array($result, false); foreach ($nodes as $node) { - $node->parentNode->removeChild($node); + if ($node->parentNode !== null) { + $node->parentNode->removeChild($node); + } } } @@ -117,6 +120,8 @@ public function getDocument(): DOMDocument /** * Helper to build xpath queries easily and case insensitive + * + * @param array $attributes */ private static function buildQuery(string $startQuery, array $attributes): string { @@ -131,14 +136,20 @@ private static function buildQuery(string $startQuery, array $attributes): strin /** * Select a element in the dom + * + * @param array|null $attributes */ public function select(string $query, ?array $attributes = null, ?DOMNode $context = null): QueryResult { - if (!empty($attributes)) { + if ($attributes !== null && $attributes !== []) { $query = self::buildQuery($query, $attributes); } - return new QueryResult($this->xpath->query($query, $context), $this->extractor); + $result = $this->xpath->query($query, $context); + if ($result === false) { + $result = new \DOMNodeList(); + } + return new QueryResult($result, $this->extractor); } /** @@ -151,6 +162,8 @@ public function selectCss(string $query, ?DOMNode $context = null): QueryResult /** * Shortcut to select a element and return the href + * + * @param array $extra */ public function link(string $rel, array $extra = []): ?UriInterface { @@ -172,6 +185,6 @@ private static function cssToXpath(string $selector): string self::$cssConverter = new CssSelectorConverter(); } - return self::$cssConverter->toXpath($selector); + return self::$cssConverter->toXPath($selector); } } diff --git a/src/Embed.php b/src/Embed.php index 3366cfef..6f2d583a 100644 --- a/src/Embed.php +++ b/src/Embed.php @@ -14,8 +14,8 @@ class Embed public function __construct(?Crawler $crawler = null, ?ExtractorFactory $extractorFactory = null) { - $this->crawler = $crawler ?: new Crawler(); - $this->extractorFactory = $extractorFactory ?: new ExtractorFactory(); + $this->crawler = $crawler !== null ? $crawler : new Crawler(); + $this->extractorFactory = $extractorFactory !== null ? $extractorFactory : new ExtractorFactory(); } public function get(string $url): Extractor @@ -41,7 +41,10 @@ public function getMulti(string ...$urls): array $return = []; foreach ($responses as $k => $response) { - $return[] = $this->extract($requests[$k], $responses[$k]); + /** @phpstan-ignore instanceof.alwaysTrue (defensive check for error handling) */ + if ($response instanceof ResponseInterface) { + $return[] = $this->extract($requests[$k], $response); + } } return $return; @@ -57,6 +60,9 @@ public function getExtractorFactory(): ExtractorFactory return $this->extractorFactory; } + /** + * @param array $settings + */ public function setSettings(array $settings): void { $this->extractorFactory->setSettings($settings); @@ -64,7 +70,10 @@ public function setSettings(array $settings): void private function extract(RequestInterface $request, ResponseInterface $response, bool $redirect = true): Extractor { - $uri = $this->crawler->getResponseUri($response) ?: $request->getUri(); + $uri = $this->crawler->getResponseUri($response); + if ($uri === null) { + $uri = $request->getUri(); + } $extractor = $this->extractorFactory->createExtractor($uri, $request, $response, $this->crawler); @@ -72,7 +81,13 @@ private function extract(RequestInterface $request, ResponseInterface $response, return $extractor; } - $request = $this->crawler->createRequest('GET', $extractor->redirect); + // Magic property access returns mixed, but we know it's ?UriInterface from Redirect detector + $redirectUri = $extractor->redirect; + if (!($redirectUri instanceof \Psr\Http\Message\UriInterface)) { + return $extractor; + } + + $request = $this->crawler->createRequest('GET', (string) $redirectUri); $response = $this->crawler->sendRequest($request); return $this->extract($request, $response, false); @@ -80,10 +95,12 @@ private function extract(RequestInterface $request, ResponseInterface $response, private function mustRedirect(Extractor $extractor): bool { - if (!empty($extractor->getOembed()->all())) { + if ($extractor->getOEmbed()->all() !== []) { return false; } - return $extractor->redirect !== null; + // Magic property access returns mixed, but we know it's ?UriInterface from Redirect detector + $redirectUri = $extractor->redirect; + return $redirectUri instanceof \Psr\Http\Message\UriInterface; } } diff --git a/src/EmbedCode.php b/src/EmbedCode.php index 48b87c99..250657e4 100644 --- a/src/EmbedCode.php +++ b/src/EmbedCode.php @@ -19,7 +19,7 @@ public function __construct(string $html, ?int $width = null, ?int $height = nul $this->width = $width; $this->height = $height; - if ($width && $height) { + if ($width !== null && $width !== 0 && $height !== null && $height !== 0) { $this->ratio = round(($height / $width) * 100, 3); } } diff --git a/src/Extractor.php b/src/Extractor.php index 469dd6bc..ac8d6afa 100644 --- a/src/Extractor.php +++ b/src/Extractor.php @@ -66,7 +66,9 @@ class Extractor protected LinkedData $linkedData; protected Metas $metas; + /** @var array */ private array $settings = []; + /** @var array */ private array $customDetectors = []; protected AuthorName $authorName; @@ -124,17 +126,32 @@ public function __construct(UriInterface $uri, RequestInterface $request, Respon $this->url = new Url($this); } + /** + * @return mixed + */ public function __get(string $name) { - $detector = $this->customDetectors[$name] ?? $this->$name ?? null; + $detector = $this->customDetectors[$name] ?? null; + + if ($detector === null && property_exists($this, $name)) { + /** @var mixed $property */ + /** @phpstan-ignore property.dynamicName */ + $property = (fn($n) => $this->$n)($name); + if ($property instanceof Detector) { + $detector = $property; + } + } - if (!$detector || !($detector instanceof Detector)) { + if ($detector === null) { throw new DomainException(sprintf('Invalid key "%s". No detector found for this value', $name)); } return $detector->get(); } + /** + * @return array + */ public function createCustomDetectors(): array { return []; @@ -145,16 +162,25 @@ public function addDetector(string $name, Detector $detector): void $this->customDetectors[$name] = $detector; } + /** + * @param array $settings + */ public function setSettings(array $settings): void { $this->settings = $settings; } + /** + * @return array + */ public function getSettings(): array { return $this->settings; } + /** + * @return mixed + */ public function getSetting(string $key) { return $this->settings[$key] ?? null; @@ -208,10 +234,6 @@ public function resolveUri($uri): UriInterface $uri = $this->crawler->createUri($uri); } - if (!($uri instanceof UriInterface)) { - throw new InvalidArgumentException('Uri must be a string or an instance of UriInterface'); - } - return resolveUri($this->uri, $uri); } diff --git a/src/ExtractorFactory.php b/src/ExtractorFactory.php index 4b5f1e0e..f93c60a0 100644 --- a/src/ExtractorFactory.php +++ b/src/ExtractorFactory.php @@ -11,6 +11,7 @@ class ExtractorFactory { private string $default = Extractor::class; + /** @var array> */ private array $adapters = [ 'slides.com' => Adapters\Slides\Extractor::class, 'pinterest.com' => Adapters\Pinterest\Extractor::class, @@ -32,9 +33,14 @@ class ExtractorFactory 'twitter.com' => Adapters\Twitter\Extractor::class, 'x.com' => Adapters\Twitter\Extractor::class, ]; + /** @var array> */ private array $customDetectors = []; + /** @var array */ private array $settings; + /** + * @param array|null $settings + */ public function __construct(?array $settings = []) { $this->settings = $settings ?? []; @@ -63,8 +69,10 @@ public function createExtractor(UriInterface $uri, RequestInterface $request, Re $extractor = new $class($uri, $request, $response, $crawler); $extractor->setSettings($this->settings); - foreach ($this->customDetectors as $name => $detector) { - $extractor->addDetector($name, new $detector($extractor)); + foreach ($this->customDetectors as $name => $detectorClass) { + /** @var Detectors\Detector */ + $detector = new $detectorClass($extractor); + $extractor->addDetector($name, $detector); } foreach ($extractor->createCustomDetectors() as $name => $detector) { @@ -74,11 +82,17 @@ public function createExtractor(UriInterface $uri, RequestInterface $request, Re return $extractor; } + /** + * @param class-string $class + */ public function addAdapter(string $pattern, string $class): void { $this->adapters[$pattern] = $class; } + /** + * @param class-string $class + */ public function addDetector(string $name, string $class): void { $this->customDetectors[$name] = $class; @@ -94,6 +108,9 @@ public function setDefault(string $class): void $this->default = $class; } + /** + * @param array $settings + */ public function setSettings(array $settings): void { $this->settings = $settings; diff --git a/src/Http/Crawler.php b/src/Http/Crawler.php index 77451eba..2c233636 100644 --- a/src/Http/Crawler.php +++ b/src/Http/Crawler.php @@ -15,6 +15,7 @@ class Crawler implements ClientInterface, RequestFactoryInterface, UriFactoryInt private RequestFactoryInterface $requestFactory; private UriFactoryInterface $uriFactory; private ClientInterface $client; + /** @var array */ private array $defaultHeaders = [ 'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:73.0) Gecko/20100101 Firefox/73.0', 'Cache-Control' => 'max-age=0', @@ -22,11 +23,14 @@ class Crawler implements ClientInterface, RequestFactoryInterface, UriFactoryInt public function __construct(?ClientInterface $client = null, ?RequestFactoryInterface $requestFactory = null, ?UriFactoryInterface $uriFactory = null) { - $this->client = $client ?: new CurlClient(); - $this->requestFactory = $requestFactory ?: FactoryDiscovery::getRequestFactory(); - $this->uriFactory = $uriFactory ?: FactoryDiscovery::getUriFactory(); + $this->client = $client !== null ? $client : new CurlClient(); + $this->requestFactory = $requestFactory !== null ? $requestFactory : FactoryDiscovery::getRequestFactory(); + $this->uriFactory = $uriFactory !== null ? $uriFactory : FactoryDiscovery::getUriFactory(); } + /** + * @param array $headers + */ public function addDefaultHeaders(array $headers): void { $this->defaultHeaders = $headers + $this->defaultHeaders; @@ -56,6 +60,9 @@ public function sendRequest(RequestInterface $request): ResponseInterface return $this->client->sendRequest($request); } + /** + * @return array + */ public function sendRequests(RequestInterface ...$requests): array { if ($this->client instanceof CurlClient) { @@ -72,6 +79,6 @@ public function getResponseUri(ResponseInterface $response): ?UriInterface { $location = $response->getHeaderLine('Content-Location'); - return $location ? $this->uriFactory->createUri($location) : null; + return $location !== '' ? $this->uriFactory->createUri($location) : null; } } diff --git a/src/Http/CurlClient.php b/src/Http/CurlClient.php index 6b3f44c5..9794d718 100644 --- a/src/Http/CurlClient.php +++ b/src/Http/CurlClient.php @@ -14,13 +14,17 @@ final class CurlClient implements ClientInterface { private ResponseFactoryInterface $responseFactory; + /** @var array */ private array $settings = []; public function __construct(?ResponseFactoryInterface $responseFactory = null) { - $this->responseFactory = $responseFactory ?: FactoryDiscovery::getResponseFactory(); + $this->responseFactory = $responseFactory !== null ? $responseFactory : FactoryDiscovery::getResponseFactory(); } + /** + * @param array $settings + */ public function setSettings(array $settings): void { $this->settings = $settings + $this->settings; @@ -33,6 +37,9 @@ public function sendRequest(RequestInterface $request): ResponseInterface return $responses[0]; } + /** + * @return ResponseInterface[] + */ public function sendRequests(RequestInterface ...$request): array { return CurlDispatcher::fetch($this->settings, $this->responseFactory, ...$request); diff --git a/src/Http/CurlDispatcher.php b/src/Http/CurlDispatcher.php index e3312788..6cdde3a1 100644 --- a/src/Http/CurlDispatcher.php +++ b/src/Http/CurlDispatcher.php @@ -12,6 +12,8 @@ /** * Class to fetch html pages + * + * @phpstan-type CurlResource resource|\CurlHandle */ final class CurlDispatcher { @@ -19,22 +21,31 @@ final class CurlDispatcher private RequestInterface $request; private StreamFactoryInterface $streamFactory; + /** + * @var resource|\CurlHandle + * @phpstan-ignore property.unusedType (resource type needed for PHP 7.4 compatibility) + */ private $curl; - private $result; + /** @var array */ private array $headers = []; - private $isBinary = false; + private bool $isBinary = false; private ?StreamInterface $body = null; private ?int $error = null; + /** @var array */ private array $settings; /** + * @param array $settings * @return ResponseInterface[] */ public static function fetch(array $settings, ResponseFactoryInterface $responseFactory, RequestInterface ...$requests): array { if (count($requests) === 1) { $connection = new static($settings, $requests[0]); - curl_exec($connection->curl); + /** @var resource|\CurlHandle $curlHandle */ + $curlHandle = $connection->curl; + /** @phpstan-ignore argument.type (PHP 7.4/8.0 compatibility) */ + curl_exec($curlHandle); return [$connection->getResponse($responseFactory)]; } @@ -44,7 +55,10 @@ public static function fetch(array $settings, ResponseFactoryInterface $response foreach ($requests as $request) { $connection = new static($settings, $request); - curl_multi_add_handle($multi, $connection->curl); + /** @var resource|\CurlHandle $curlHandle */ + $curlHandle = $connection->curl; + /** @phpstan-ignore argument.type (PHP 7.4/8.0 compatibility) */ + curl_multi_add_handle($multi, $curlHandle); $connections[] = $connection; } @@ -60,19 +74,29 @@ public static function fetch(array $settings, ResponseFactoryInterface $response $info = curl_multi_info_read($multi); - if ($info) { - foreach ($connections as $connection) { - if ($connection->curl === $info['handle']) { - $connection->result = $info['result']; - break; + if (is_array($info) && isset($info['handle'], $info['result'])) { + $result = $info['result']; + // Validate and cast result to int, only set if it's a non-success error code + if (is_numeric($result)) { + $errorCode = (int) $result; + if ($errorCode !== CURLE_OK) { + foreach ($connections as $connection) { + if ($connection->curl === $info['handle']) { + $connection->error = $errorCode; + break; + } + } } } } - } while ($active && $status == CURLM_OK); + } while ($active && $status === CURLM_OK); //Close connections foreach ($connections as $connection) { - curl_multi_remove_handle($multi, $connection->curl); + /** @var resource|\CurlHandle $curlHandle */ + $curlHandle = $connection->curl; + /** @phpstan-ignore argument.type (PHP 7.4/8.0 compatibility) */ + curl_multi_remove_handle($multi, $curlHandle); } curl_multi_close($multi); @@ -83,6 +107,9 @@ public static function fetch(array $settings, ResponseFactoryInterface $response ); } + /** + * @param array $settings + */ private function __construct(array $settings, RequestInterface $request, ?StreamFactoryInterface $streamFactory = null) { $this->request = $request; @@ -116,17 +143,25 @@ private function __construct(array $settings, RequestInterface $request, ?Stream private function getResponse(ResponseFactoryInterface $responseFactory): ResponseInterface { - $info = curl_getinfo($this->curl); + /** @var resource|\CurlHandle $curlHandle */ + $curlHandle = $this->curl; + /** @phpstan-ignore argument.type (PHP 7.4/8.0 compatibility) */ + $info = curl_getinfo($curlHandle); - if ($this->error) { + if ($this->error !== null && $this->error !== 0) { + /** @phpstan-ignore argument.type (curl_strerror returns string|null in some versions) */ $this->error(curl_strerror($this->error), $this->error); } - if (curl_errno($this->curl)) { - $this->error(curl_error($this->curl), curl_errno($this->curl)); + /** @phpstan-ignore argument.type (PHP 7.4/8.0 compatibility) */ + $errno = curl_errno($curlHandle); + if ($errno !== 0) { + /** @phpstan-ignore argument.type (PHP 7.4/8.0 compatibility) */ + $this->error(curl_error($curlHandle), $errno); } - curl_close($this->curl); + /** @phpstan-ignore argument.type (PHP 7.4/8.0 compatibility) */ + curl_close($curlHandle); $response = $responseFactory->createResponse($info['http_code']); @@ -139,7 +174,7 @@ private function getResponse(ResponseFactoryInterface $responseFactory): Respons ->withAddedHeader('Content-Location', $info['url']) ->withAddedHeader('X-Request-Time', sprintf('%.3f ms', $info['total_time'])); - if ($this->body) { + if ($this->body !== null) { //5Mb max $this->body->rewind(); $response = $response->withBody($this->body); @@ -149,11 +184,11 @@ private function getResponse(ResponseFactoryInterface $responseFactory): Respons return $response; } - private function error(string $message, int $code) + private function error(string $message, int $code): void { $ignored = $this->settings['ignored_errors'] ?? null; - if ($ignored === true || (is_array($ignored) && in_array($code, $ignored))) { + if ($ignored === true || (is_array($ignored) && in_array($code, $ignored, true))) { return; } @@ -165,6 +200,9 @@ private function error(string $message, int $code) throw new NetworkException($message, $code, $this->request); } + /** + * @return array + */ private function getRequestHeaders(): array { $headers = []; @@ -181,17 +219,25 @@ private function getRequestHeaders(): array return $headers; } + /** + * @param resource|\CurlHandle $curl + * @param mixed $string + */ private function writeHeader($curl, $string): int { - if (preg_match('/^([\w-]+):(.*)$/', $string, $matches)) { + if (!is_string($string)) { + return 0; + } + + if (preg_match('/^([\w-]+):(.*)$/', $string, $matches) === 1) { $name = strtolower($matches[1]); $value = trim($matches[2]); $this->headers[] = [$name, $value]; if ($name === 'content-type') { - $this->isBinary = !preg_match('/(text|html|json)/', strtolower($value)); + $this->isBinary = preg_match('/(text|html|json)/', strtolower($value)) === 0; } - } elseif ($this->headers) { + } elseif ($this->headers !== []) { $key = array_key_last($this->headers); $this->headers[$key][1] .= ' '.trim($string); } @@ -199,13 +245,21 @@ private function writeHeader($curl, $string): int return strlen($string); } + /** + * @param resource|\CurlHandle $curl + * @param mixed $string + */ private function writeBody($curl, $string): int { + if (!is_string($string)) { + return -1; + } + if ($this->isBinary) { return -1; } - if (!$this->body) { + if ($this->body === null) { $this->body = $this->streamFactory->createStreamFromFile('php://temp', 'w+'); } diff --git a/src/Http/FactoryDiscovery.php b/src/Http/FactoryDiscovery.php index 8072d6fb..993e6435 100644 --- a/src/Http/FactoryDiscovery.php +++ b/src/Http/FactoryDiscovery.php @@ -45,7 +45,9 @@ abstract class FactoryDiscovery public static function getRequestFactory(): RequestFactoryInterface { - if ($class = self::searchClass(self::REQUEST)) { + $class = self::searchClass(self::REQUEST); + if ($class !== null) { + /** @var RequestFactoryInterface */ return new $class(); } @@ -54,7 +56,9 @@ public static function getRequestFactory(): RequestFactoryInterface public static function getResponseFactory(): ResponseFactoryInterface { - if ($class = self::searchClass(self::RESPONSE)) { + $class = self::searchClass(self::RESPONSE); + if ($class !== null) { + /** @var ResponseFactoryInterface */ return new $class(); } @@ -63,7 +67,9 @@ public static function getResponseFactory(): ResponseFactoryInterface public static function getUriFactory(): UriFactoryInterface { - if ($class = self::searchClass(self::URI)) { + $class = self::searchClass(self::URI); + if ($class !== null) { + /** @var UriFactoryInterface */ return new $class(); } @@ -72,14 +78,19 @@ public static function getUriFactory(): UriFactoryInterface public static function getStreamFactory(): StreamFactoryInterface { - if ($class = self::searchClass(self::STREAM)) { + $class = self::searchClass(self::STREAM); + if ($class !== null) { + /** @var StreamFactoryInterface */ return new $class(); } throw new RuntimeException('No StreamFactoryInterface detected'); } - private static function searchClass($classes): ?string + /** + * @param string[] $classes + */ + private static function searchClass(array $classes): ?string { foreach ($classes as $class) { if (class_exists($class)) { diff --git a/src/Http/RequestException.php b/src/Http/RequestException.php index bfa9b4e6..701d3400 100644 --- a/src/Http/RequestException.php +++ b/src/Http/RequestException.php @@ -13,6 +13,7 @@ final class RequestException extends Exception implements RequestExceptionInterf public function __construct(string $message, int $code, RequestInterface $request) { + parent::__construct($message, $code); $this->request = $request; } diff --git a/src/HttpApiTrait.php b/src/HttpApiTrait.php index fa697431..8e4180bb 100644 --- a/src/HttpApiTrait.php +++ b/src/HttpApiTrait.php @@ -10,13 +10,16 @@ trait HttpApiTrait { use ApiTrait; - private ?UriInterface $endpoint; + private ?UriInterface $endpoint = null; public function getEndpoint(): ?UriInterface { return $this->endpoint; } + /** + * @return array + */ private function fetchJSON(UriInterface $uri): array { $crawler = $this->extractor->getCrawler(); @@ -24,7 +27,12 @@ private function fetchJSON(UriInterface $uri): array $response = $crawler->sendRequest($request); try { - return json_decode((string) $response->getBody(), true) ?: []; + $data = json_decode((string) $response->getBody(), true); + if (is_array($data)) { + /** @var array */ + return $data; + } + return []; } catch (Exception $exception) { return []; } diff --git a/src/LinkedData.php b/src/LinkedData.php index 9d7cfe19..91876654 100644 --- a/src/LinkedData.php +++ b/src/LinkedData.php @@ -15,15 +15,19 @@ class LinkedData { use ApiTrait; - private ?DocumentInterface $document; + private ?DocumentInterface $document = null; - private array $allData; + /** @var array */ + private array $allData = []; + /** + * @return mixed + */ public function get(string ...$keys) { $graph = $this->getGraph(); - if (!$graph) { + if ($graph === null) { return null; } @@ -33,7 +37,7 @@ public function get(string ...$keys) foreach ($graph->getNodes() as $node) { $value = self::getValue($node, ...$subkeys); - if ($value) { + if ($value !== null && $value !== '' && $value !== false && $value !== []) { return $value; } } @@ -42,9 +46,12 @@ public function get(string ...$keys) return null; } - public function getAll() + /** + * @return array + */ + public function getAll(): array { - if (!isset($this->allData)) { + if ($this->allData === []) { $this->fetchData(); } @@ -55,7 +62,11 @@ private function getGraph(?string $name = null): ?GraphInterface { if (!isset($this->document)) { try { - $this->document = LdDocument::load(json_encode($this->all())); + $encoded = json_encode($this->all()); + if ($encoded === false) { + $encoded = '{}'; + } + $this->document = LdDocument::load($encoded); } catch (Throwable $throwable) { $this->document = LdDocument::load('{}'); return null; @@ -65,6 +76,9 @@ private function getGraph(?string $name = null): ?GraphInterface return $this->document->getGraph($name); } + /** + * @return array + */ protected function fetchData(): array { $this->allData = []; @@ -72,16 +86,17 @@ protected function fetchData(): array $document = $this->extractor->getDocument(); $nodes = $document->select('.//script', ['type' => 'application/ld+json'])->strAll(); - if (empty($nodes)) { + if ($nodes === []) { return []; } try { + /** @var array $data */ $data = []; $request_uri = (string)$this->extractor->getUri(); foreach ($nodes as $node) { $ldjson = json_decode($node, true); - if (!empty($ldjson)) { + if (is_array($ldjson) && $ldjson !== []) { // some pages with multiple ld+json blocks will put // each block into an array (Flickr does this). Most @@ -92,24 +107,30 @@ protected function fetchData(): array $ldjson = [$ldjson]; } - foreach ($ldjson as $node) { - if (empty($data)) { - $data = $node; - } elseif (isset($node['mainEntityOfPage'])) { + foreach ($ldjson as $ldNode) { + if (!is_array($ldNode)) { + continue; + } + if ($data === []) { + /** @var array $data */ + $data = $ldNode; + } elseif (isset($ldNode['mainEntityOfPage'])) { $url = ''; - if (is_string($node['mainEntityOfPage'])) { - $url = $node['mainEntityOfPage']; - } elseif (isset($node['mainEntityOfPage']['@id'])) { - $url = $node['mainEntityOfPage']['@id']; + if (is_string($ldNode['mainEntityOfPage'])) { + $url = $ldNode['mainEntityOfPage']; + } elseif (is_array($ldNode['mainEntityOfPage']) && isset($ldNode['mainEntityOfPage']['@id']) && is_string($ldNode['mainEntityOfPage']['@id'])) { + $url = $ldNode['mainEntityOfPage']['@id']; } - if (!empty($url) && $url == $request_uri) { - $data = $node; + if ($url !== '' && $url === $request_uri) { + /** @var array $data */ + $data = $ldNode; } } } - - $this->allData = array_merge($this->allData, $ldjson); + /** @var array $mergedData */ + $mergedData = array_merge($this->allData, $ldjson); + $this->allData = $mergedData; } } @@ -119,6 +140,9 @@ protected function fetchData(): array } } + /** + * @return mixed + */ private static function getValue(Node $node, string ...$keys) { foreach ($keys as $key) { @@ -131,7 +155,7 @@ private static function getValue(Node $node, string ...$keys) $node = $node->getProperty("http://schema.org/{$key}"); - if (!$node) { + if ($node === null) { return null; } } @@ -139,6 +163,10 @@ private static function getValue(Node $node, string ...$keys) return self::detectValue($node); } + /** + * @param mixed $value + * @return mixed + */ private static function detectValue($value) { if (is_array($value)) { @@ -156,6 +184,10 @@ private static function detectValue($value) return $value->getId(); } - return $value->getValue(); + if (is_object($value) && method_exists($value, 'getValue')) { + return $value->getValue(); + } + + return null; } } diff --git a/src/Metas.php b/src/Metas.php index 70abe6cc..0450370f 100644 --- a/src/Metas.php +++ b/src/Metas.php @@ -7,16 +7,28 @@ class Metas { use ApiTrait; + /** + * @return array + */ protected function fetchData(): array { $data = []; $document = $this->extractor->getDocument(); foreach ($document->select('.//meta')->nodes() as $node) { - $type = $node->getAttribute('name') ?: $node->getAttribute('property') ?: $node->getAttribute('itemprop'); + if (!($node instanceof \DOMElement)) { + continue; + } + $type = $node->getAttribute('name'); + if ($type === '') { + $type = $node->getAttribute('property'); + } + if ($type === '') { + $type = $node->getAttribute('itemprop'); + } $value = $node->getAttribute('content'); - if (!empty($value) && !empty($type)) { + if ($value !== '' && $type !== '') { $type = strtolower($type); $data[$type] ??= []; $data[$type][] = $value; @@ -26,6 +38,9 @@ protected function fetchData(): array return $data; } + /** + * @return mixed + */ public function get(string ...$keys) { $data = $this->all(); @@ -33,7 +48,7 @@ public function get(string ...$keys) foreach ($keys as $key) { $values = $data[$key] ?? null; - if ($values) { + if ($values !== null && $values !== '' && $values !== []) { return $values; } } diff --git a/src/OEmbed.php b/src/OEmbed.php index e089150e..b530591f 100644 --- a/src/OEmbed.php +++ b/src/OEmbed.php @@ -11,30 +11,48 @@ class OEmbed { use HttpApiTrait; - private static $providers; + /** @var array|null */ + private static $providers = null; + + /** @var array */ private array $defaults = []; + /** + * @return array + */ private static function getProviders(): array { - if (!is_array(self::$providers)) { - self::$providers = require __DIR__.'/resources/oembed.php'; + if (self::$providers === null) { + /** @var array $loaded */ + $loaded = require __DIR__.'/resources/oembed.php'; + self::$providers = $loaded; } return self::$providers; } + /** + * @return array + */ public function getOembedQueryParameters(string $url): array { $queryParameters = ['url' => $url, 'format' => 'json']; + $setting = $this->extractor->getSetting('oembed:query_parameters'); + $additional = is_array($setting) ? $setting : []; - return array_merge($queryParameters, $this->extractor->getSetting('oembed:query_parameters') ?? []); + /** @var array $result */ + $result = array_merge($queryParameters, $additional); + return $result; } + /** + * @return array + */ protected function fetchData(): array { $this->endpoint = $this->detectEndpoint(); - if (empty($this->endpoint)) { + if ($this->endpoint === null) { return []; } @@ -53,11 +71,20 @@ protected function detectEndpoint(): ?UriInterface { $document = $this->extractor->getDocument(); - $endpoint = $document->link('alternate', ['type' => 'application/json+oembed']) - ?: $document->link('alternate', ['type' => 'text/json+oembed']) - ?: $document->link('alternate', ['type' => 'application/xml+oembed']) - ?: $document->link('alternate', ['type' => 'text/xml+oembed']) - ?: null; + $endpoint = null; + $types = [ + 'application/json+oembed', + 'text/json+oembed', + 'application/xml+oembed', + 'text/xml+oembed', + ]; + + foreach ($types as $type) { + $endpoint = $document->link('alternate', ['type' => $type]); + if ($endpoint !== null) { + break; + } + } if ($endpoint === null) { return $this->detectEndpointFromProviders(); @@ -65,7 +92,9 @@ protected function detectEndpoint(): ?UriInterface // Add configured OEmbed query parameters parse_str($endpoint->getQuery(), $query); - $query = array_merge($query, $this->extractor->getSetting('oembed:query_parameters') ?? []); + $setting = $this->extractor->getSetting('oembed:query_parameters'); + $additional = is_array($setting) ? $setting : []; + $query = array_merge($query, $additional); $endpoint = $endpoint->withQuery(http_build_query($query)); return $endpoint; @@ -75,15 +104,19 @@ private function detectEndpointFromProviders(): ?UriInterface { $url = (string) $this->extractor->getUri(); - if ($endpoint = $this->detectEndpointFromUrl($url)) { + $endpoint = $this->detectEndpointFromUrl($url); + if ($endpoint !== null) { return $endpoint; } $initialUrl = (string) $this->extractor->getRequest()->getUri(); - if ($initialUrl !== $url && ($endpoint = $this->detectEndpointFromUrl($initialUrl))) { - $this->defaults['url'] = $initialUrl; - return $endpoint; + if ($initialUrl !== $url) { + $endpoint = $this->detectEndpointFromUrl($initialUrl); + if ($endpoint !== null) { + $this->defaults['url'] = $initialUrl; + return $endpoint; + } } return null; @@ -93,7 +126,7 @@ private function detectEndpointFromUrl(string $url): ?UriInterface { $endpoint = self::searchEndpoint(self::getProviders(), $url); - if (!$endpoint) { + if ($endpoint === null || $endpoint === '') { return null; } @@ -102,12 +135,22 @@ private function detectEndpointFromUrl(string $url): ?UriInterface ->withQuery(http_build_query($this->getOembedQueryParameters($url))); } + /** + * @param array $providers + */ private static function searchEndpoint(array $providers, string $url): ?string { foreach ($providers as $endpoint => $patterns) { + if (!is_array($patterns)) { + continue; + } foreach ($patterns as $pattern) { - if (preg_match($pattern, $url)) { - return $endpoint; + if (!is_string($pattern)) { + continue; + } + $matchResult = preg_match($pattern, $url); + if ($matchResult === 1) { + return is_string($endpoint) ? $endpoint : null; } } } @@ -126,21 +169,27 @@ private static function isXML(UriInterface $uri): bool parse_str($uri->getQuery(), $params); $format = $params['format'] ?? null; - if ($format && strtolower($format) === 'xml') { + if (is_string($format) && $format !== '' && strtolower($format) === 'xml') { return true; } return false; } + /** + * @return array + */ private function extractXML(string $xml): array { try { // Remove the DOCTYPE declaration for to prevent XML Quadratic Blowup vulnerability - $xml = preg_replace('/^]*+>/i', '', $xml, 1); + $cleanedXml = preg_replace('/^]*+>/i', '', $xml, 1); + if (!is_string($cleanedXml)) { + return []; + } $data = []; $errors = libxml_use_internal_errors(true); - $content = new SimpleXMLElement($xml); + $content = new SimpleXMLElement($cleanedXml); libxml_use_internal_errors($errors); foreach ($content as $element) { @@ -154,18 +203,28 @@ private function extractXML(string $xml): array $data[$name] = $value; } - return $data ? ($data + $this->defaults) : []; + return $data !== [] ? ($data + $this->defaults) : []; } catch (Exception $exception) { return []; } } + /** + * @return array + */ private function extractJSON(string $json): array { try { - $data = json_decode($json, true); + /** @var mixed $decoded */ + $decoded = json_decode($json, true); + + if (!is_array($decoded)) { + return []; + } - return is_array($data) ? ($data + $this->defaults) : []; + /** @var array $result */ + $result = $decoded + $this->defaults; + return $result; } catch (Exception $exception) { return []; } diff --git a/src/QueryResult.php b/src/QueryResult.php index 57e85955..1c60eecf 100644 --- a/src/QueryResult.php +++ b/src/QueryResult.php @@ -5,6 +5,7 @@ use Closure; use DOMElement; +use DOMNode; use DOMNodeList; use Psr\Http\Message\UriInterface; use Throwable; @@ -12,78 +13,121 @@ class QueryResult { private Extractor $extractor; + /** @var list */ private array $nodes = []; + /** + * @param DOMNodeList $result + */ public function __construct(DOMNodeList $result, Extractor $extractor) { - $this->nodes = iterator_to_array($result, false); + /** @var list $nodeArray */ + $nodeArray = iterator_to_array($result, false); + $this->nodes = $nodeArray; $this->extractor = $extractor; } public function node(): ?DOMElement { - return $this->nodes[0] ?? null; + $firstNode = $this->nodes[0] ?? null; + return $firstNode instanceof DOMElement ? $firstNode : null; } + /** + * @return list + */ public function nodes(): array { return $this->nodes; } + /** + * @param Closure(DOMNode): bool $callback + */ public function filter(Closure $callback): self { - $this->nodes = array_filter($this->nodes, $callback); + $this->nodes = array_values(array_filter($this->nodes, $callback)); return $this; } + /** + * @return mixed + */ public function get(?string $attribute = null) { $node = $this->node(); - if (!$node) { + if ($node === null) { return null; } - return $attribute ? self::getAttribute($node, $attribute) : $node->nodeValue; + return $attribute !== null ? self::getAttribute($node, $attribute) : $node->nodeValue; } + /** + * @return list + */ public function getAll(?string $attribute = null): array { $nodes = $this->nodes(); - return array_filter( + return array_values(array_filter( array_map( - fn ($node) => $attribute ? self::getAttribute($node, $attribute) : $node->nodeValue, + function(\DOMNode $node) use ($attribute) { + if (!$node instanceof DOMElement) { + return $attribute !== null ? null : $node->nodeValue; + } + return $attribute !== null ? self::getAttribute($node, $attribute) : $node->nodeValue; + }, $nodes - ) - ); + ), + fn($val) => $val !== null && $val !== '' + )); } public function str(?string $attribute = null): ?string { $value = $this->get($attribute); - return $value ? clean($value) : null; + if (!is_string($value) && !is_numeric($value)) { + return null; + } + + $cleaned = clean((string)$value); + return $cleaned !== '' ? $cleaned : null; } + /** + * @return list + */ public function strAll(?string $attribute = null): array { - return array_filter(array_map(fn ($value) => clean($value), $this->getAll($attribute))); + return array_values(array_filter(array_map(function($value) { + if (!is_string($value) && !is_numeric($value)) { + return null; + } + $cleaned = clean((string)$value); + return $cleaned !== '' ? $cleaned : null; + }, $this->getAll($attribute)), fn($v) => $v !== null)); } public function int(?string $attribute = null): ?int { $value = $this->get($attribute); - return $value ? (int) $value : null; + if ($value === null || $value === '' || $value === false) { + return null; + } + + return is_numeric($value) ? (int) $value : null; } public function url(?string $attribute = null): ?UriInterface { $value = $this->get($attribute); - if (!$value) { + if (!is_string($value) || $value === '') { return null; } @@ -102,7 +146,7 @@ private static function getAttribute(DOMElement $node, string $name): ?string for ($i = 0; $i < $attributes->length; ++$i) { $attribute = $attributes->item($i); - if ($attribute->name === $name) { + if ($attribute !== null && $attribute->name === $name) { return $attribute->nodeValue; } } diff --git a/src/functions.php b/src/functions.php index c3313a6a..9965ee63 100644 --- a/src/functions.php +++ b/src/functions.php @@ -14,10 +14,14 @@ function clean(string $value, bool $allowHTML = false): ?string $value = strip_tags($value); } - $value = trim(preg_replace('/\s+/u', ' ', $value)); + $replaced = preg_replace('/\s+/u', ' ', $value); + $value = trim($replaced !== null ? $replaced : $value); return $value === '' ? null : $value; } +/** + * @param array $attributes + */ function html(string $tagName, array $attributes, ?string $content = null): string { $html = "<{$tagName}"; @@ -28,7 +32,16 @@ function html(string $tagName, array $attributes, ?string $content = null): stri } elseif ($value === true) { $html .= " $name"; } elseif ($value !== false) { - $html .= ' '.$name.'="'.htmlspecialchars((string) $value).'"'; + if (is_string($value)) { + $stringValue = $value; + } elseif (is_scalar($value)) { + $stringValue = (string) $value; + } elseif (is_object($value) && method_exists($value, '__toString')) { + $stringValue = (string) $value; + } else { + $stringValue = ''; + } + $html .= ' '.$name.'="'.htmlspecialchars($stringValue).'"'; } } @@ -47,11 +60,11 @@ function resolveUri(UriInterface $base, UriInterface $uri): UriInterface { $uri = $uri->withPath(resolvePath($base->getPath(), $uri->getPath())); - if (!$uri->getHost()) { + if ($uri->getHost() === '') { $uri = $uri->withHost($base->getHost()); } - if (!$uri->getScheme()) { + if ($uri->getScheme() === '') { $uri = $uri->withScheme($base->getScheme()); } @@ -62,8 +75,9 @@ function resolveUri(UriInterface $base, UriInterface $uri): UriInterface function isHttp(string $uri): bool { - if (preg_match('/^(\w+):/', $uri, $matches)) { - return in_array(strtolower($matches[1]), ['http', 'https']); + $result = preg_match('/^(\w+):/', $uri, $matches); + if ($result !== false && $result > 0) { + return in_array(strtolower($matches[1]), ['http', 'https'], true); } return true; @@ -81,20 +95,22 @@ function resolvePath(string $base, string $path): string if (substr($base, -1) !== '/') { $position = strrpos($base, '/'); - $base = substr($base, 0, $position); + $base = $position !== false ? substr($base, 0, $position) : ''; } $path = "{$base}/{$path}"; - $parts = array_filter(explode('/', $path), 'strlen'); + $parts = array_filter(explode('/', $path), static function (string $value): bool { + return strlen($value) > 0; + }); $absolutes = []; foreach ($parts as $part) { - if ('.' == $part) { + if ('.' === $part) { continue; } - if ('..' == $part) { + if ('..' === $part) { array_pop($absolutes); continue; } @@ -105,16 +121,23 @@ function resolvePath(string $base, string $path): string return implode('/', $absolutes); } -function cleanPath(string $path): string +function cleanPath(?string $path): string { - if ($path === '') { + if ($path === null || $path === '') { return '/'; } - $path = preg_replace('|[/]{2,}|', '/', $path); + $cleanedPath = preg_replace('|[/]{2,}|', '/', $path); + if ($cleanedPath === null) { + return '/'; + } + $path = $cleanedPath; if (strpos($path, ';jsessionid=') !== false) { - $path = preg_replace('/^(.*)(;jsessionid=.*)$/i', '$1', $path); + $cleanedPath = preg_replace('/^(.*)(;jsessionid=.*)$/i', '$1', $path); + if ($cleanedPath !== null) { + $path = $cleanedPath; + } } return $path; @@ -147,7 +170,7 @@ function isEmpty(...$values): bool ); foreach ($values as $value) { - if (empty($value) || in_array($value, $skipValues)) { + if ($value === null || $value === '' || $value === [] || $value === false || $value === 0 || $value === 0.0 || $value === '0' || in_array($value, $skipValues, true)) { return true; } } @@ -160,7 +183,7 @@ function isEmpty(...$values): bool * Polyfil for https://www.php.net/manual/en/function.array-is-list.php * which is only available in PHP 8.1+ * - * @param array $array The array + * @param array $array The array * * @return bool */ diff --git a/tests/AuthorUrlEmptyStringTest.php b/tests/AuthorUrlEmptyStringTest.php new file mode 100644 index 00000000..011552c8 --- /dev/null +++ b/tests/AuthorUrlEmptyStringTest.php @@ -0,0 +1,43 @@ +assertNotFalse($content, "File $file should exist"); + + // Verify the pattern includes type, empty string, and '0' check + $hasTypeCheck = str_contains($content, 'is_string('); + $hasEmptyCheck = str_contains($content, "!== ''"); + $hasZeroCheck = str_contains($content, "!== '0'"); + + $this->assertTrue( + $hasTypeCheck && $hasEmptyCheck && $hasZeroCheck, + "File $file should check type (is_string), empty string, and '0'" + ); + } + } +} diff --git a/tests/EmbedCodeTest.php b/tests/EmbedCodeTest.php new file mode 100644 index 00000000..60ae3c62 --- /dev/null +++ b/tests/EmbedCodeTest.php @@ -0,0 +1,89 @@ +', 380, 120); + $this->assertEqualsWithDelta(31.579, $code->ratio, 0.001); + } + + public function testRatioCalculationWithNullWidth() + { + // width=null case + $code = new EmbedCode('', null, 400); + $this->assertNull($code->ratio); + } + + public function testRatioCalculationWithZeroWidth() + { + // width=0 case (prevents division-by-zero) + $code = new EmbedCode('', 0, 400); + $this->assertNull($code->ratio); + } + + public function testRatioCalculationWithNullHeight() + { + // height=null case + $code = new EmbedCode('', 400, null); + $this->assertNull($code->ratio); + } + + public function testRatioCalculationWithZeroHeight() + { + // height=0 case (prevents meaningless ratio calculation) + $code = new EmbedCode('', 400, 0); + $this->assertNull($code->ratio); + } + + public function testRatioCalculationWithBothZero() + { + // width=0, height=0 case (prevents division-by-zero) + $code = new EmbedCode('', 0, 0); + $this->assertNull($code->ratio); + } + + public function testRatioCalculationWithBothNull() + { + // width=null, height=null case + $code = new EmbedCode('', null, null); + $this->assertNull($code->ratio); + } + + public function testJsonSerialize() + { + $code = new EmbedCode('
test
', 640, 480); + $json = $code->jsonSerialize(); + + $this->assertEquals('
test
', $json['html']); + $this->assertEquals(640, $json['width']); + $this->assertEquals(480, $json['height']); + $this->assertEqualsWithDelta(75.0, $json['ratio'], 0.001); + } + + public function testToString() + { + $html = ''; + $code = new EmbedCode($html, 640, 480); + + $this->assertEquals($html, (string) $code); + } + + public function testHtmlOnlyConstruction() + { + // Construction with HTML only (width/height are null) + $code = new EmbedCode('

content

'); + + $this->assertEquals('

content

', $code->html); + $this->assertNull($code->width); + $this->assertNull($code->height); + $this->assertNull($code->ratio); + } +}