From 38b2f2b8e13a41aedd8de941c0a98f94d447da78 Mon Sep 17 00:00:00 2001 From: Gregor Morrill Date: Sat, 24 Mar 2018 14:22:38 -0700 Subject: [PATCH 1/5] Add failing test and fix for #157 (hentry) Update existing tests to verify rel-tag upgrade --- Mf2/Parser.php | 28 ++++++++++++++ tests/Mf2/ClassicMicroformatsTest.php | 53 ++++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/Mf2/Parser.php b/Mf2/Parser.php index 4d72f94..406283b 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -1319,6 +1319,32 @@ public function parseRelsAndAlternates() { return array($rels, $rel_urls, $alternates); } + /** + * Find rel=tag elements that don't have class=category and have an href. + * For each element, get the last non-empty URL segment. Append a <data> + * element with that value as the category. Uses the mf1 class 'category' + * which will then be upgraded to p-category during backcompat. + * @param DOMElement $el + */ + public function upgradeRelTagToCategory(DOMElement $el) { + $rel_tag = $this->xpath->query('.//a[contains(concat(" ",normalize-space(@rel)," ")," tag ") and not(contains(concat(" ", normalize-space(@class), " "), " category ")) and @href]', $el); + + if ( $rel_tag->length ) { + foreach ( $rel_tag as $tempEl ) { + $path = trim(parse_url($tempEl->getAttribute('href'), PHP_URL_PATH), ' /'); + $segments = explode('/', $path); + $value = array_pop($segments); + + # build the <data> element + $dataEl = $tempEl->ownerDocument->createElement('data'); + $dataEl->setAttribute('class', 'category'); + $dataEl->setAttribute('value', $value); + + # append before the current element + $tempEl->parentNode->insertBefore($dataEl, $tempEl); + } + } + } /** * Kicks off the parsing routine @@ -1502,6 +1528,8 @@ public function backcompat(DOMElement $el, $context = '', $isParentMf2 = false) switch ( $classname ) { case 'hentry': + $this->upgradeRelTagToCategory($el); + $rel_bookmark = $this->xpath->query('.//a[contains(concat(" ",normalize-space(@rel)," ")," 
bookmark ") and @href]', $el); if ( $rel_bookmark->length ) { diff --git a/tests/Mf2/ClassicMicroformatsTest.php b/tests/Mf2/ClassicMicroformatsTest.php index d00c580..f64c123 100644 --- a/tests/Mf2/ClassicMicroformatsTest.php +++ b/tests/Mf2/ClassicMicroformatsTest.php @@ -102,6 +102,15 @@ public function testParsesFBerrimanClassicHEntry() { $result = $parser->parse(); $e = $result['items'][0]; $this->assertContains('h-entry', $e['type']); + $this->assertArrayHasKey('category', $e['properties']); + $this->assertCount(7, $e['properties']['category']); + $this->assertContains('speaking', $e['properties']['category']); + $this->assertContains('web-dev', $e['properties']['category']); + $this->assertContains('conferences', $e['properties']['category']); + $this->assertContains('front-trends', $e['properties']['category']); + $this->assertContains('fronttrends', $e['properties']['category']); + $this->assertContains('speaking', $e['properties']['category']); + $this->assertContains('txjs', $e['properties']['category']); } public function testParsesSnarfedOrgArticleCorrectly() { @@ -566,8 +575,8 @@ public function testMixedMf2andMf1Case1() { $this->assertCount(1, $result['items'][0]['properties']['author']); $this->assertCount(1, $result['items'][0]['properties']['author'][0]['type']); - $this->assertEquals('h-card', $result['items'][0]['properties']['author'][0]['type'][0]); + $this->assertArrayNotHasKey('category', $result['items'][0]['properties']); } @@ -837,5 +846,47 @@ public function testParsesHfeed() { $this->assertArrayHasKey('value', $output['items'][0]['children'][0]['properties']['author'][0]); } + /** + * @see https://github.com/indieweb/php-mf2/issues/157 + * @see source: http://www.manton.org/2018/03/indieweb-generation-4-and-hosted-domains.html + */ + public function testHEntryRelTag() { + $input = '
+
+ +

IndieWeb generation 4 and hosted domains

+
+ +
+

Naturally because of the goals of Micro.blog, I see a lot of discussion about “owning your content”. It’s an important part of the mission for Micro.blog to take control back from closed, ad-supported social networks and instead embrace posting on our own blogs again.

+

But what does it mean to own our content? Do we have to install WordPress or some home-grown blogging system for it to be considered true content ownership, where we have the source code and direct SFTP access to the server? No. If that’s our definition, then content ownership will be permanently reserved for programmers and technical folks who have hours to spend on server configuration.

+

IndieWebCamp has a generations chart to illustrate the path from early adopters to mainstream users. Eli Mellen highlighted it in a recent post about the need to bridge the gap between the technical aspects of IndieWeb tools and more approachable platforms. With Micro.blog specifically, the goal is “generation 4”, and I think we’re on track to get there.

+

I want blogging to be as easy as tweeting. Anything short of that isn’t good enough for Micro.blog. You’ll notice when you use Twitter that they never ask you to SFTP into twitter.com to configure your account. They don’t ask you to install anything.

+

More powerful software that you can endlessly customize will always have its place. It’s good to have a range of options, including open source to tinker with. That’s often where some of the best ideas start. But too often I see people get lost in the weeds of plugins and themes, lured in by the myth that you have to self-host with WordPress to be part of the IndieWeb.

+

Owning your content isn’t about portable software. It’s about portable URLs and data. It’s about domain names.

+

When you write and post photos at your own domain name, your content can outlive any one blogging platform. This month marked the 16th anniversary of blogging at manton.org, and in that time I’ve switched blogging platforms and hosting providers a few times. The posts and URLs can all be preserved through those changes because it’s my own domain name.

+

I was disappointed when Medium announced they were discontinuing support for custom domain names. I’m linking to the Internet Archive copy because Medium’s help page about this is no longer available. If “no custom domains” is still their policy, it’s a setback for the open web, and dooms Medium to the same dead-end as twitter.com/username URLs.

+

If you can’t use your own domain name, you can’t own it. Your content will be forever stuck at those silo URLs, beholden to the whims of the algorithmic timeline and shifting priorities of the executive team.

+

For hosted blogs on Micro.blog, we encourage everyone to map a custom domain to their content, and we throw in free SSL and preserve redirects for old posts on imported WordPress content. There’s more we can do.

+

I’m working on the next version of the macOS app for Micro.blog now, which features multiple accounts and even multiple blogs under the same account. Here’s a screenshot of the settings screen:

+

Mac screenshot

+

The goal with Micro.blog is not to be a stop-gap hosting provider, with truly “serious” users eventually moving on to something else (although we make that easy). We want Micro.blog hosting to be the best platform for owning your content and participating in the Micro.blog and IndieWeb communities.

+
+ + +
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertArrayHasKey('category', $output['items'][0]['properties']); + $this->assertCount(5, $output['items'][0]['properties']['category']); + $this->assertContains('technology', $output['items'][0]['properties']['category']); + $this->assertContains('domains', $output['items'][0]['properties']['category']); + $this->assertContains('indiewebcamp', $output['items'][0]['properties']['category']); + $this->assertContains('microblog', $output['items'][0]['properties']['category']); + $this->assertContains('wordpress', $output['items'][0]['properties']['category']); + } + } From 40455f395666e2d3e57108ac52f8ccecc57fd3ce Mon Sep 17 00:00:00 2001 From: Gregor Morrill Date: Sat, 24 Mar 2018 14:26:53 -0700 Subject: [PATCH 2/5] Add failing test and fix for #157 (hreview) --- Mf2/Parser.php | 5 ++++ tests/Mf2/ClassicMicroformatsTest.php | 39 +++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/Mf2/Parser.php b/Mf2/Parser.php index 406283b..45d1ab9 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -1576,6 +1576,8 @@ public function backcompat(DOMElement $el, $context = '', $isParentMf2 = false) } } } + + $this->upgradeRelTagToCategory($el); break; case 'vevent': @@ -1996,6 +1998,9 @@ public function query($expression, $context = null) { 'description' => array( 'replace' => 'e-content' ), + 'category' => array( + 'replace' => 'p-category' + ), ), 'hproduct' => array( 'fn' => array( diff --git a/tests/Mf2/ClassicMicroformatsTest.php b/tests/Mf2/ClassicMicroformatsTest.php index f64c123..4af0d21 100644 --- a/tests/Mf2/ClassicMicroformatsTest.php +++ b/tests/Mf2/ClassicMicroformatsTest.php @@ -888,5 +888,44 @@ public function testHEntryRelTag() { $this->assertContains('wordpress', $output['items'][0]['properties']['category']); } + /** + * @see https://github.com/indieweb/php-mf2/issues/157 + * @see source: http://jg.typepad.com/ciel/2006/02/daniel_bouluds_.html + */ + public function 
testHReviewRelTag() { + $input = '
+ +

+ Divine Brunch! +

+ + Feb 19, 2006 + + by Joan Gelfand + +
+ + Garçon + +
+
1101 Valencia Street
+ San Francisco, + CA +
+
+

+ ★★★ +Best Benedicts! +Two perfectly poached eggs and a thin slice of tasty, French ham rest on a circle of warm brioche. Drizzled on top is a light, slightly tangy sauce. Seamless! The sophisticated room and great wine list added to the whole experience - Super!

+

Technorati Tags: ,

+
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertArrayHasKey('category', $output['items'][0]['properties']); + $this->assertCount(2, $output['items'][0]['properties']['category']); + $this->assertContains('Garcon', $output['items'][0]['properties']['category']); + $this->assertContains('Garçon', $output['items'][0]['properties']['category']); + } } From c5f4e91cf0c87591268e6f25ce528f6a76ea412f Mon Sep 17 00:00:00 2001 From: Gregor Morrill Date: Sat, 24 Mar 2018 14:29:31 -0700 Subject: [PATCH 3/5] Add rel-tag tests --- tests/Mf2/ClassicMicroformatsTest.php | 26 ++++++++++++++++++++++++++ tests/Mf2/CombinedMicroformatsTest.php | 20 ++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/tests/Mf2/ClassicMicroformatsTest.php b/tests/Mf2/ClassicMicroformatsTest.php index 4af0d21..e6c7921 100644 --- a/tests/Mf2/ClassicMicroformatsTest.php +++ b/tests/Mf2/ClassicMicroformatsTest.php @@ -927,5 +927,31 @@ public function testHReviewRelTag() { $this->assertContains('Garcon', $output['items'][0]['properties']['category']); $this->assertContains('Garçon', $output['items'][0]['properties']['category']); } + + /** + * Should return the last non-empty URL segment + * @see https://github.com/indieweb/php-mf2/issues/157 + */ + public function testRelTagTrailingSlash() { + $input = ' + + +'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertArrayHasKey('category', $output['items'][0]['properties']); + $this->assertContains('testing', $output['items'][0]['properties']['category']); + $this->assertContains('microformats', $output['items'][0]['properties']['category']); + $this->assertArrayHasKey('category', $output['items'][1]['properties']); + $this->assertContains('phpmf2', $output['items'][1]['properties']['category']); + $this->assertContains('mf2py', $output['items'][1]['properties']['category']); + } } diff --git a/tests/Mf2/CombinedMicroformatsTest.php b/tests/Mf2/CombinedMicroformatsTest.php index 
7f18e75..46f1937 100644 --- a/tests/Mf2/CombinedMicroformatsTest.php +++ b/tests/Mf2/CombinedMicroformatsTest.php @@ -397,5 +397,25 @@ public function testNestedValueDTProperty() { $this->assertEquals('1997-12-12', $output['items'][0]['properties']['acme'][0]['value']); } + /** + * rel=tag should not be upgraded within microformats2 + * @see https://github.com/indieweb/php-mf2/issues/157 + */ + public function testMf2DoesNotParseRelTag() { + $input = '
+ +
+ + +'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertArrayNotHasKey('category', $output['items'][0]['properties']); + $this->assertArrayNotHasKey('category', $output['items'][1]['properties']); + } + } From 4c61222474317858444ca17d5d18920eb40b70e6 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Sun, 25 Mar 2018 09:11:25 -0700 Subject: [PATCH 4/5] add failing test for excluding the generated data element in rel tag backcompat for PR #164 --- tests/Mf2/ClassicMicroformatsTest.php | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/Mf2/ClassicMicroformatsTest.php b/tests/Mf2/ClassicMicroformatsTest.php index e6c7921..444331e 100644 --- a/tests/Mf2/ClassicMicroformatsTest.php +++ b/tests/Mf2/ClassicMicroformatsTest.php @@ -888,6 +888,25 @@ public function testHEntryRelTag() { $this->assertContains('wordpress', $output['items'][0]['properties']['category']); } + public function testHEntryRelTagInContent() { + $input = <<< END +
+
+ Entry content should not include the generated data element for rel tag backcompat + +
+
+END; + + $parser = new Parser($input); + $output = $parser->parse(); + $item = $output['items'][0]; + + $this->assertEquals(['test'], $item['properties']['category']); + $this->assertEquals('Entry content should not include the generated data element for rel tag backcompat', $item['properties']['content'][0]['value']); + $this->assertEquals('Entry content should not include the generated data element for rel tag backcompat', $item['properties']['content'][0]['html']); + } + /** * @see https://github.com/indieweb/php-mf2/issues/157 * @see source: http://jg.typepad.com/ciel/2006/02/daniel_bouluds_.html From c3db4cb97fe8148713e5a32e792fa8cac9006c49 Mon Sep 17 00:00:00 2001 From: Gregor Morrill Date: Sun, 25 Mar 2018 14:04:36 -0700 Subject: [PATCH 5/5] Update upgradeRelTagToCategory() to avoid adding elements to parsed e-* Fixed test to include the original rel=tag HTML/text --- Mf2/Parser.php | 4 ++-- tests/Mf2/ClassicMicroformatsTest.php | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Mf2/Parser.php b/Mf2/Parser.php index 45d1ab9..a8f5fa2 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -1340,8 +1340,8 @@ public function upgradeRelTagToCategory(DOMElement $el) { $dataEl->setAttribute('class', 'category'); $dataEl->setAttribute('value', $value); - # append before the current element - $tempEl->parentNode->insertBefore($dataEl, $tempEl); + # append as child of input element. this should ensure added element does not get parsed inside e-* + $el->appendChild($dataEl); } } } diff --git a/tests/Mf2/ClassicMicroformatsTest.php b/tests/Mf2/ClassicMicroformatsTest.php index 444331e..92457c1 100644 --- a/tests/Mf2/ClassicMicroformatsTest.php +++ b/tests/Mf2/ClassicMicroformatsTest.php @@ -892,8 +892,7 @@ public function testHEntryRelTagInContent() { $input = <<< END
- Entry content should not include the generated data element for rel tag backcompat - + Entry content should not include the generated data element for rel tag backcompat
END; @@ -903,8 +902,8 @@ public function testHEntryRelTagInContent() { $item = $output['items'][0]; $this->assertEquals(['test'], $item['properties']['category']); - $this->assertEquals('Entry content should not include the generated data element for rel tag backcompat', $item['properties']['content'][0]['value']); - $this->assertEquals('Entry content should not include the generated data element for rel tag backcompat', $item['properties']['content'][0]['html']); + $this->assertEquals('Entry content should not include the generated data element for rel tag backcompat test', $item['properties']['content'][0]['value']); + $this->assertEquals('Entry content should not include the generated data element for rel tag backcompat ', $item['properties']['content'][0]['html']); } /**