diff --git a/app/Scraper/AbstractSiteExtractor.php b/app/Scraper/AbstractSiteExtractor.php index 1051b501..ab8b1030 100644 --- a/app/Scraper/AbstractSiteExtractor.php +++ b/app/Scraper/AbstractSiteExtractor.php @@ -145,44 +145,65 @@ public function method(): ?string */ public function toArray(): array { - $clean = function (?string $str): ?string { - if (!$str) { - return null; - } - - $str = str_replace(' ', " ", $str); - $str = preg_replace("/\s+/u", " ", $str); - - return html_entity_decode($str, encoding: 'UTF-8'); - }; - $ingredients = $this->ingredients(); - return [ - 'name' => $clean($this->name()), - 'description' => $clean($this->description()), + 'name' => $this->clean($this->name()), + 'description' => $this->cleanDescription($this->description()), 'source' => $this->source(), 'glass' => $this->glass(), 'instructions' => $this->instructions(), - 'garnish' => $clean($this->garnish()), + 'garnish' => $this->clean($this->garnish()), 'tags' => $this->tags(), 'method' => $this->method(), 'images' => [ $this->image() ], - 'ingredients' => array_map(function (RecipeIngredient $recipeIngredient, int $sort) use ($clean) { + 'ingredients' => array_map(function (RecipeIngredient $recipeIngredient, int $sort) { return [ - 'name' => $clean(ucfirst($recipeIngredient->name)), + 'name' => $this->clean(ucfirst($recipeIngredient->name)), 'amount' => $recipeIngredient->amount, 'amount_max' => $recipeIngredient->amountMax, 'units' => $recipeIngredient->units === '' ? null : $recipeIngredient->units, 'note' => $recipeIngredient->comment === '' ? null : $recipeIngredient->comment, 'original_amount' => $recipeIngredient->originalAmount, - 'source' => $clean($recipeIngredient->source), + 'source' => $this->clean($recipeIngredient->source), 'optional' => false, 'sort' => $sort, ]; }, $ingredients, array_keys($ingredients)), ]; } + + /** + * Cleans up white space in a string and decodes HTML entities. + * + * @param ?string $str The string to clean up. 
+ * @return ?string The cleaned up string. + */ + protected function clean(?string $str): ?string + { + if (!$str) { + return null; + } + + $str = str_replace(' ', " ", $str); + $str = preg_replace("/\s+/u", " ", $str); + + return html_entity_decode($str, encoding: 'UTF-8'); + } + + /** + * Clean up the cocktail description. + * + * This function will be used to clean up the string produced by {@see AbstractSiteExtractor::description() description()}. + * Can be overridden by scrapers that do the cleanup internally within {@see AbstractSiteExtractor::description() description()} + * so that they can, for example, produce Markdown with properly separated paragraphs. + * + * @param ?string $description The cocktail description to clean up. + * @return ?string The cleaned up description. + */ + protected function cleanDescription(?string $description): ?string + { + return $this->clean($description); + } } diff --git a/app/Scraper/Sites/CocktailParty.php b/app/Scraper/Sites/CocktailParty.php index ac06b421..be948a26 100644 --- a/app/Scraper/Sites/CocktailParty.php +++ b/app/Scraper/Sites/CocktailParty.php @@ -39,7 +39,9 @@ public function instructions(): ?string public function tags(): array { - return []; + return $this->findDescriptionContainer()->filter('.elementor-widget-post-info a')->each(function ($node): string { + return $node->text(); + }); } public function ingredients(): array @@ -50,10 +52,20 @@ public function ingredients(): array $amount = $node->filter('.amount')->text(); $ingredient = $node->filter('.ingredient')->text(); $recipeIngredient = $this->ingredientParser->parseLine($amount); + $unit = match ($recipeIngredient->units) { + // Cocktail Party may sometimes use units that the ingredient parser doesn't understand. + // Because of that, something like "1 piece" will be parsed as having no unit and an ingredient + // named "piece." 
Since we know that the .amount element doesn't actually contain the ingredient, + // we can fall back to using the name of the ingredient as the unit if the parsed unit is empty. + '' => $recipeIngredient->name, + // Cocktail Party may say "parts," but they actually mean fluid ounces. + 'part' => 'oz', + default => $recipeIngredient->units, + }; return new RecipeIngredient( $ingredient, $recipeIngredient->amount, - $recipeIngredient->units, + $unit, $recipeIngredient->source, $recipeIngredient->originalAmount, $recipeIngredient->comment, @@ -70,10 +82,17 @@ public function image(): ?array ]; } + protected function cleanDescription(?string $description): ?string + { + // We clean up the description within joinParagraphs already, so we + // do not need to do any further cleanup here. + return $description; + } + private function joinParagraphs(Crawler $nodes): string { $paragraphs = $nodes->each(function ($node): string { - return $node->text(); + return $this->clean($node->text()); }); return implode("\n\n", $paragraphs); diff --git a/tests/Scrapers/CocktailPartyScraperTest.php b/tests/Scrapers/CocktailPartyScraperTest.php index f631e275..f14be8ae 100644 --- a/tests/Scrapers/CocktailPartyScraperTest.php +++ b/tests/Scrapers/CocktailPartyScraperTest.php @@ -9,7 +9,7 @@ class CocktailPartyScraperTest extends TestCase { - public function testScrape(): void + public function testScrapeNegroni(): void { $scraper = Manager::scrape('https://cocktailpartyapp.com/drinks/negroni/'); $result = $scraper->toArray(); @@ -17,29 +17,61 @@ public function testScrape(): void $instructions = "Build in an ice-filled rocks glass – the order doesn’t matter. Just give it a stir and garnish with an orange twist."; $this->assertSame('Negroni', $result['name']); - $this->assertSame('Cocktail snobs have a bad habit of using the Negroni as the threshold through which all drinkers must pass before they can be considered true connoisseurs. 
There’s an element of truth to this view; the balance of flavors requires an experienced tongue to appreciate. But make no mistake – it is a very bitter drink, and it’s not for everyone. Some folks have a genetic variation which allows them to taste bitter compounds, while other people don’t. Those who can are called “supertasters,” and it’s likely that no amount of epicurean taste-acquiring can make this drink palatable to those so afflicted.', $result['description']); + $this->assertSame("Cocktail snobs have a bad habit of using the Negroni as the threshold through which all drinkers must pass before they can be considered true connoisseurs. There’s an element of truth to this view; the balance of flavors requires an experienced tongue to appreciate. But make no mistake – it is a very bitter drink, and it’s not for everyone.\n\nSome folks have a genetic variation which allows them to taste bitter compounds, while other people don’t. Those who can are called “supertasters,” and it’s likely that no amount of epicurean taste-acquiring can make this drink palatable to those so afflicted.", $result['description']); $this->assertSame('https://cocktailpartyapp.com/drinks/negroni/', $result['source']); $this->assertSame(null, $result['glass']); $this->assertSame($instructions, $result['instructions']); $this->assertSame(null, $result['garnish']); - $this->assertSame([], $result['tags']); + $this->assertSame(['Simple Cocktails', 'Spirit-Forward Cocktails'], $result['tags']); $this->assertSame(null, $result['method']); $this->assertSame('https://cocktailpartyapp.com/wp-content/uploads/Negroni.webp', $result['images'][0]['url']); $this->assertSame('CocktailParty', $result['images'][0]['copyright']); $this->assertSame(1.5, $result['ingredients'][0]['amount']); - $this->assertSame('part', $result['ingredients'][0]['units']); + $this->assertSame('oz', $result['ingredients'][0]['units']); $this->assertSame('Bitter orange apéritif', $result['ingredients'][0]['name']); 
$this->assertSame(false, $result['ingredients'][0]['optional']); $this->assertSame(1.5, $result['ingredients'][1]['amount']); - $this->assertSame('part', $result['ingredients'][1]['units']); + $this->assertSame('oz', $result['ingredients'][1]['units']); $this->assertSame('Sweet vermouth', $result['ingredients'][1]['name']); $this->assertSame(false, $result['ingredients'][1]['optional']); $this->assertSame(1.5, $result['ingredients'][2]['amount']); - $this->assertSame('part', $result['ingredients'][2]['units']); + $this->assertSame('oz', $result['ingredients'][2]['units']); $this->assertSame('Gin', $result['ingredients'][2]['name']); $this->assertSame(false, $result['ingredients'][2]['optional']); } + + public function testScrapeManhattanBianco(): void + { + $scraper = Manager::scrape('https://cocktailpartyapp.com/drinks/manhattan-bianco/'); + $result = $scraper->toArray(); + + $this->assertSame('Manhattan Bianco', $result['name']); + $this->assertSame("Changing up the type of vermouth called for in a Manhattan is a time-honored way of producing amazing variations on a classic. The Manhattan Bianco is an excellent example of this technique, with floral blanc (or bianco) vermouth taking the place of the original’s sweet vermouth.\n\nIt’s a major change in the flavor profile and, quite frankly, isn’t much like a Manhattan at all. But it is a genuinely lovely drink if you enjoy blanc vermouth. With bourbon, it leans sweeter; with rye, the bright pepper notes come through. Since there are no bitters, the lemon oil from the twist does a lot of work here, and we consider it an essential ingredient.", $result['description']); + $this->assertSame('https://cocktailpartyapp.com/drinks/manhattan-bianco/', $result['source']); + $this->assertSame(null, $result['glass']); + $this->assertSame('Stir all ingredients with ice. Strain into a chilled cocktail or coupe glass. 
Garnish with a lemon twist.', $result['instructions']); + $this->assertSame(null, $result['garnish']); + $this->assertSame(['Simple Cocktails', 'Spirit-Forward Cocktails'], $result['tags']); + $this->assertSame(null, $result['method']); + $this->assertSame('https://cocktailpartyapp.com/wp-content/uploads/Manhattan-Bianco.webp', $result['images'][0]['url']); + $this->assertSame('CocktailParty', $result['images'][0]['copyright']); + + $this->assertSame(1.5, $result['ingredients'][0]['amount']); + $this->assertSame('oz', $result['ingredients'][0]['units']); + $this->assertSame('Bourbon', $result['ingredients'][0]['name']); + $this->assertSame(false, $result['ingredients'][0]['optional']); + + $this->assertSame(1.5, $result['ingredients'][1]['amount']); + $this->assertSame('oz', $result['ingredients'][1]['units']); + $this->assertSame('Blanc vermouth', $result['ingredients'][1]['name']); + $this->assertSame(false, $result['ingredients'][1]['optional']); + + $this->assertSame(1.0, $result['ingredients'][2]['amount']); + $this->assertSame('piece', $result['ingredients'][2]['units']); + $this->assertSame('Lemon peel', $result['ingredients'][2]['name']); + $this->assertSame(false, $result['ingredients'][2]['optional']); + } }