Skip to content

Commit

Permalink
Merge pull request #273 from zdenek-biberle/improve-cocktail-party-scraper
Browse files Browse the repository at this point in the history

Some improvements to the Cocktail Party scraper
  • Loading branch information
karlomikus committed Apr 17, 2024
2 parents e2bb254 + 2a37f87 commit ada1c28
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 27 deletions.
57 changes: 39 additions & 18 deletions app/Scraper/AbstractSiteExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -145,44 +145,65 @@ public function method(): ?string
*/
public function toArray(): array
{
$clean = function (?string $str): ?string {
if (!$str) {
return null;
}

$str = str_replace(' ', " ", $str);
$str = preg_replace("/\s+/u", " ", $str);

return html_entity_decode($str, encoding: 'UTF-8');
};

$ingredients = $this->ingredients();

return [
'name' => $clean($this->name()),
'description' => $clean($this->description()),
'name' => $this->clean($this->name()),
'description' => $this->cleanDescription($this->description()),
'source' => $this->source(),
'glass' => $this->glass(),
'instructions' => $this->instructions(),
'garnish' => $clean($this->garnish()),
'garnish' => $this->clean($this->garnish()),
'tags' => $this->tags(),
'method' => $this->method(),
'images' => [
$this->image()
],
'ingredients' => array_map(function (RecipeIngredient $recipeIngredient, int $sort) use ($clean) {
'ingredients' => array_map(function (RecipeIngredient $recipeIngredient, int $sort) {
return [
'name' => $clean(ucfirst($recipeIngredient->name)),
'name' => $this->clean(ucfirst($recipeIngredient->name)),
'amount' => $recipeIngredient->amount,
'amount_max' => $recipeIngredient->amountMax,
'units' => $recipeIngredient->units === '' ? null : $recipeIngredient->units,
'note' => $recipeIngredient->comment === '' ? null : $recipeIngredient->comment,
'original_amount' => $recipeIngredient->originalAmount,
'source' => $clean($recipeIngredient->source),
'source' => $this->clean($recipeIngredient->source),
'optional' => false,
'sort' => $sort,
];
}, $ingredients, array_keys($ingredients)),
];
}

/**
 * Normalises white space in a string and decodes HTML entities.
 *
 * Every run of white space is collapsed into a single regular space, after
 * which HTML entities are decoded using UTF-8.
 *
 * @param ?string $str The string to clean up.
 * @return ?string The cleaned up string, or null when the input is null or empty.
 */
protected function clean(?string $str): ?string
{
    // Compare explicitly: a loose `!$str` check would also throw away the
    // perfectly valid string "0".
    if ($str === null || $str === '') {
        return null;
    }

    // NOTE(review): the search literal here rendered as a plain space, which
    // would make the call a no-op; presumably it was a non-breaking space
    // (U+00A0). It is written as an escape so the intent is visible — confirm.
    $str = str_replace("\u{00A0}", ' ', $str);

    // preg_replace() returns null on failure (for example malformed UTF-8
    // combined with the /u modifier). Keep the un-collapsed string in that
    // case instead of handing null to html_entity_decode().
    $str = preg_replace('/\s+/u', ' ', $str) ?? $str;

    return html_entity_decode($str, encoding: 'UTF-8');
}

/**
 * Cleans up the cocktail description before it is exported.
 *
 * Receives the string produced by {@see AbstractSiteExtractor::description() description()}
 * and, by default, passes it through {@see AbstractSiteExtractor::clean() clean()}.
 *
 * Scrapers that already tidy the text inside {@see AbstractSiteExtractor::description() description()}
 * can override this method, for example to keep Markdown with properly
 * separated paragraphs intact.
 *
 * @param ?string $description The cocktail description to clean up.
 * @return ?string The cleaned up description.
 */
protected function cleanDescription(?string $description): ?string
{
    $cleaned = $this->clean($description);

    return $cleaned;
}
}
25 changes: 22 additions & 3 deletions app/Scraper/Sites/CocktailParty.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ public function instructions(): ?string

public function tags(): array
{
return [];
return $this->findDescriptionContainer()->filter('.elementor-widget-post-info a')->each(function ($node): string {
return $node->text();
});
}

public function ingredients(): array
Expand All @@ -50,10 +52,20 @@ public function ingredients(): array
$amount = $node->filter('.amount')->text();
$ingredient = $node->filter('.ingredient')->text();
$recipeIngredient = $this->ingredientParser->parseLine($amount);
$unit = match ($recipeIngredient->units) {
// Cocktail Party may sometimes use units that the ingredient parser doesn't understand.
// Because of that, something like "1 piece" will be parsed as having no unit and an ingredient
// named "piece." Since we know that the .amount element doesn't actually contain the ingredient,
// we can fall back to using the name of the ingredient as the unit if the parsed unit is empty.
'' => $recipeIngredient->name,
// Cocktail Party may say "parts," but they actually mean fluid ounces.
'part' => 'oz',
default => $recipeIngredient->units,
};
return new RecipeIngredient(
$ingredient,
$recipeIngredient->amount,
$recipeIngredient->units,
$unit,
$recipeIngredient->source,
$recipeIngredient->originalAmount,
$recipeIngredient->comment,
Expand All @@ -70,10 +82,17 @@ public function image(): ?array
];
}

/**
 * Returns the description exactly as it was given.
 *
 * The description is already cleaned up paragraph by paragraph inside
 * joinParagraphs(), so no further clean up is applied at this point.
 *
 * @param ?string $description The already cleaned description.
 * @return ?string The same description, untouched.
 */
protected function cleanDescription(?string $description): ?string
{
    return $description;
}

private function joinParagraphs(Crawler $nodes): string
{
$paragraphs = $nodes->each(function ($node): string {
return $node->text();
return $this->clean($node->text());
});

return implode("\n\n", $paragraphs);
Expand Down
44 changes: 38 additions & 6 deletions tests/Scrapers/CocktailPartyScraperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,69 @@

class CocktailPartyScraperTest extends TestCase
{
public function testScrape(): void
public function testScrapeNegroni(): void
{
$scraper = Manager::scrape('https://cocktailpartyapp.com/drinks/negroni/');
$result = $scraper->toArray();

$instructions = "Build in an ice-filled rocks glass – the order doesn’t matter. Just give it a stir and garnish with an orange twist.";

$this->assertSame('Negroni', $result['name']);
$this->assertSame('Cocktail snobs have a bad habit of using the Negroni as the threshold through which all drinkers must pass before they can be considered true connoisseurs. There’s an element of truth to this view; the balance of flavors requires an experienced tongue to appreciate. But make no mistake – it is a very bitter drink, and it’s not for everyone. Some folks have a genetic variation which allows them to taste bitter compounds, while other people don’t. Those who can are called “supertasters,” and it’s likely that no amount of epicurean taste-acquiring can make this drink palatable to those so afflicted.', $result['description']);
$this->assertSame("Cocktail snobs have a bad habit of using the Negroni as the threshold through which all drinkers must pass before they can be considered true connoisseurs. There’s an element of truth to this view; the balance of flavors requires an experienced tongue to appreciate. But make no mistake – it is a very bitter drink, and it’s not for everyone.\n\nSome folks have a genetic variation which allows them to taste bitter compounds, while other people don’t. Those who can are called “supertasters,” and it’s likely that no amount of epicurean taste-acquiring can make this drink palatable to those so afflicted.", $result['description']);
$this->assertSame('https://cocktailpartyapp.com/drinks/negroni/', $result['source']);
$this->assertSame(null, $result['glass']);
$this->assertSame($instructions, $result['instructions']);
$this->assertSame(null, $result['garnish']);
$this->assertSame([], $result['tags']);
$this->assertSame(['Simple Cocktails', 'Spirit-Forward Cocktails'], $result['tags']);
$this->assertSame(null, $result['method']);
$this->assertSame('https://cocktailpartyapp.com/wp-content/uploads/Negroni.webp', $result['images'][0]['url']);
$this->assertSame('CocktailParty', $result['images'][0]['copyright']);

$this->assertSame(1.5, $result['ingredients'][0]['amount']);
$this->assertSame('part', $result['ingredients'][0]['units']);
$this->assertSame('oz', $result['ingredients'][0]['units']);
$this->assertSame('Bitter orange apéritif', $result['ingredients'][0]['name']);
$this->assertSame(false, $result['ingredients'][0]['optional']);

$this->assertSame(1.5, $result['ingredients'][1]['amount']);
$this->assertSame('part', $result['ingredients'][1]['units']);
$this->assertSame('oz', $result['ingredients'][1]['units']);
$this->assertSame('Sweet vermouth', $result['ingredients'][1]['name']);
$this->assertSame(false, $result['ingredients'][1]['optional']);

$this->assertSame(1.5, $result['ingredients'][2]['amount']);
$this->assertSame('part', $result['ingredients'][2]['units']);
$this->assertSame('oz', $result['ingredients'][2]['units']);
$this->assertSame('Gin', $result['ingredients'][2]['name']);
$this->assertSame(false, $result['ingredients'][2]['optional']);
}

/**
 * Scrapes the Manhattan Bianco recipe and checks every exported field.
 *
 * The description is expected to keep its two paragraphs separated by a
 * blank line, and the third ingredient is expected to carry "piece" as its
 * unit.
 *
 * NOTE(review): presumably Manager::scrape() fetches the live page, so this
 * test depends on the remote content staying the same — confirm.
 */
public function testScrapeManhattanBianco(): void
{
    $url = 'https://cocktailpartyapp.com/drinks/manhattan-bianco/';
    $recipe = Manager::scrape($url)->toArray();

    $expectedDescription = "Changing up the type of vermouth called for in a Manhattan is a time-honored way of producing amazing variations on a classic. The Manhattan Bianco is an excellent example of this technique, with floral blanc (or bianco) vermouth taking the place of the original’s sweet vermouth.\n\nIt’s a major change in the flavor profile and, quite frankly, isn’t much like a Manhattan at all. But it is a genuinely lovely drink if you enjoy blanc vermouth. With bourbon, it leans sweeter; with rye, the bright pepper notes come through. Since there are no bitters, the lemon oil from the twist does a lot of work here, and we consider it an essential ingredient.";
    $expectedInstructions = 'Stir all ingredients with ice. Strain into a chilled cocktail or coupe glass. Garnish with a lemon twist.';

    // Top-level recipe fields.
    $this->assertSame('Manhattan Bianco', $recipe['name']);
    $this->assertSame($expectedDescription, $recipe['description']);
    $this->assertSame($url, $recipe['source']);
    $this->assertSame(null, $recipe['glass']);
    $this->assertSame($expectedInstructions, $recipe['instructions']);
    $this->assertSame(null, $recipe['garnish']);
    $this->assertSame(['Simple Cocktails', 'Spirit-Forward Cocktails'], $recipe['tags']);
    $this->assertSame(null, $recipe['method']);

    // Only the first image is inspected.
    $image = $recipe['images'][0];
    $this->assertSame('https://cocktailpartyapp.com/wp-content/uploads/Manhattan-Bianco.webp', $image['url']);
    $this->assertSame('CocktailParty', $image['copyright']);

    // Expected ingredients, in the order they are exported.
    $expectedIngredients = [
        ['amount' => 1.5, 'units' => 'oz', 'name' => 'Bourbon'],
        ['amount' => 1.5, 'units' => 'oz', 'name' => 'Blanc vermouth'],
        ['amount' => 1.0, 'units' => 'piece', 'name' => 'Lemon peel'],
    ];

    foreach ($expectedIngredients as $position => $expected) {
        $actual = $recipe['ingredients'][$position];

        $this->assertSame($expected['amount'], $actual['amount']);
        $this->assertSame($expected['units'], $actual['units']);
        $this->assertSame($expected['name'], $actual['name']);
        $this->assertSame(false, $actual['optional']);
    }
}
}

0 comments on commit ada1c28

Please sign in to comment.