Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion examples/composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
"async-aws/bedrock-runtime": "^1.1",
"codewithkyrian/transformers": "^0.5.3",
"doctrine/dbal": "^3.3|^4.0",
"php-http/discovery": "^1.20",
"probots-io/pinecone-php": "^1.1",
"psr/http-factory-implementation": "*",
"symfony/ai-agent": "@dev",
"symfony/ai-platform": "@dev",
"symfony/ai-store": "@dev",
Expand All @@ -34,7 +36,8 @@
],
"config": {
"allow-plugins": {
"codewithkyrian/transformers-libsloader": true
"codewithkyrian/transformers-libsloader": true,
"php-http/discovery": true
}
}
}
5 changes: 3 additions & 2 deletions src/agent/composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"symfony/type-info": "^7.2.3"
},
"require-dev": {
"mrmysql/youtube-transcript": "^v0.0.5",
"phpstan/phpstan": "^2.0",
"phpunit/phpunit": "^11.5.13",
"symfony/ai-store": "@dev",
Expand All @@ -41,9 +42,9 @@
"symfony/event-dispatcher": "^6.4 || ^7.1"
},
"suggest": {
"mrmysql/youtube-transcript": "For using the YouTube transcription tool.",
"symfony/ai-store": "For using Similarity Search with a vector store.",
"symfony/css-selector": "For using the YouTube transcription tool.",
"symfony/dom-crawler": "For using the YouTube transcription tool."
"symfony/dom-crawler": "For using the Crawler tool."
},
"config": {
"sort-packages": true
Expand Down
52 changes: 11 additions & 41 deletions src/agent/src/Toolbox/Tool/YouTubeTranscriber.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@

namespace Symfony\AI\Agent\Toolbox\Tool;

use MrMySQL\YoutubeTranscript\TranscriptListFetcher;
use Symfony\AI\Agent\Exception\LogicException;
use Symfony\AI\Agent\Exception\RuntimeException;
use Symfony\AI\Agent\Toolbox\Attribute\AsTool;
use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\HttpClient\Psr18Client;
use Symfony\Contracts\HttpClient\HttpClientInterface;

/**
Expand All @@ -27,11 +26,8 @@
public function __construct(
private HttpClientInterface $client,
) {
if (!class_exists(Crawler::class)) {
throw new LogicException('The Symfony DomCrawler component is required to use this tool. Try running "composer require symfony/dom-crawler".');
}
if (!class_exists(CssSelectorConverter::class)) {
throw new LogicException('The Symfony CSS Selector component is required to use this tool. Try running "composer require symfony/css-selector".');
if (!class_exists(TranscriptListFetcher::class)) {
throw new LogicException('The package `mrmysql/youtube-transcript` is required to use this tool. Try running "composer require mrmysql/youtube-transcript".');
}
}

Expand All @@ -40,40 +36,14 @@ public function __construct(
*/
public function __invoke(string $videoId): string
{
// Fetch the HTML content of the YouTube video page
$htmlResponse = $this->client->request('GET', 'https://youtube.com/watch?v='.$videoId);
$html = $htmlResponse->getContent();

// Use DomCrawler to parse the HTML
$crawler = new Crawler($html);

// Extract the script containing the ytInitialPlayerResponse
$scriptContent = $crawler->filter('script')->reduce(function (Crawler $node) {
return str_contains($node->text(), 'var ytInitialPlayerResponse = {');
})->text();

// Extract and parse the JSON data from the script
$start = strpos($scriptContent, 'var ytInitialPlayerResponse = ') + \strlen('var ytInitialPlayerResponse = ');
$dataString = substr($scriptContent, $start);
$dataString = substr($dataString, 0, strrpos($dataString, ';') ?: null);
$data = json_decode(trim($dataString), true);

// Extract the URL for the captions
if (!isset($data['captions']['playerCaptionsTracklistRenderer']['captionTracks'][0]['baseUrl'])) {
throw new RuntimeException('Captions are not available for this video.');
}
$captionsUrl = $data['captions']['playerCaptionsTracklistRenderer']['captionTracks'][0]['baseUrl'];

// Fetch and parse the captions XML
$xmlResponse = $this->client->request('GET', $captionsUrl);
$xmlContent = $xmlResponse->getContent();
$xmlCrawler = new Crawler($xmlContent);
$psr18Client = new Psr18Client($this->client);
$fetcher = new TranscriptListFetcher($psr18Client, $psr18Client, $psr18Client);

// Collect all text elements from the captions
$transcript = $xmlCrawler->filter('text')->each(function (Crawler $node) {
return $node->text().' YouTubeTranscriber.php';
});
$list = $fetcher->fetch($videoId);
$transcript = $list->findTranscript($list->getAvailableLanguageCodes());

return implode(\PHP_EOL, $transcript);
return array_reduce($transcript->fetch(), function (string $carry, array $item): string {
return $carry.\PHP_EOL.$item['text'];
}, '');
}
}