Skip to content

Commit

Permalink
Fixing sitemap lookup
Browse files Browse the repository at this point in the history
  • Loading branch information
steverobbins committed Mar 12, 2015
1 parent c055043 commit f67d892
Showing 1 changed file with 20 additions and 15 deletions.
35 changes: 20 additions & 15 deletions src/MGA/Console/Command/ScanCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -159,27 +159,32 @@ protected function serverTech()
/**
 * Report whether the site's sitemap can be fetched.
 *
 * Asks getSitemapFile() for the sitemap location, requests it without
 * downloading the body, and writes an info line when the response code is
 * 200 or an error line otherwise.
 *
 * @return void
 */
protected function sitemapExists()
{
    $this->writeHeader('Sitemap');
    $file = (string) $this->getSitemapFile();
    // getSitemapFile() only strips the base URL from the declared sitemap, so
    // a sitemap declared on another scheme or host comes back as an absolute
    // URL. Appending that to the base URL would produce a malformed address,
    // so use it untouched in that case.
    if (preg_match('#^https?://#i', $file)) {
        $sitemapUrl = $file;
    } else {
        $sitemapUrl = $this->url . $file;
    }
    // CURLOPT_NOBODY: only the status code is needed, not the sitemap content
    // (assumes makeRequest() forwards these options to cURL — TODO confirm).
    $response = $this->makeRequest($sitemapUrl, array(
        CURLOPT_NOBODY => true
    ));
    if ($response['code'] == 200) {
        $this->output->writeln('<info>Sitemap is accessible:</info> ' . $sitemapUrl);
    } else {
        $this->output->writeln('<error>Sitemap is not accessible:</error> ' . $sitemapUrl);
    }
}

/**
 * Parse the robots.txt text file to find the sitemap
 *
 * Fetches robots.txt from the base URL and looks for a "Sitemap: <location>"
 * line (case-insensitive). Writes a line to the output stating whether a
 * sitemap was declared. This method does not check that the sitemap itself
 * is reachable; that is left to the caller.
 *
 * @return string The declared sitemap location with the base URL removed,
 *                or 'sitemap.xml' when robots.txt is unavailable or declares
 *                no sitemap
 */
protected function getSitemapFile()
{
    $response = $this->makeRequest($this->url . 'robots.txt');
    $match = array();
    $found = preg_match('/Sitemap: (.*)/mi', $response['body'], $match);
    if ($response['code'] != 200 || !$found || !isset($match[1])) {
        $this->output->writeln('<error>Sitemap is not declared in robots.txt</error>');
        // Fall back to the conventional location
        return 'sitemap.xml';
    }
    $this->output->writeln('<info>Sitemap is declared in robots.txt</info>');
    // Always return the path, whatever the sitemap's availability, so the
    // caller never receives null. trim() also drops the trailing "\r" that
    // (.*) captures when robots.txt uses CRLF line endings.
    return trim(str_replace($this->url, '', $match[1]));
}

Expand Down

0 comments on commit f67d892

Please sign in to comment.