From 05958ebfb89ab2cb2cc0b4f3690d1d7e393d1ea6 Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Fri, 31 Mar 2017 17:07:52 +0100 Subject: [PATCH 1/3] !! BREAKING CHANGES !! * Removed $isRobot, setIsRobot(), getIsRobot(), isRobot(), checkBrowserRobot() * Replaced above with detectScriptedAgent() * Added a new ScriptedAgent class with detection for bots, spiders, etc * Added isWebkit(), getIsWebkit(), setIsWebkit() NOTE: TODO: Should add proper browser engine detection (Webkit, Gecko, Trident, etc) * Added $isTwitterWebView, setIsTwitterWebView(), getIsTwitterWebView(), isTwitterWebView(); * Added browser detection for UC Browser * Added browser detection for NSPlayer (Windows Media Player) * Added OS detection for NSPlayer (Windows Media Player) * Added browser detection for Microsoft Office * Added browser detection for the Apple News app * Added browser detection for the Dalvik (Android) OS * Moved wkHTMLtoPDF to scripted agents and removed the test accordingly * Moved GoogleBot to scripted agents * Moved Slurp to scripted agents * Moved W3CValidator to scripted agents * Moved MSNBot to scripted agents * Renamed "Navigator" to "Android Navigator" for clarity * Strip linebreaks in setVersion (fixes a failing test) * Added .idea to the gitignore (I use PHPstorm, don't be a hater) TODO: Tests for new class --- .gitignore | 1 + README.md | 99 ++- src/Browser.php | 152 +++- src/BrowserDetector.php | 182 +++- src/OsDetector.php | 5 + src/ScriptedAgent.php | 193 +++++ src/ScriptedAgentDetector.php | 800 ++++++++++++++++++ .../Tests/_files/UserAgentStrings.xml | 11 - 8 files changed, 1360 insertions(+), 83 deletions(-) create mode 100644 src/ScriptedAgent.php create mode 100644 src/ScriptedAgentDetector.php diff --git a/.gitignore b/.gitignore index 81b9258..86d9b11 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ composer.lock phpunit.xml vendor +.idea \ No newline at end of file diff --git a/README.md b/README.md index cee8a05..92e5ef5 100644 --- a/README.md +++ 
b/README.md @@ -51,16 +51,13 @@ The Browser class allows you to detect a user's browser and version. * Lynx * Safari * Chrome - * Navigator - * GoogleBot - * Yahoo! Slurp - * W3C Validator + * Android Navigator + * UC Browser * BlackBerry * IceCat * Nokia S60 OSS Browser * Nokia Browser * MSN Browser - * MSN Bot * Netscape Navigator * Galeon * NetPositive @@ -69,14 +66,16 @@ The Browser class allows you to detect a user's browser and version. * Yandex Browser * Comodo Dragon * Samsung Browser - * wkhtmltopdf ### Usage ```php use Sinergi\BrowserDetector\Browser; -$browser = new Browser(); +$browser = new Browser(); + +//You can also provide a userAgent string if you don't wish to detect the current browser +//$browser = new Browser("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"); if ($browser->getName() === Browser::IE && $browser->getVersion() < 11) { echo 'Please upgrade your browser.'; @@ -97,6 +96,92 @@ if ($browser->getName() === Browser::IE && $browser->isCompatibilityMode()) { } ``` +## Scripted Agent Detection + +The ScriptedAgent class allows you to detect scripted agents (bots, spiders, tools) + +### Scripted Agents Detected + +Spiders + + * GoogleBot + * Baidu + * Bing + * MSN + * Yahoo! 
Slurp + * W3C Spiders + * Yandex + * Apple + * Paper.li + * Majestic12 + * Livelap + * Scoop.it + * Who.is + * Proximic + +Web Surveys + + * Ahrefs + * MetaURI + * Netcraft + * Browsershots + * MageReport + * SocialRank.io + * Gluten Free + * Ubermetrics + * Verisign IPS-Agent + +Exploits + + * ShellShock + +Web Preview bots + + * ICQ + * Google Web + * Facebook + * Bing + * Twitter + * Skype + +Tools + + * wkHTMLtoPDF + * W3C Validator + * WebDAV + * TLSProbe + * Wget + * Zgrab + +Generic + + * Google Favicon + * Curl + * Python + * GoLang + * Perl + * Java + +Ad bots + + * Google + * Microsoft + * AdBeat + +### Usage + +```php +use Sinergi\BrowserDetector\Browser; + +$browser = new Browser(); + +$scriptedAgent = $browser->detectScriptedAgent(); +if ($scriptedAgent!==false) +{ + die("Detected ".$scriptedAgent->getName()." which is a ".$scriptedAgent->getType().". Info: ".$scriptedAgent->getInfoURL()); +} +``` + ## OS Detection The OS class allows you to detect a user's operating system and version. diff --git a/src/Browser.php b/src/Browser.php index 2e34aca..52b0f44 100644 --- a/src/Browser.php +++ b/src/Browser.php @@ -25,20 +25,15 @@ class Browser const MOZILLA = 'Mozilla'; const AMAYA = 'Amaya'; const LYNX = 'Lynx'; - const WKHTMLTOPDF = 'wkhtmltopdf'; const SAFARI = 'Safari'; const SAMSUNG_BROWSER = 'SamsungBrowser'; const CHROME = 'Chrome'; - const NAVIGATOR = 'Navigator'; - const GOOGLEBOT = 'GoogleBot'; - const SLURP = 'Yahoo! 
Slurp'; - const W3CVALIDATOR = 'W3C Validator'; + const NAVIGATOR = 'Android Navigator'; const BLACKBERRY = 'BlackBerry'; const ICECAT = 'IceCat'; const NOKIA_S60 = 'Nokia S60 OSS Browser'; const NOKIA = 'Nokia Browser'; const MSN = 'MSN Browser'; - const MSNBOT = 'MSN Bot'; const NETSCAPE_NAVIGATOR = 'Netscape Navigator'; const GALEON = 'Galeon'; const NETPOSITIVE = 'NetPositive'; @@ -47,6 +42,11 @@ class Browser const YANDEX = 'Yandex'; const EDGE = 'Edge'; const DRAGON = 'Dragon'; + const NSPLAYER = 'Windows Media Player'; + const UCBROWSER = 'UC Browser'; + const MICROSOFT_OFFICE = 'Microsoft Office'; + const APPLE_NEWS = 'Apple News'; + const DALVIK = 'Android'; const VERSION_UNKNOWN = 'unknown'; @@ -67,18 +67,23 @@ class Browser /** * @var bool */ - private $isRobot = false; + private $isChromeFrame = false; /** * @var bool */ - private $isChromeFrame = false; + private $isWebkit = false; /** * @var bool */ private $isFacebookWebView = false; + /** + * @var bool + */ + private $isTwitterWebView = false; + /** * @var bool */ @@ -101,7 +106,7 @@ public function __construct($userAgent = null) } /** - * Set the name of the OS. + * Set the name of the Browser. * * @param string $name * @@ -149,7 +154,8 @@ public function isBrowser($name) */ public function setVersion($version) { - $this->version = (string)$version; + //The regex for the Firefox version lets through a linebreak, causing the test to fail + $this->version = str_replace("\n","",(string)$version); return $this; } @@ -169,39 +175,101 @@ public function getVersion() } /** - * Set the Browser to be a robot. + * Detects scripted agents (robots / bots) + * Returns a resolved ScriptedAgent object if detected. + * Otherwise returns false. 
* - * @param bool $isRobot + * @return ScriptedAgent|bool + */ + public function detectScriptedAgent() + { + $ua = $this->getUserAgent()->getUserAgentString(); + if (stripos($ua, 'bot') !== FALSE || + stripos($ua, 'spider') !== FALSE || + stripos($ua, 'crawler') !== FALSE || + stripos($ua, 'preview') !== FALSE || + stripos($ua, 'slurp') !== FALSE || + stripos($ua, 'facebookexternalhit') !== FALSE || + stripos($ua, 'mediapartners') !== FALSE || + stripos($ua, 'google-adwords') !== FALSE || + stripos($ua, 'adxvastfetcher') !== FALSE || + stripos($ua, 'adbeat') !== FALSE || + stripos($ua, 'google favicon') !== FALSE || + stripos($ua, 'webdav client') !== FALSE || + stripos($ua, 'metauri api') !== FALSE || + stripos($ua, 'tlsprobe') !== FALSE || + stripos($ua, 'wpif') !== FALSE || + stripos($ua, 'imgsizer') !== FALSE || + stripos($ua, 'netcraft ssl server survey') !== FALSE || + stripos($ua, 'curl/') !== FALSE || + stripos($ua, 'go-http-client/') !== FALSE || + stripos($ua, 'python') !== FALSE || + stripos($ua, 'libwww') !== FALSE || + stripos($ua, 'wget/') !== FALSE || + stripos($ua, 'zgrab/') !== FALSE || + stripos($ua, 'Java/') !== FALSE || + stripos($ua, '() { :;}; /bin/bash -c') !== FALSE || + stripos($ua, 'browsershots') !== FALSE || + stripos($ua, 'magereport') !== FALSE || + stripos($ua, 'ubermetrics-technologies') !== FALSE || + stripos($ua, 'W3C') !== FALSE || + stripos($ua, 'Validator') !== FALSE || + stripos($ua, 'Jigsaw/') !== FALSE || + stripos($ua, 'bing') !== FALSE || + stripos($ua, 'msn') !== FALSE || + stripos($ua, 'Google Web Preview') !== FALSE || + stripos($ua, 'ips-agent') !== FALSE || + (stripos($ua, 'Chrome/51.0.2704.103') !== FALSE && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && isset($_SERVER['HTTP_ACCEPT_LANGUAGE']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== FALSE) //ICQ Preview (Accept-Language may be absent, so guard the read) + ) + { + $scriptedAgent = new ScriptedAgent($ua); + if ($scriptedAgent->getName()==ScriptedAgent::UNKNOWN) + { + return false; + } + else + { + return $scriptedAgent; + } + } + else + 
{ + return false; + } + } + + /** + * @param bool $isChromeFrame * * @return $this */ - public function setIsRobot($isRobot) + public function setIsChromeFrame($isChromeFrame) { - $this->isRobot = (bool)$isRobot; + $this->isChromeFrame = (bool)$isChromeFrame; return $this; } /** - * Is the browser from a robot (ex Slurp,GoogleBot)? + * Used to determine if the browser is actually "chromeframe". * * @return bool */ - public function getIsRobot() + public function getIsChromeFrame() { if (!isset($this->name)) { BrowserDetector::detect($this, $this->getUserAgent()); } - return $this->isRobot; + return $this->isChromeFrame; } /** * @return bool */ - public function isRobot() + public function isChromeFrame() { - return $this->getIsRobot(); + return $this->getIsChromeFrame(); } /** @@ -209,9 +277,9 @@ public function isRobot() * * @return $this */ - public function setIsChromeFrame($isChromeFrame) + public function setIsWebkit($isWebkit) { - $this->isChromeFrame = (bool)$isChromeFrame; + $this->isWebkit = (bool)$isWebkit; return $this; } @@ -221,21 +289,21 @@ public function setIsChromeFrame($isChromeFrame) * * @return bool */ - public function getIsChromeFrame() + public function getIsWebkit() { if (!isset($this->name)) { BrowserDetector::detect($this, $this->getUserAgent()); } - return $this->isChromeFrame; + return $this->isWebkit; } /** * @return bool */ - public function isChromeFrame() + public function isWebkit() { - return $this->getIsChromeFrame(); + return $this->getIsWebkit(); } /** @@ -272,6 +340,40 @@ public function isFacebookWebView() return $this->getIsFacebookWebView(); } + /** + * @param bool $isTwitterWebView + * + * @return $this + */ + public function setIsTwitterWebView($isTwitterWebView) + { + $this->isTwitterWebView = (bool) $isTwitterWebView; + + return $this; + } + + /** + * Used to determine if the browser is actually "Twitter". 
+ * + * @return bool + */ + public function getIsTwitterWebView() + { + if (!isset($this->name)) { + BrowserDetector::detect($this, $this->getUserAgent()); + } + + return $this->isTwitterWebView; + } + + /** + * @return bool + */ + public function isTwitterWebView() + { + return $this->getIsTwitterWebView(); + } + /** * @param UserAgent $userAgent * diff --git a/src/BrowserDetector.php b/src/BrowserDetector.php index 5156188..d18be6f 100644 --- a/src/BrowserDetector.php +++ b/src/BrowserDetector.php @@ -46,16 +46,14 @@ class BrowserDetector implements DetectorInterface 'Samsung', 'Chrome', 'OmniWeb', + 'UCBrowser', //before Android // common mobile 'Android', 'BlackBerry', 'Nokia', 'Gsa', - // common bots - 'Robot', - // wkhtmltopdf before Safari - 'Wkhtmltopdf', // WebKit base check (post mobile and others) + 'AppleNews', 'Safari', // everyone else 'NetPositive', @@ -65,6 +63,8 @@ class BrowserDetector implements DetectorInterface 'Phoenix', 'Amaya', 'Lynx', + 'NSPlayer', + 'Office', 'Shiretoko', 'IceCat', 'Iceweasel', @@ -92,6 +92,8 @@ public static function detect(Browser $browser, UserAgent $userAgent = null) self::checkChromeFrame(); self::checkFacebookWebView(); + self::checkTwitterWebView(); + self::checkWebkit(); foreach (self::$browsersList as $browserName) { $funcName = self::FUNC_PREFIX . $browserName; @@ -120,6 +122,22 @@ public static function checkChromeFrame() return false; } + /** + * Determine if the browser is a wekit webview. + * + * @return bool + */ + public static function checkWebkit() + { + if (strpos(self::$userAgentString, 'AppleWebKit/') !== false) { + self::$browser->setIsWebkit(true); + + return true; + } + + return false; + } + /** * Determine if the user is using Facebook. * @@ -136,6 +154,24 @@ public static function checkFacebookWebView() return false; } + /** + * Determine if the user is using Twitter. 
+ * + * @return bool + */ + public static function checkTwitterWebView() + { + if (strpos(self::$userAgentString, 'Twitter for') !== false) { + self::$browser->setIsTwitterWebView(true); + + return true; + } + + return false; + } + + + /** * Determine if the user is using a BlackBerry. * @@ -173,25 +209,6 @@ public static function checkBrowserBlackBerry() return false; } - /** - * Determine if the browser is a robot. - * - * @return bool - */ - public static function checkBrowserRobot() - { - if (stripos(self::$userAgentString, 'bot') !== false || - stripos(self::$userAgentString, 'spider') !== false || - stripos(self::$userAgentString, 'crawler') !== false - ) { - self::$browser->setIsRobot(true); - - return true; - } - - return false; - } - /** * Determine if the browser is Internet Explorer. * @@ -731,7 +748,7 @@ public static function checkBrowserIceCat() */ public static function checkBrowserNokia() { - if (preg_match("/Nokia([^\/]+)\/([^ SP]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/Nokia([^\\/]+)\\/([^ SP]+)/i", self::$userAgentString, $matches)) { self::$browser->setVersion($matches[2]); if (stripos(self::$userAgentString, 'Series60') !== false || strpos(self::$userAgentString, 'S60') !== false @@ -755,7 +772,7 @@ public static function checkBrowserNokia() public static function checkBrowserFirefox() { if (stripos(self::$userAgentString, 'safari') === false) { - if (preg_match("/Firefox[\/ \(]([^ ;\)]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/Firefox[\\/ \\(]([^ ;\\)]+)/i", self::$userAgentString, $matches)) { if (isset($matches[1])) { self::$browser->setVersion($matches[1]); } @@ -781,7 +798,7 @@ public static function checkBrowserFirefox() public static function checkBrowserSeaMonkey() { if (stripos(self::$userAgentString, 'safari') === false) { - if (preg_match("/SeaMonkey[\/ \(]([^ ;\)]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/SeaMonkey[\\/ \\(]([^ ;\\)]+)/i", self::$userAgentString, 
$matches)) { if (isset($matches[1])) { self::$browser->setVersion($matches[1]); } @@ -901,20 +918,7 @@ public static function checkBrowserAmaya() return false; } - /** - * Determine if the browser is Safari. - * - * @return bool - */ - public static function checkBrowserWkhtmltopdf() - { - if (stripos(self::$userAgentString, 'wkhtmltopdf') !== false) { - self::$browser->setName(Browser::WKHTMLTOPDF); - return true; - } - return false; - } /** * Determine if the browser is Safari. * @@ -987,7 +991,7 @@ public static function checkBrowserDragon() */ public static function checkBrowserAndroid() { - // Navigator + // Android Navigator if (stripos(self::$userAgentString, 'Android') !== false) { if (preg_match('/Version\/([\d\.]*)/i', self::$userAgentString, $matches)) { if (isset($matches[1])) { @@ -1001,6 +1005,104 @@ public static function checkBrowserAndroid() return true; } + // Dalvik (Android OS) + if (stripos(self::$userAgentString, 'Dalvik/') !== false) { + $aresult = explode('/', stristr(self::$userAgentString, 'Dalvik')); + if (isset($aresult[1])) { + $aversion = explode(' ', $aresult[1]); + self::$browser->setVersion($aversion[0]); + } + self::$browser->setName(Browser::DALVIK); + + return true; + } + + return false; + } + + /** + * Determine if the browser is UCBrowser. + * + * @return bool + */ + public static function checkBrowserUCBrowser() + { + // Navigator + if (stripos(self::$userAgentString, 'UCBrowser/') !== false) { + $aresult = explode('/', stristr(self::$userAgentString, 'UCBrowser')); + if (isset($aresult[1])) { + $aversion = explode(' ', $aresult[1]); + self::$browser->setVersion($aversion[0]); + } + self::$browser->setName(Browser::UCBROWSER); + + return true; + } + + return false; + } + + /** + * Determine if the browser is Windows Media Player. 
+ * + * @return bool + */ + public static function checkBrowserNSPlayer() + { + // Windows Media Player (NSPlayer) + if (stripos(self::$userAgentString, 'NSPlayer/') !== false) { + $aresult = explode('/', stristr(self::$userAgentString, 'NSPlayer')); + if (isset($aresult[1])) { + $aversion = explode(' ', $aresult[1]); + self::$browser->setVersion($aversion[0]); + } + self::$browser->setName(Browser::NSPLAYER); + + return true; + } + + return false; + } + + /** + * Determine if the browser is Microsoft Office. + * + * @return bool + */ + public static function checkBrowserOffice() + { + // Microsoft Office (no version is extracted from the user agent) + if (stripos(self::$userAgentString, 'Microsoft Office') !== false) { + self::$browser->setVersion(Browser::VERSION_UNKNOWN); + self::$browser->setName(Browser::MICROSOFT_OFFICE); + + return true; + } + + return false; + } + + /** + * Determine if the browser is the Apple News app. + * + * @return bool + */ + public static function checkBrowserAppleNews() + { + // Apple News app + if (stripos(self::$userAgentString, 'AppleNews/') !== false) { + if (preg_match('/Version\/([\d\.]*)/i', self::$userAgentString, $matches)) { + if (isset($matches[1])) { + self::$browser->setVersion($matches[1]); + } + } else { + self::$browser->setVersion(Browser::VERSION_UNKNOWN); + } + self::$browser->setName(Browser::APPLE_NEWS); + + return true; + } + return false; } } diff --git a/src/OsDetector.php b/src/OsDetector.php index 40b0611..8510e90 100644 --- a/src/OsDetector.php +++ b/src/OsDetector.php @@ -209,6 +209,11 @@ private static function checkWindows(Os $os, UserAgent $userAgent) return true; } + if (stripos($userAgent->getUserAgentString(), 'NSPlayer/') !== false) { + $os->setName(Os::WINDOWS); + $os->setVersion(Os::VERSION_UNKNOWN); + return true; + } return false; } diff --git a/src/ScriptedAgent.php b/src/ScriptedAgent.php new file mode 100644 index 0000000..826a146 --- /dev/null +++ b/src/ScriptedAgent.php @@ -0,0 +1,193 @@ +setUserAgent($userAgent); + } elseif (null === $userAgent || is_string($userAgent)) { + 
$this->setUserAgent(new UserAgent($userAgent)); + } else { + throw new InvalidArgumentException(); + } + } + + /** + * Set the name of the ScriptedAgent. + * + * @param string $name + * + * @return void + */ + public function setName($name) + { + $this->name = (string)$name; + } + + /** + * Return the name of the ScriptedAgent. + * + * @return string + */ + public function getName() + { + if (!isset($this->name)) { + ScriptedAgentDetector::detect($this, $this->getUserAgent()); + } + + return $this->name; + } + + /** + * Set the type of the ScriptedAgent. + * + * @param string $type + * + * @return void + */ + public function setType($type) + { + $this->type = (string)$type; + } + + /** + * Return the type of the ScriptedAgent. + * + * @return string + */ + public function getType() + { + if (!isset($this->type)) { + ScriptedAgentDetector::detect($this, $this->getUserAgent()); + } + + return $this->type; + } + + /** + * Set the info URL for the ScriptedAgent. + * + * @param string $url + * + * @return void + */ + public function setInfoURL($url) + { + $this->url = (string)$url; + } + + /** + * Return the info URL for the ScriptedAgent. 
+ * + * @return string + */ + public function getInfoURL() + { + if (!isset($this->url)) { + ScriptedAgentDetector::detect($this, $this->getUserAgent()); + } + return $this->url; + } + + /** + * @param UserAgent $userAgent + * + * @return void + */ + public function setUserAgent(UserAgent $userAgent) + { + $this->userAgent = $userAgent; + } + + /** + * @return UserAgent + */ + public function getUserAgent() + { + return $this->userAgent; + } + + +} + + +?> \ No newline at end of file diff --git a/src/ScriptedAgentDetector.php b/src/ScriptedAgentDetector.php new file mode 100644 index 0000000..c51cf84 --- /dev/null +++ b/src/ScriptedAgentDetector.php @@ -0,0 +1,800 @@ +getUserAgent(); + } + self::$userAgentString = $userAgent->getUserAgentString(); + + self::$scriptedAgent->setName(ScriptedAgent::UNKNOWN); + self::$scriptedAgent->setType(ScriptedAgent::UNKNOWN); + self::$scriptedAgent->setInfoURL(ScriptedAgent::UNKNOWN); + + foreach (self::$robotsList as $robotName) { + $funcName = self::FUNC_PREFIX . $robotName; + + if (self::$funcName()) { + return true; + } + } + + return false; + } + + /** + * Determine if the browser is wkHTMLtoPDF + * + * @return bool + */ + public static function checkRobotwkHTMLtoPDF() + { + if (stripos(self::$userAgentString, 'wkhtmltopdf') !== false) { + self::$scriptedAgent->setName(ScriptedAgent::WKHTMLTOPDF); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://wkhtmltopdf.org/"); + return true; + } + return false; + } + + /** + * Determine if the browser is the ICQ preview. + * + * @return bool + */ + public static function checkRobotICQ() + { + //Chrome 51 always provides the Upgrade-Insecure-Requests header. ICQ does not. + //But to be extra safe, also check for the russian language which the ICQ bot sets. 
+ if (stripos(self::$userAgentString, 'Chrome/51.0.2704.103') !== FALSE && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && isset($_SERVER['HTTP_ACCEPT_LANGUAGE']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== FALSE) + { + self::$scriptedAgent->setName(ScriptedAgent::ICQ); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://icq.com"); + return true; + } + return false; + } + + /** + * Determine if the agent is GoogleBot, or a google ads bot. + * + * @return bool + */ + public static function checkRobotGoogle() + { + if (stripos(self::$userAgentString, "Googlebot") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEBOT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); + return true; + } + if (stripos(self::$userAgentString, "AdsBot-Google") !== false + || stripos(self::$userAgentString, "Mediapartners-Google") !== false + || stripos(self::$userAgentString, "Google-Adwords") !== false + || stripos(self::$userAgentString, "AdXVastFetcher-Google") !== false + ) + { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEADS); + self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); + self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); + return true; + } + if (stripos(self::$userAgentString, "Google Favicon") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEFAVICON); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://www.webmasterworld.com/search_engine_spiders/4626518.htm"); + return true; + } + if (stripos(self::$userAgentString, "Google Web Preview") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEPREVIEW); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://www.distilnetworks.com/bot-directory/bot/google-web-preview/"); + return true; + } + 
return false; + } + + /** + * Determine if the agent is the Baidu spider. + * + * @return bool + */ + public static function checkRobotBaidu() + { + if (stripos(self::$userAgentString, "Baiduspider") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::BAIDU); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://www.baidu.com/search/spider.html"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Facebook preview bot. + * + * @return bool + */ + public static function checkRobotFacebook() + { + if (stripos(self::$userAgentString, "facebookexternalhit") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::FACEBOOK); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://www.facebook.com/externalhit_uatext.php"); + return true; + } + return false; + } + + /** + * Determine if the agent is the bing spider, bing preview bot, or MSN bot + * + * @return bool + */ + public static function checkRobotBing() + { + + if (stripos(self::$userAgentString, "adidxbot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::BING); + self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + return true; + } + if (stripos(self::$userAgentString, "/bingbot.htm") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::BING); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + return true; + } + if (stripos(self::$userAgentString, "/msnbot.htm") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::MSNBOT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + 
return true; + } + if (stripos(self::$userAgentString, "BingPreview/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::BING_PREVIEW); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Yahoo Slurp! Spider. + * + * @return bool + * + */ + public static function checkRobotSlurp() + { + if (stripos(self::$userAgentString, "Yahoo! Slurp") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::SLURP); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://help.yahoo.com/kb/SLN22600.html"); + return true; + } + return false; + } + + /** + * Determine if the agent is the twitter preview bot. + * + * @return bool + */ + public static function checkRobotTwitter() + { + if (stripos(self::$userAgentString, "Twitterbot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::TWITTER); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("http://stackoverflow.com/questions/22362215/twitter-user-agent-on-sharing"); + return true; + } + return false; + } + + /** + * Determine if the agent is the skype preview bot. + * + * @return bool + */ + public static function checkRobotSkype() + { + if (stripos(self::$userAgentString, "SkypeUriPreview") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::SKYPE); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("http://www.skype.com"); + return true; + } + return false; + } + + /** + * Determine if the agent is the W3C Validator tool. 
+ * + * @return bool + */ + public static function checkRobotW3CValidator() + { + if (stripos(self::$userAgentString, "W3C_Validator/") !== false || + stripos(self::$userAgentString, "Validator.nu/") !== false || + stripos(self::$userAgentString, "W3C-mobileOK/DDC-") !== false || + stripos(self::$userAgentString, "W3C_I18n-Checker/") !== false || + stripos(self::$userAgentString, "FeedValidator/") !== false || + stripos(self::$userAgentString, "Jigsaw/") !== false || + stripos(self::$userAgentString, "JW3C_Unicorn/") !== false + ) + { + self::$scriptedAgent->setName(ScriptedAgent::W3CVALIDATOR); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://validator.w3.org/services"); + return true; + } + if (stripos(self::$userAgentString, "NING/") !== false || + stripos(self::$userAgentString, "W3C-checklink") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::W3CVALIDATOR); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://validator.w3.org/services"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Yandex spider. + * + * @return bool + */ + public static function checkRobotYandex() + { + if (stripos(self::$userAgentString, "YandexBot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::YANDEX); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://yandex.com/bots"); + return true; + } + return false; + } + + /** + * Determine if the agent is the AppleBot + * + * @return bool + */ + public static function checkRobotApple() + { + if (stripos(self::$userAgentString, "AppleBot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::APPLEBOT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.apple.com/en-gb/HT204683"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Paper.li bot. 
+ * + * @return bool + */ + public static function checkRobotPaperli() + { + if (stripos(self::$userAgentString, "PaperLiBot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::PAPERLI); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.paper.li/hc/en-us/articles/204105253-What-is-Paper-li-"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Ahrefs survey. + * + * @return bool + */ + public static function checkRobotAhrefs() + { + if (stripos(self::$userAgentString, "AhrefsBot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::AHREFS); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://ahrefs.com/robot"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Majestic 12 spider. + * + * @return bool + */ + public static function checkRobotMJ12() + { + if (stripos(self::$userAgentString, "MJ12Bot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::MJ12); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://www.majestic12.co.uk/projects/dsearch/mj12bot.php"); + return true; + } + return false; + } + + /** + * Determine if the agent is the LiveLap spider. + * + * @return bool + */ + public static function checkRobotLiveLap() + { + if (stripos(self::$userAgentString, "LivelapBot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::LIVELAP); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://site.livelap.com/crawler.html"); + return true; + } + return false; + } + + /** + * Determine if the agent is a Web Distributed Authoring and Versioning client. Usually unexpected WebDAV requests are hack attempts. 
+ * + * @return bool + */ + public static function checkRobotWebdav() + { + if (stripos(self::$userAgentString, "WEBDAV Client") !== false || + stripos(self::$userAgentString, "Microsoft Office Existence Discovery") !== false) //Office Webdav probe + { + self::$scriptedAgent->setName(ScriptedAgent::WEBDAV); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://en.wikipedia.org/wiki/WebDAV"); + return true; + } + return false; + } + + /** + * Determine if the agent is the MetaURI scraper. + * + * @return bool + */ + public static function checkRobotMetaURI() + { + if (stripos(self::$userAgentString, "MetaURI API/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::METAURI); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://github.com/stateless-systems/uri-meta"); + return true; + } + return false; + } + + /** + * Determine if the agent is the TLSProbe tool. + * + * @return bool + */ + public static function checkRobotTLSProbe() + { + if (stripos(self::$userAgentString, "TLSProbe/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::TLSPROBE); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://bitbucket.org/marco-bellaccini/tlsprobe"); + return true; + } + return false; + } + + /** + * Determine if the agent is the scoop.it bots. + * + * @return bool + */ + public static function checkRobotScoopIt() + { + if (stripos(self::$userAgentString, "wpif Safari") !== false + || stripos(self::$userAgentString, "imgsizer Safari") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::SCOOPIT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://www.webmasterworld.com/search_engine_spiders/4785385.htm"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Netcraft SSL Survey. 
+     *
+     * @return bool True when "Netcraft SSL Server Survey" is found in the user agent, false otherwise.
+     */
+    public static function checkRobotNetcraft()
+    {
+        // stripos() makes the token comparison case-insensitive.
+        if (stripos(self::$userAgentString, "Netcraft SSL Server Survey") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::NETCRAFT);
+        self::$scriptedAgent->setType(ScriptedAgent::SURVEY);
+        self::$scriptedAgent->setInfoURL("https://www.netcraft.com/internet-data-mining/ssl-survey/");
+
+        return true;
+    }
+
+    /**
+     * Detect curl (library or command-line tool) via its "curl/" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled CURL / GENERIC and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotCurl()
+    {
+        if (stripos(self::$userAgentString, "curl/") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::CURL);
+        self::$scriptedAgent->setType(ScriptedAgent::GENERIC);
+        self::$scriptedAgent->setInfoURL("https://curl.haxx.se/");
+
+        return true;
+    }
+
+    /**
+     * Detect Python HTTP clients that send a "python-requests/" or "python-urllib/" token.
+     *
+     * Matching is case-insensitive. On a match the shared scripted agent is labelled
+     * PYTHON / GENERIC and given its info URL.
+     *
+     * @return bool True when either token is found, false otherwise.
+     */
+    public static function checkRobotPython()
+    {
+        foreach (array("python-requests/", "python-urllib/") as $marker) {
+            if (stripos(self::$userAgentString, $marker) !== false) {
+                self::$scriptedAgent->setName(ScriptedAgent::PYTHON);
+                self::$scriptedAgent->setType(ScriptedAgent::GENERIC);
+                self::$scriptedAgent->setInfoURL("https://www.python.org/");
+
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Detect Go HTTP clients via the "Go-http-client" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled GOLANG / GENERIC and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotGoLang()
+    {
+        if (stripos(self::$userAgentString, "Go-http-client") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::GOLANG);
+        self::$scriptedAgent->setType(ScriptedAgent::GENERIC);
+        self::$scriptedAgent->setInfoURL("https://golang.org/");
+
+        return true;
+    }
+
+    /**
+     * Determine if the agent is the perl programming language.
+     *
+     * @return bool True when the "libwww-perl/" token is found in the user agent, false otherwise.
+     */
+    public static function checkRobotPerl()
+    {
+        // stripos() makes the token comparison case-insensitive.
+        if (stripos(self::$userAgentString, "libwww-perl/") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::PERL);
+        self::$scriptedAgent->setType(ScriptedAgent::GENERIC);
+        self::$scriptedAgent->setInfoURL("https://www.perl.org/");
+
+        return true;
+    }
+
+    /**
+     * Detect the wget tool via its "Wget/" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled WGET / TOOL and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotWget()
+    {
+        if (stripos(self::$userAgentString, "Wget/") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::WGET);
+        self::$scriptedAgent->setType(ScriptedAgent::TOOL);
+        self::$scriptedAgent->setInfoURL("https://www.gnu.org/software/wget/");
+
+        return true;
+    }
+
+    /**
+     * Detect the zgrab banner-grabbing tool via its "zgrab/" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled ZGRAB / TOOL and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotZGrab()
+    {
+        if (stripos(self::$userAgentString, "zgrab/") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::ZGRAB);
+        self::$scriptedAgent->setType(ScriptedAgent::TOOL);
+        self::$scriptedAgent->setInfoURL("https://github.com/zmap/zgrab");
+
+        return true;
+    }
+
+    /**
+     * Detect Java HTTP clients via the "Java/" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled JAVA / GENERIC and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotJava()
+    {
+        if (stripos(self::$userAgentString, "Java/") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::JAVA);
+        self::$scriptedAgent->setType(ScriptedAgent::GENERIC);
+        self::$scriptedAgent->setInfoURL("https://www.java.com/en/");
+
+        return true;
+    }
+
+    /**
+     * Determine if the agent is the ShellShock exploit.
+     *
+     * @return bool True when the user agent carries a Bash function-definition prefix, false otherwise.
+     */
+    public static function checkRobotShellshock()
+    {
+        // Shellshock (CVE-2014-6271) is triggered by a value that Bash parses as a function
+        // definition, i.e. one containing "() {". What follows (":;}; /bin/bash -c", "/bin/ping",
+        // "echo", ...) varies between attackers, so only the invariant prefix is matched. This is
+        // a superset of the previous "() { :;}; /bin/bash -c" check.
+        if (stripos(self::$userAgentString, "() {") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::SHELLSHOCK);
+        self::$scriptedAgent->setType(ScriptedAgent::EXPLOIT);
+        self::$scriptedAgent->setInfoURL("https://blog.cloudflare.com/inside-shellshock/");
+
+        return true;
+    }
+
+    /**
+     * Detect the Browsershots testing tool via the "Browsershots" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled BROWSERSHOTS / SURVEY and given its info URL.
+     *
+     * NOTE(review): the method name is spelled "Browershots" (sic). It is kept unchanged because
+     * it is public and presumably referenced by name elsewhere - confirm before renaming.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotBrowershots()
+    {
+        if (stripos(self::$userAgentString, "Browsershots") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::BROWSERSHOTS);
+        self::$scriptedAgent->setType(ScriptedAgent::SURVEY);
+        self::$scriptedAgent->setInfoURL("http://browsershots.org/");
+
+        return true;
+    }
+
+    /**
+     * Detect the who.is bot via its "who.is bot" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled WHOIS / SPIDER and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotWhois()
+    {
+        if (stripos(self::$userAgentString, "who.is bot") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::WHOIS);
+        self::$scriptedAgent->setType(ScriptedAgent::SPIDER);
+        self::$scriptedAgent->setInfoURL("http://www.who.is/");
+
+        return true;
+    }
+
+    /**
+     * Detect the MageReport scanner via the "MageReport" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled MAGEREPORT / SURVEY and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotMageReport()
+    {
+        if (stripos(self::$userAgentString, "MageReport") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::MAGEREPORT);
+        self::$scriptedAgent->setType(ScriptedAgent::SURVEY);
+        self::$scriptedAgent->setInfoURL("https://www.magereport.com/");
+
+        return true;
+    }
+
+    /**
+     * Determine if the agent is the AdBeat advertising survey.
+     *
+     * @return bool True when "adbeat.com" is found in the user agent, false otherwise.
+     */
+    public static function checkRobotAdbeat()
+    {
+        // stripos() makes the token comparison case-insensitive.
+        if (stripos(self::$userAgentString, "adbeat.com") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::ADBEAT);
+        self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING);
+        self::$scriptedAgent->setInfoURL("https://www.adbeat.com/operation_policy");
+
+        return true;
+    }
+
+    /**
+     * Detect the SocialRankIO bot via its "SocialRankIOBot" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled SOCIALRANK / SURVEY and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotSocialrank()
+    {
+        if (stripos(self::$userAgentString, "SocialRankIOBot") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::SOCIALRANK);
+        self::$scriptedAgent->setType(ScriptedAgent::SURVEY);
+        self::$scriptedAgent->setInfoURL("http://socialrank.io/about");
+
+        return true;
+    }
+
+    /**
+     * Detect the Gluten Free crawler via its "Gluten Free Crawler/" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled GLUTENFREE / SURVEY and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotGlutenFree()
+    {
+        if (stripos(self::$userAgentString, "Gluten Free Crawler/") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::GLUTENFREE);
+        self::$scriptedAgent->setType(ScriptedAgent::SURVEY);
+        self::$scriptedAgent->setInfoURL("http://glutenfreepleasure.com/");
+
+        return true;
+    }
+
+    /**
+     * Detect the Proximic bot via the "proximic;" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled PROXIMIC / SPIDER and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotProximic()
+    {
+        if (stripos(self::$userAgentString, "proximic;") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::PROXIMIC);
+        self::$scriptedAgent->setType(ScriptedAgent::SPIDER);
+        self::$scriptedAgent->setInfoURL("http://www.proximic.com/info/spider.php");
+
+        return true;
+    }
+
+    /**
+     * Determine if the agent is the Ubermetrics survey.
+     *
+     * @return bool True when "@ubermetrics-technologies.com" is found in the user agent, false otherwise.
+     */
+    public static function checkRobotUbermetrics()
+    {
+        // stripos() makes the token comparison case-insensitive.
+        if (stripos(self::$userAgentString, "@ubermetrics-technologies.com") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::UBERMETRICS);
+        self::$scriptedAgent->setType(ScriptedAgent::SURVEY);
+        self::$scriptedAgent->setInfoURL("https://www.ubermetrics-technologies.com/");
+
+        return true;
+    }
+
+    /**
+     * Detect the Verisign agent via its "ips-agent" user-agent token (case-insensitive).
+     *
+     * On a match the shared scripted agent is labelled VERISIGN / SURVEY and given its info URL.
+     *
+     * @return bool True when the token is found, false otherwise.
+     */
+    public static function checkRobotVerisign()
+    {
+        if (stripos(self::$userAgentString, "ips-agent") === false) {
+            return false;
+        }
+
+        self::$scriptedAgent->setName(ScriptedAgent::VERISIGN);
+        self::$scriptedAgent->setType(ScriptedAgent::SURVEY);
+        self::$scriptedAgent->setInfoURL("http://www.spambotsecurity.com/forum/viewtopic.php?f=7&t=1453");
+
+        return true;
+    }
+}
\ No newline at end of file diff --git a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml index d6628ec..0d91a2f 100644 --- a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml +++ b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml @@ -263,16 +263,5 @@ Mozilla/5.0 (Linux; Android 5.1.1; SAMSUNG SM-G360T1 Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/3.3 Chrome/38.0.2125.102 Mobile Safari/537.36 - - wkhtmltopdf - unknown - Linux - unknown - unknown - unknown - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.34 (KHTML, like Gecko) wkhtmltopdf-amd64 Safari/534.34 - - From 8d6b754548629dc83a2556442f6c74800878417d Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Sat, 1 Apr 2017 20:18:53 +0100 Subject: [PATCH 2/3] Fix typo in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 92e5ef5..0050a5d 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ use Sinergi\BrowserDetector\Browser; $browser = new Browser(); -//You can also provide a userAgent string if you
don't wish to detec the current browser +//You can also provide a userAgent string if you don't wish to detect the current browser //$browser = new Browser("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"); if ($browser->getName() === Browser::IE && $browser->getVersion() < 11) { From 09eba0c6e73608471237e27e96a0f700badad29b Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Sat, 1 Apr 2017 20:24:10 +0100 Subject: [PATCH 3/3] Correct regular expression in Firefox and Seamonkey: Instead of: Match everything except ^ ;\), Match: a-z, A-Z, 0-9, and . only. Remove dirty fudge added to remove the stray \n Now passes tests without said fudge --- src/Browser.php | 3 +-- src/BrowserDetector.php | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Browser.php b/src/Browser.php index 52b0f44..8963492 100644 --- a/src/Browser.php +++ b/src/Browser.php @@ -154,8 +154,7 @@ public function isBrowser($name) */ public function setVersion($version) { - //The regex for the Firefox version lets through a linebreak, causing the test to fail - $this->version = str_replace("\n","",(string)$version); + $this->version = (string)$version; return $this; } diff --git a/src/BrowserDetector.php b/src/BrowserDetector.php index d18be6f..7fef018 100644 --- a/src/BrowserDetector.php +++ b/src/BrowserDetector.php @@ -772,7 +772,7 @@ public static function checkBrowserNokia() public static function checkBrowserFirefox() { if (stripos(self::$userAgentString, 'safari') === false) { - if (preg_match("/Firefox[\\/ \\(]([^ ;\\)]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/Firefox[\\/ \\(]([a-zA-Z\\d\\.]*)/i", self::$userAgentString, $matches)) { if (isset($matches[1])) { self::$browser->setVersion($matches[1]); } @@ -798,7 +798,7 @@ public static function checkBrowserFirefox() public static function checkBrowserSeaMonkey() { if (stripos(self::$userAgentString, 'safari') === false) { - if (preg_match("/SeaMonkey[\\/ \\(]([^ ;\\)]+)/i", 
self::$userAgentString, $matches)) { + if (preg_match("/SeaMonkey[\\/ \\(]([a-zA-Z\\d\\.]*)/i", self::$userAgentString, $matches)) { if (isset($matches[1])) { self::$browser->setVersion($matches[1]); }