Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Update regexes

  • Loading branch information...
commit ca1012014c23e86999a2ca1f5f4c7c2500ef6b9a 1 parent faf388d
@ozataman authored
Showing with 525 additions and 158 deletions.
  1. +499 −132 resources/user_agent_parser.yaml
  2. +24 −24 test/src/TestSuite.hs
  3. +2 −2 ua-parser.cabal
View
631 resources/user_agent_parser.yaml
@@ -5,27 +5,34 @@ user_agent_parsers:
- regex: '^(Opera)/(\d+)\.(\d+) \(Nintendo Wii'
family_replacement: 'Wii'
- # must go before Firefox to catch SeaMonkey.
- - regex: '(SeaMonkey|Fennec|Camino)/(\d+)\.(\d+)\.?([ab]?\d+[a-z]*)'
-
- # must go before Firefox
-
- # must go before Browser/major_version.minor_version - eg: Minefield/3.1a1pre
+ # must go before Firefox to catch SeaMonkey/Camino
+ - regex: '(SeaMonkey|Camino)/(\d+)\.(\d+)\.?([ab]?\d+[a-z]*)'
+
+ # Firefox
+ - regex: '(Pale[Mm]oon)/(\d+)\.(\d+)\.?(\d+)?'
+ family_replacement: 'Pale Moon (Firefox Variant)'
+ - regex: '(Fennec)/(\d+)\.(\d+)\.?([ab]?\d+[a-z]*)'
+ family_replacement: 'Firefox Mobile'
+ # pre-release builds, e.g.: Fennec/0.9pre -- reported under the same family as the other Fennec rules
+ - regex: '(Fennec)/(\d+)\.(\d+)(pre)'
+ family_replacement: 'Firefox Mobile'
+ - regex: '(Fennec)/(\d+)\.(\d+)'
+ family_replacement: 'Firefox Mobile'
+ - regex: 'Mobile.*(Firefox)/(\d+)\.(\d+)'
+ family_replacement: 'Firefox Mobile'
- regex: '(Namoroka|Shiretoko|Minefield)/(\d+)\.(\d+)\.(\d+(?:pre)?)'
family_replacement: 'Firefox ($1)'
-
- - regex: '(Firefox)/(\d+)\.(\d+)([ab]\d+[a-z]*)'
+ - regex: '(Firefox)/(\d+)\.(\d+)(a\d+[a-z]*)'
+ family_replacement: 'Firefox Alpha'
+ - regex: '(Firefox)/(\d+)\.(\d+)(b\d+[a-z]*)'
family_replacement: 'Firefox Beta'
-
- - regex: '(Firefox)-(?:\d+\.\d+)?/(\d+)\.(\d+)([ab]\d+[a-z]*)'
+ - regex: '(Firefox)-(?:\d+\.\d+)?/(\d+)\.(\d+)(a\d+[a-z]*)'
+ family_replacement: 'Firefox Alpha'
+ - regex: '(Firefox)-(?:\d+\.\d+)?/(\d+)\.(\d+)(b\d+[a-z]*)'
family_replacement: 'Firefox Beta'
-
- regex: '(Namoroka|Shiretoko|Minefield)/(\d+)\.(\d+)([ab]\d+[a-z]*)?'
family_replacement: 'Firefox ($1)'
-
- regex: '(Firefox).*Tablet browser (\d+)\.(\d+)\.(\d+)'
family_replacement: 'MicroB'
-
- regex: '(MozillaDeveloperPreview)/(\d+)\.(\d+)([ab]\d+[a-z]*)?'
# e.g.: Flock/2.0b2
@@ -35,8 +42,6 @@ user_agent_parsers:
- regex: '(RockMelt)/(\d+)\.(\d+)\.(\d+)'
# e.g.: Fennec/0.9pre
- - regex: '(Fennec)/(\d+)\.(\d+)(pre)'
-
- regex: '(Navigator)/(\d+)\.(\d+)\.(\d+)'
family_replacement: 'Netscape'
@@ -51,18 +56,21 @@ user_agent_parsers:
# Opera will stop at 9.80 and hide the real version in the Version string.
# see: http://dev.opera.com/articles/view/opera-ua-string-changes/
- - regex: '(Opera Tablet).*Version\/(\d+)\.(\d+)(?:\.(\d+))?'
-
+ - regex: '(Opera Tablet).*Version/(\d+)\.(\d+)(?:\.(\d+))?'
- regex: '(Opera)/.+Opera Mobi.+Version/(\d+)\.(\d+)'
family_replacement: 'Opera Mobile'
-
+ - regex: 'Opera Mobi'
+ family_replacement: 'Opera Mobile'
- regex: '(Opera Mini)/(\d+)\.(\d+)'
-
- - regex: '(Opera)/9.80.*Version\/(\d+)\.(\d+)(?:\.(\d+))?'
+ - regex: '(Opera Mini)/att/(\d+)\.(\d+)'
+ - regex: '(Opera)/9.80.*Version/(\d+)\.(\d+)(?:\.(\d+))?'
# Palm WebOS looks a lot like Safari.
+ - regex: '(webOSBrowser)/(\d+)\.(\d+)'
- regex: '(webOS)/(\d+)\.(\d+)'
- family_replacement: 'Palm webOS'
+ family_replacement: 'webOSBrowser'
+ - regex: '(wOSBrowser).+TouchPad/(\d+)\.(\d+)'
+ family_replacement: 'webOS TouchPad'
# LuaKit has no version info.
# http://luakit.org/projects/luakit/
@@ -92,13 +100,8 @@ user_agent_parsers:
- regex: '(konqueror)/(\d+)\.(\d+)\.(\d+)'
family_replacement: 'Konqueror'
- - regex: '(PlayBook).+RIM Tablet OS (\d+)\.(\d+)\.(\d+)'
-
- regex: '(WeTab)-Browser'
- - regex: '(wOSBrowser).+TouchPad/(\d+)\.(\d+)'
- family_replacement: 'webOS TouchPad'
-
- regex: '(Comodo_Dragon)/(\d+)\.(\d+)\.(\d+)'
family_replacement: 'Comodo Dragon'
@@ -108,78 +111,170 @@ user_agent_parsers:
# must go before NetFront below
- regex: '(Kindle)/(\d+)\.(\d+)'
- # must go before Android below.
- - regex: '(CrMo)/(\d+)\.(\d+)\.(\d+)'
+ - regex: '(Symphony) (\d+).(\d+)'
+
+ - regex: 'Minimo'
+
+ # Chrome Mobile
+ - regex: '(CrMo)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
+ family_replacement: 'Chrome Mobile'
+ - regex: '(CriOS)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
+ family_replacement: 'Chrome Mobile iOS'
+ - regex: '(Chrome)/(\d+)\.(\d+)\.(\d+)\.(\d+) Mobile'
family_replacement: 'Chrome Mobile'
+ # Chrome Frame must come before MSIE.
+ - regex: '(chromeframe)/(\d+)\.(\d+)\.(\d+)'
+ family_replacement: 'Chrome Frame'
+
+ # UC Browser
+ - regex: '(UC Browser)(\d+)\.(\d+)\.(\d+)'
+
+ # Tizen Browser (second case included in browser/major.minor regex)
+ - regex: '(SLP Browser)/(\d+)\.(\d+)'
+ family_replacement: 'Tizen Browser'
+
+ # Epiphany browser (identifies as Chromium)
+ - regex: '(Epiphany)/(\d+)\.(\d+).(\d+)'
+
+ # Sogou Explorer 2.X
+ - regex: '(SE 2\.X) MetaSr (\d+)\.(\d+)'
+ family_replacement: 'Sogou Explorer'
+
+ # Pingdom
+ - regex: '(Pingdom.com_bot_version_)(\d+)\.(\d+)'
+ family_replacement: 'PingdomBot'
+
+ # Facebook
+ - regex: '(facebookexternalhit)/(\d+)\.(\d+)'
+ family_replacement: 'FacebookBot'
+
+ # Twitterbot
+ - regex: '(Twitterbot)/(\d+)\.(\d+)'
+ family_replacement: 'TwitterBot'
#### END SPECIAL CASES TOP ####
#### MAIN CASES - this catches > 50% of all browsers ####
# Browser/major_version.minor_version.beta_version
- - regex: '(AdobeAIR|Chromium|FireWeb|Jasmine|ANTGalio|Midori|Fresco|Lobo|PaleMoon|Maxthon|Lynx|OmniWeb|Dillo|Camino|Demeter|Fluid|Fennec|Shiira|Sunrise|Chrome|Flock|Netscape|Lunascape|Epiphany|WebPilot|Vodafone|NetFront|Konqueror|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|Opera Mini|iCab|NetNewsWire|ThunderBrowse|Iron|Iris)/(\d+)\.(\d+)\.(\d+)'
+ - regex: '(AdobeAIR|Chromium|FireWeb|Jasmine|ANTGalio|Midori|Fresco|Lobo|PaleMoon|Maxthon|Lynx|OmniWeb|Dillo|Camino|Demeter|Fluid|Fennec|Shiira|Sunrise|Chrome|Flock|Netscape|Lunascape|WebPilot|NetFront|Netfront|Konqueror|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|Opera Mini|iCab|NetNewsWire|ThunderBrowse|Iron|Iris|UP\.Browser|Bunjaloo|Google Earth|Raven for Mac)/(\d+)\.(\d+)\.(\d+)'
# Browser/major_version.minor_version
- - regex: '(Bolt|Jasmine|IceCat|Skyfire|Midori|Maxthon|Lynx|Arora|IBrowse|Dillo|Camino|Shiira|Fennec|Phoenix|Chrome|Flock|Netscape|Lunascape|Epiphany|WebPilot|Opera Mini|Opera|Vodafone|NetFront|Konqueror|Googlebot|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|iCab|NetNewsWire|Iron|Space Bison|Stainless|Orca|Dolfin|BOLT)/(\d+)\.(\d+)'
+ - regex: '(Bolt|Jasmine|IceCat|Skyfire|Midori|Maxthon|Lynx|Arora|IBrowse|Dillo|Camino|Shiira|Fennec|Phoenix|Chrome|Flock|Netscape|Lunascape|Epiphany|WebPilot|Opera Mini|Opera|NetFront|Netfront|Konqueror|Googlebot|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|iCab|NetNewsWire|Iron|Space Bison|Stainless|Orca|Dolfin|BOLT|Minimo|Tizen Browser|Polaris)/(\d+)\.(\d+)'
# Browser major_version.minor_version.beta_version (space instead of slash)
- regex: '(iRider|Crazy Browser|SkipStone|iCab|Lunascape|Sleipnir|Maemo Browser) (\d+)\.(\d+)\.(\d+)'
# Browser major_version.minor_version (space instead of slash)
- - regex: '(iCab|Lunascape|Opera|Android) (\d+)\.(\d+)\.?(\d+)?'
-
+ - regex: '(iCab|Lunascape|Opera|Android|Jasmine|Polaris|BREW) (\d+)\.(\d+)\.?(\d+)?'
+
+ # weird android UAs
+ - regex: '(Android) Donut'
+ v1_replacement: '1'
+ v2_replacement: '2'
+
+ - regex: '(Android) Eclair'
+ v1_replacement: '2'
+ v2_replacement: '1'
+
+ - regex: '(Android) Froyo'
+ v1_replacement: '2'
+ v2_replacement: '2'
+
+ - regex: '(Android) Gingerbread'
+ v1_replacement: '2'
+ v2_replacement: '3'
+
+ - regex: '(Android) Honeycomb'
+ v1_replacement: '3'
+
# IE Mobile
- regex: '(IEMobile)[ /](\d+)\.(\d+)'
family_replacement: 'IE Mobile'
# desktop mode
# http://www.anandtech.com/show/3982/windows-phone-7-review
- regex: '(MSIE) (\d+)\.(\d+).*XBLWP7'
- family_replacement: 'IE Mobile'
+ family_replacement: 'IE Large Screen'
# AFTER THE EDGE CASES ABOVE!
- regex: '(Firefox)/(\d+)\.(\d+)\.(\d+)'
-
- regex: '(Firefox)/(\d+)\.(\d+)(pre|[ab]\d+[a-z]*)?'
+
#### END MAIN CASES ####
#### SPECIAL CASES ####
+ - regex: '(Obigo)InternetBrowser'
+ - regex: '(Obigo)\-Browser'
- regex: '(Obigo|OBIGO)[^\d]*(\d+)(?:.(\d+))?'
- family_replacement: 'Obigo'
- regex: '(MAXTHON|Maxthon) (\d+)\.(\d+)'
family_replacement: 'Maxthon'
-
- regex: '(Maxthon|MyIE2|Uzbl|Shiira)'
v1_replacement: '0'
- regex: '(PLAYSTATION) (\d+)'
family_replacement: 'PlayStation'
-
- regex: '(PlayStation Portable)[^\d]+(\d+).(\d+)'
- regex: '(BrowseX) \((\d+)\.(\d+)\.(\d+)'
+ # Polaris/d.d is above
- regex: '(POLARIS)/(\d+)\.(\d+)'
family_replacement: 'Polaris'
+ - regex: '(Embider)/(\d+)\.(\d+)'
+ family_replacement: 'Polaris'
- regex: '(BonEcho)/(\d+)\.(\d+)\.(\d+)'
family_replacement: 'Bon Echo'
- regex: '(iPod).+Version/(\d+)\.(\d+)\.(\d+)'
-
- - regex: '(iPhone) OS (\d+)_(\d+)(?:_(\d+))?'
-
- - regex: '(iPad).+ OS (\d+)_(\d+)(?:_(\d+))?'
+ family_replacement: 'Mobile Safari'
+ - regex: '(iPod).*Version/(\d+)\.(\d+)'
+ family_replacement: 'Mobile Safari'
+ - regex: '(iPod)'
+ family_replacement: 'Mobile Safari'
+ - regex: '(iPhone).*Version/(\d+)\.(\d+)\.(\d+)'
+ family_replacement: 'Mobile Safari'
+ - regex: '(iPhone).*Version/(\d+)\.(\d+)'
+ family_replacement: 'Mobile Safari'
+ - regex: '(iPhone)'
+ family_replacement: 'Mobile Safari'
+ - regex: '(iPad).*Version/(\d+)\.(\d+)\.(\d+)'
+ family_replacement: 'Mobile Safari'
+ - regex: '(iPad).*Version/(\d+)\.(\d+)'
+ family_replacement: 'Mobile Safari'
+ - regex: '(iPad)'
+ family_replacement: 'Mobile Safari'
+
+ - regex: '(AvantGo) (\d+).(\d+)'
- regex: '(Avant)'
v1_replacement: '1'
+ # nokia browsers
+ # based on: http://www.developer.nokia.com/Community/Wiki/User-Agent_headers_for_Nokia_devices
+ - regex: '^(Nokia)'
+ family_replacement: 'Nokia Services (WAP) Browser'
+ - regex: '(NokiaBrowser)/(\d+)\.(\d+).(\d+)\.(\d+)'
+ - regex: '(NokiaBrowser)/(\d+)\.(\d+).(\d+)'
+ - regex: '(NokiaBrowser)/(\d+)\.(\d+)'
+ - regex: '(BrowserNG)/(\d+)\.(\d+).(\d+)'
+ family_replacement: 'NokiaBrowser'
+ - regex: '(Series60)/5\.0'
+ family_replacement: 'NokiaBrowser'
+ v1_replacement: '7'
+ v2_replacement: '0'
+ - regex: '(Series60)/(\d+)\.(\d+)'
+ family_replacement: 'Nokia OSS Browser'
+ - regex: '(S40OviBrowser)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
+ family_replacement: 'Nokia Series 40 Ovi Browser'
- regex: '(Nokia)[EN]?(\d+)'
# BlackBerry devices
- - regex: '(Black[bB]erry).+Version\/(\d+)\.(\d+)\.(\d+)'
- family_replacement: 'Blackberry'
-
+ - regex: '(PlayBook).+RIM Tablet OS (\d+)\.(\d+)\.(\d+)'
+ family_replacement: 'Blackberry WebKit'
+ - regex: '(Black[bB]erry).+Version/(\d+)\.(\d+)\.(\d+)'
+ family_replacement: 'Blackberry WebKit'
- regex: '(Black[bB]erry)\s?(\d+)'
family_replacement: 'Blackberry'
@@ -201,6 +296,10 @@ user_agent_parsers:
# Amazon Silk, should go before Safari
- regex: '(Silk)/(\d+)\.(\d+)(?:\.([0-9\-]+))?'
+ # WebKit Nightly
+ - regex: '(AppleWebKit)/(\d+)\.?(\d+)?\+ .* Version/\d+\.\d+.\d+ Safari/'
+ family_replacement: 'WebKit Nightly'
+
# Safari
- regex: '(Version)/(\d+)\.(\d+)(?:\.(\d+))?.*Safari/'
family_replacement: 'Safari'
@@ -212,19 +311,42 @@ user_agent_parsers:
- regex: '(OLPC)/Update()\.(\d+)'
v1_replacement: '0'
- - regex: '(SamsungSGHi560)'
- family_replacement: 'Samsung SGHi560'
-
- - regex: '^(SonyEricssonK800i)'
- family_replacement: 'Sony Ericsson K800i'
+ - regex: '(SEMC\-Browser)/(\d+)\.(\d+)'
- - regex: '(Teleca Q7)'
+ - regex: '(Teleca)'
+ family_replacement: 'Teleca Browser'
- regex: '(MSIE) (\d+)\.(\d+)'
family_replacement: 'IE'
+os_parsers:
+
+ ##########
+ # Android
+ # can actually detect rooted android os. do we care?
+ ##########
+ - regex: '(Android) (\d+)\.(\d+)(?:[.\-]([a-z0-9]+))?'
+ - regex: '(Android)\-(\d+)\.(\d+)(?:[.\-]([a-z0-9]+))?'
+
+ - regex: '(Android) Donut'
+ os_v1_replacement: '1'
+ os_v2_replacement: '2'
+
+ - regex: '(Android) Eclair'
+ os_v1_replacement: '2'
+ os_v2_replacement: '1'
+
+ - regex: '(Android) Froyo'
+ os_v1_replacement: '2'
+ os_v2_replacement: '2'
+
+ - regex: '(Android) Gingerbread'
+ os_v1_replacement: '2'
+ os_v2_replacement: '3'
+
+ - regex: '(Android) Honeycomb'
+ os_v1_replacement: '3'
-os_parsers:
##########
# Windows
# http://en.wikipedia.org/wiki/Windows_NT#Releases
@@ -233,118 +355,110 @@ os_parsers:
# lots of ua strings have Windows NT 4.1 !?!?!?!? !?!? !? !????!?! !!! ??? !?!?! ?
# (very) roughly ordered in terms of frequency of occurrence of regex (win xp currently most frequent, etc)
##########
+ - regex: '(Windows Phone 6\.5)'
+
- regex: '(Windows (?:NT 5\.2|NT 5\.1))'
os_replacement: 'Windows XP'
-
- # ie mobile des ktop mode
- # spoofs nt 6.1. must come before windows 7
+
+ # ie mobile desktop mode
+ # spoofs nt 6.1. must come before windows 7
- regex: '(XBLWP7)'
os_replacement: 'Windows Phone OS'
-
+
- regex: '(Windows NT 6\.1)'
os_replacement: 'Windows 7'
-
+
- regex: '(Windows NT 6\.0)'
os_replacement: 'Windows Vista'
-
+
- regex: '(Windows 98|Windows XP|Windows ME|Windows 95|Windows CE|Windows 7|Windows NT 4\.0|Windows Vista|Windows 2000)'
-
+
# is this a spoof or is nt 6.2 out and about in some capacity?
- regex: '(Windows NT 6\.2)'
os_replacement: 'Windows 8'
-
+
- regex: '(Windows NT 5\.0)'
os_replacement: 'Windows 2000'
-
+
- regex: '(Windows Phone OS) (\d+)\.(\d+)'
-
+
- regex: '(Windows ?Mobile)'
os_replacement: 'Windows Mobile'
-
+
- regex: '(WinNT4.0)'
os_replacement: 'Windows NT 4.0'
-
+
- regex: '(Win98)'
os_replacement: 'Windows 98'
-
+
##########
- # Android
- # can actually detect rooted android os. do we care?
+ # Tizen OS from Samsung
+ # spoofs Android so pushing it above
##########
- - regex: '(Android) (\d+)\.(\d+)(?:[.\-]([a-z0-9]+))?'
-
- - regex: '(Android) Donut'
- os_v1_replacement: '1'
- os_v2_replacement: '2'
-
- - regex: '(Android) Eclair'
- os_v1_replacement: '2'
- os_v2_replacement: '1'
-
- - regex: '(Android) Froyo'
- os_v1_replacement: '2'
- os_v2_replacement: '2'
-
- - regex: '(Android) Gingerbread'
- os_v1_replacement: '2'
- os_v2_replacement: '3'
-
- - regex: '(Android) Honeycomb'
- os_v1_replacement: '3'
-
+ - regex: '(Tizen)/(\d+)\.(\d+)'
+
+
+
##########
# Mac OS
# http://en.wikipedia.org/wiki/Mac_OS_X#Versions
##########
- regex: '(Mac OS X) (\d+)[_.](\d+)(?:[_.](\d+))?'
-
+
# builds before tiger don't seem to specify version?
-
+
# ios devices spoof (mac os x), so including intel/ppc prefixes
- regex: '(?:PPC|Intel) (Mac OS X)'
-
+
##########
# iOS
# http://en.wikipedia.org/wiki/IOS_version_history
##########
- - regex: '(CPU OS|iPhone OS) (\d+)_(\d+)(?:_(\d+))?'
+ - regex: '(CPU OS|iPhone OS) (\d+)_(\d+)(?:_(\d+))?'
os_replacement: 'iOS'
-
+
# remaining cases are mostly only opera uas, so catch opera as to not catch iphone spoofs
- regex: '(iPhone|iPad|iPod); Opera'
os_replacement: 'iOS'
-
+
# few more stragglers
- - regex: '(iPhone|iPad|iPod).*Mac OS X'
+ - regex: '(iPhone|iPad|iPod).*Mac OS X.*Version/(\d+)\.(\d+)'
os_replacement: 'iOS'
-
+
##########
# Chrome OS
- # if version 0.0.0, probably this stuff:
+ # if version 0.0.0, probably this stuff:
# http://code.google.com/p/chromium-os/issues/detail?id=11573
# http://code.google.com/p/chromium-os/issues/detail?id=13790
##########
- regex: '(CrOS) [a-z0-9_]+ (\d+)\.(\d+)(?:\.(\d+))?'
os_replacement: 'Chrome OS'
-
+
##########
# Linux distros
##########
- regex: '(Debian)-(\d+)\.(\d+)\.(\d+)(?:\.(\d+))?'
- regex: '(Linux Mint)(?:/(\d+))?'
- regex: '(Mandriva)(?: Linux)?/(\d+)\.(\d+)\.(\d+)(?:\.(\d+))?'
-
+
##########
# Symbian + Symbian OS
# http://en.wikipedia.org/wiki/History_of_Symbian
##########
- regex: '(Symbian[Oo][Ss])/(\d+)\.(\d+)'
os_replacement: 'Symbian OS'
+ - regex: '(Symbian/3).+NokiaBrowser/7\.3'
+ os_replacement: 'Symbian^3 Anna'
+ - regex: '(Symbian/3).+NokiaBrowser/7\.4'
+ os_replacement: 'Symbian^3 Belle'
- regex: '(Symbian/3)'
os_replacement: 'Symbian^3'
- regex: '(Series 60|SymbOS|S60)'
os_replacement: 'Symbian OS'
-
+ - regex: '(MeeGo)'
+ - regex: 'Symbian [Oo][Ss]'
+ os_replacement: 'Symbian OS'
+
##########
# BlackBerry devices
##########
@@ -361,23 +475,23 @@ os_parsers:
##########
# Misc mobile
- ##########
+ ##########
- regex: '(webOS|hpwOS)/(\d+)\.(\d+)(?:\.(\d+))?'
os_replacement: 'webOS'
-
+
##########
# Generic patterns
# since the majority of os cases are very specific, these go last
##########
# first.second.third.fourth bits
- regex: '(SUSE|Fedora|Red Hat|PCLinuxOS)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
-
+
# first.second.third bits
- regex: '(SUSE|Fedora|Red Hat|Puppy|PCLinuxOS|CentOS)/(\d+)\.(\d+)\.(\d+)'
-
+
# first.second bits
- regex: '(Ubuntu|Kindle|Bada|Lubuntu|BackTrack|Red Hat|Slackware)/(\d+)\.(\d+)'
-
+
# just os
- regex: '(Windows|OpenBSD|FreeBSD|NetBSD|Ubuntu|Kubuntu|Android|Arch Linux|CentOS|WeTab|Slackware)'
- regex: '(Linux|BSD)'
@@ -395,63 +509,316 @@ device_parsers:
# http://en.wikipedia.org/wiki/List_of_HTC_phones
# this is quickly getting unwieldy
##########
- # SPECIFIC CASES
- - regex: '(HTC_VDA_V)'
- device_replacement: 'HTC S710'
- - regex: '(HTC-P4600)'
- device_replacement: 'HTC Touch Pro'
- - regex: '(HTCX06HT)'
- device_replacement: 'HTC Desire'
- - regex: '(HTC-A6366)'
- device_replacement: 'HTC Aria'
- - regex: 'HTC; (7 Mozart|T8697)'
- device_replacement: 'HTC 7 Mozart'
- - regex: '(HTC-ST7377)'
- device_replacement: 'HTC Touch Pro2'
- - regex: 'HTC; 7 Trophy'
- device_replacement: 'HTC 7 Trophy'
- - regex: 'HTC; HD7'
- device_replacement: 'HTC HD7'
-
# example: Mozilla/5.0 (Linux; U; Android 2.3.2; fr-fr; HTC HD2 Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1
- regex: 'HTC ([A-Z][a-z0-9]+) Build'
- device_replacement: 'HTC ($1)'
+ device_replacement: 'HTC $1'
# example: Mozilla/5.0 (Linux; U; Android 2.1; es-es; HTC Legend 1.23.161.1 Build/ERD79) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17,gzip
- regex: 'HTC ([A-Z][a-z0-9 ]+) \d+\.\d+\.\d+\.\d+'
- device_replacement: 'HTC ($1) ($2)'
+ device_replacement: 'HTC $1'
# example: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; HTC_Touch_Diamond2_T5353; Windows Phone 6.5.3.5)
- regex: 'HTC_Touch_([A-Za-z0-9]+)'
device_replacement: 'HTC Touch ($1)'
# should come after HTC_Touch
+ - regex: 'USCCHTC(\d+)'
+ device_replacement: 'HTC $1 (US Cellular)'
+ - regex: 'Sprint APA(9292)'
+ device_replacement: 'HTC $1 (Sprint)'
+ - regex: 'HTC ([A-Za-z0-9]+ [A-Z])'
+ device_replacement: HTC $1
+ - regex: 'HTC-([A-Za-z0-9]+)'
+ device_replacement: 'HTC $1'
- regex: 'HTC_([A-Za-z0-9]+)'
- device: 'HTC ($1)'
+ device_replacement: 'HTC $1'
+ - regex: 'HTC ([A-Za-z0-9]+)'
+ device_replacement: 'HTC $1'
+ - regex: '(ADR[A-Za-z0-9]+)'
+ device_replacement: 'HTC $1'
- regex: '(HTC)'
+ #########
+ # Ericsson - must come before nokia since they also use symbian
+ #########
+ - regex: 'SonyEricsson([A-Za-z0-9]+)/'
+ device_replacement: 'Ericsson $1'
+
+ #########
+ # Android General Device Matching (far from perfect)
+ #########
+ - regex: 'Android[\- ][\d]+\.[\d]+\; [A-Za-z]{2}\-[A-Za-z]{2}\; WOWMobile (.+) Build'
+ - regex: 'Android[\- ][\d]+\.[\d]+\.[\d]+; [A-Za-z]{2}\-[A-Za-z]{2}\; (.+) Build'
+ - regex: 'Android[\- ][\d]+\.[\d]+\-update1\; [A-Za-z]{2}\-[A-Za-z]{2}\; (.+) Build'
+ - regex: 'Android[\- ][\d]+\.[\d]+\; [A-Za-z]{2}\-[A-Za-z]{2}\; (.+) Build'
+ - regex: 'Android[\- ][\d]+\.[\d]+\.[\d]+; (.+) Build'
+
##########
- # NOKIA
+ # NOKIA
# nokia NokiaN8-00 comes before iphone. sometimes spoofs iphone
##########
- - regex: 'Nokia(?!Browser)([A-Za-z0-9\-])+'
- device_replacement: 'Nokia ($1)'
+ - regex: 'NokiaN([0-9]+)'
+ device_replacement: 'Nokia N$1'
+ - regex: 'Nokia([A-Za-z0-9\v-]+)'
+ device_replacement: 'Nokia $1'
+ - regex: 'NOKIA ([A-Za-z0-9\-]+)'
+ device_replacement: 'Nokia $1'
- regex: 'Nokia ([A-Za-z0-9\-]+)'
- device_replacement: 'Nokia ($1)'
+ device_replacement: 'Nokia $1'
+ - regex: 'Lumia ([A-Za-z0-9\-]+)'
+ device_replacement: 'Lumia $1'
+ - regex: 'Symbian'
+ device_replacement: 'Nokia'
##########
- # incomplete!
- # PALM
+ # Blackberry
+ # http://www.useragentstring.com/pages/BlackBerry/
+ ##########
+ - regex: '(PlayBook).+RIM Tablet OS'
+ device_replacement: 'Blackberry Playbook'
+ - regex: '(Black[Bb]erry [0-9]+);'
+ - regex: 'Black[Bb]erry([0-9]+)'
+ device_replacement: 'BlackBerry $1'
+
+ ##########
+ # PALM / HP
##########
# some palm devices must come before iphone. sometimes spoofs iphone in ua
- regex: '(Pre)/(\d+)\.(\d+)'
device_replacement: 'Palm Pre'
- regex: '(Pixi)/(\d+)\.(\d+)'
device_replacement: 'Palm Pixi'
+ - regex: '(Touchpad)/(\d+)\.(\d+)'
+ device_replacement: 'HP Touchpad'
+ - regex: 'HPiPAQ([A-Za-z0-9]+)/(\d+).(\d+)'
+ device_replacement: 'HP iPAQ $1'
+ - regex: 'Palm([A-Za-z0-9]+)'
+ device_replacement: 'Palm $1'
+ - regex: 'Treo([A-Za-z0-9]+)'
+ device_replacement: 'Palm Treo $1'
+ - regex: 'webOS.*(P160UNA)/(\d+).(\d+)'
+ device_replacement: 'HP Veer'
+
+ ##########
+ # incomplete!
+ # Kindle
+ ##########
+ - regex: '(Kindle Fire)'
+ - regex: '(Kindle)'
+ - regex: '(Silk)/(\d+)\.(\d+)(?:\.([0-9\-]+))?'
+ device_replacement: 'Kindle Fire'
##########
# complete but probably catches spoofs
- # iSTUFF
+ # iSTUFF
##########
# ipad and ipod must be parsed before iphone
# cannot determine specific device type from ua string. (3g, 3gs, 4, etc)
+ - regex: '(iPad) Simulator;'
- regex: '(iPad);'
- regex: '(iPod);'
+ - regex: '(iPhone) Simulator;'
- regex: '(iPhone);'
+
+ ##########
+ # Acer
+ ##########
+ - regex: 'acer_([A-Za-z0-9]+)_'
+ device_replacement: 'Acer $1'
+ - regex: 'acer_([A-Za-z0-9]+)_'
+ device_replacement: 'Acer $1'
+
+ ##########
+ # Amoi
+ ##########
+ - regex: 'Amoi\-([A-Za-z0-9]+)'
+ device_replacement: 'Amoi $1'
+ - regex: 'AMOI\-([A-Za-z0-9]+)'
+ device_replacement: 'Amoi $1'
+
+ ##########
+ # Asus
+ ##########
+ - regex: 'Asus\-([A-Za-z0-9]+)'
+ device_replacement: 'Asus $1'
+ - regex: 'ASUS\-([A-Za-z0-9]+)'
+ device_replacement: 'Asus $1'
+
+ ##########
+ # Bird
+ ##########
+ - regex: 'BIRD\-([A-Za-z0-9]+)'
+ device_replacement: 'Bird $1'
+ - regex: 'BIRD\.([A-Za-z0-9]+)'
+ device_replacement: 'Bird $1'
+ - regex: 'BIRD ([A-Za-z0-9]+)'
+ device_replacement: 'Bird $1'
+
+ ##########
+ # Dell
+ ##########
+ - regex: 'Dell ([A-Za-z0-9]+)'
+ device_replacement: 'Dell $1'
+
+ ##########
+ # DoCoMo
+ ##########
+ - regex: 'DoCoMo/2\.0 ([A-Za-z0-9]+)'
+ device_replacement: 'DoCoMo $1'
+ - regex: '([A-Za-z0-9]+)\_W\;FOMA'
+ device_replacement: 'DoCoMo $1'
+ - regex: '([A-Za-z0-9]+)\;FOMA'
+ device_replacement: 'DoCoMo $1'
+
+ ##########
+ # Huawei Vodafone
+ ##########
+ - regex: 'vodafone([A-Za-z0-9]+)'
+ device_replacement: 'Huawei Vodafone $1'
+
+ ##########
+ # i-mate
+ ##########
+ - regex: 'i\-mate ([A-Za-z0-9]+)'
+ device_replacement: 'i-mate $1'
+
+ ##########
+ # kyocera
+ ##########
+ - regex: 'Kyocera\-([A-Za-z0-9]+)'
+ device_replacement: 'Kyocera $1'
+ - regex: 'KWC\-([A-Za-z0-9]+)'
+ device_replacement: 'Kyocera $1'
+
+ ##########
+ # lenovo
+ ##########
+ - regex: 'Lenovo\-([A-Za-z0-9]+)'
+ device_replacement: 'Lenovo $1'
+ - regex: 'Lenovo\_([A-Za-z0-9]+)'
+ device_replacement: 'Lenovo $1'
+
+ ##########
+ # lg
+ ##########
+ - regex: 'LG/([A-Za-z0-9]+)'
+ device_replacement: 'LG $1'
+ - regex: 'LG-LG([A-Za-z0-9]+)'
+ device_replacement: 'LG $1'
+ - regex: 'LGE-LG([A-Za-z0-9]+)'
+ device_replacement: 'LG $1'
+ - regex: 'LGE VX([A-Za-z0-9]+)'
+ device_replacement: 'LG $1'
+ - regex: 'LG ([A-Za-z0-9]+)'
+ device_replacement: 'LG $1'
+ - regex: 'LGE LG\-AX([A-Za-z0-9]+)'
+ device_replacement: 'LG $1'
+ - regex: 'LG\-([A-Za-z0-9]+)'
+ device_replacement: 'LG $1'
+ - regex: 'LGE\-([A-Za-z0-9]+)'
+ device_replacement: 'LG $1'
+ - regex: 'LG([A-Za-z0-9]+)'
+ device_replacement: 'LG $1'
+
+ ##########
+ # kin
+ ##########
+ - regex: '(KIN)\.One (\d+)\.(\d+)'
+ device_replacement: 'Microsoft $1'
+ - regex: '(KIN)\.Two (\d+)\.(\d+)'
+ device_replacement: 'Microsoft $1'
+
+ ##########
+ # motorola
+ ##########
+ - regex: '(Motorola)\-([A-Za-z0-9]+)'
+ - regex: 'MOTO\-([A-Za-z0-9]+)'
+ device_replacement: 'Motorola $1'
+ - regex: 'MOT\-([A-Za-z0-9]+)'
+ device_replacement: 'Motorola $1'
+
+ ##########
+ # philips
+ ##########
+ - regex: 'Philips([A-Za-z0-9]+)'
+ device_replacement: 'Philips $1'
+ - regex: 'Philips ([A-Za-z0-9]+)'
+ device_replacement: 'Philips $1'
+
+ ##########
+ # Samsung
+ ##########
+ - regex: 'SAMSUNG-([A-Za-z0-9\-]+)'
+ device_replacement: 'Samsung $1'
+ - regex: 'SAMSUNG\; ([A-Za-z0-9\-]+)'
+ device_replacement: 'Samsung $1'
+
+ ##########
+ # Softbank
+ ##########
+ - regex: 'Softbank/1\.0/([A-Za-z0-9]+)'
+ device_replacement: 'Softbank $1'
+ - regex: 'Softbank/2\.0/([A-Za-z0-9]+)'
+ device_replacement: 'Softbank $1'
+
+ ##########
+ # Generic Smart Phone
+ ##########
+ - regex: '(hiptop|avantgo|plucker|xiino|blazer|elaine|up.browser|up.link|mmp|smartphone|midp|wap|vodafone|o2|pocket|mobile|pda)'
+ device_replacement: "Generic Smartphone"
+
+ ##########
+ # Generic Feature Phone
+ ##########
+ - regex: '^(1207|3gso|4thp|501i|502i|503i|504i|505i|506i|6310|6590|770s|802s|a wa|acer|acs\-|airn|alav|asus|attw|au\-m|aur |aus |abac|acoo|aiko|alco|alca|amoi|anex|anny|anyw|aptu|arch|argo|bell|bird|bw\-n|bw\-u|beck|benq|bilb|blac|c55/|cdm\-|chtm|capi|comp|cond|craw|dall|dbte|dc\-s|dica|ds\-d|ds12|dait|devi|dmob|doco|dopo|el49|erk0|esl8|ez40|ez60|ez70|ezos|ezze|elai|emul|eric|ezwa|fake|fly\-|fly\_|g\-mo|g1 u|g560|gf\-5|grun|gene|go.w|good|grad|hcit|hd\-m|hd\-p|hd\-t|hei\-|hp i|hpip|hs\-c|htc |htc\-|htca|htcg)'
+ device_replacement: 'Generic Feature Phone'
+ - regex: '^(htcp|htcs|htct|htc\_|haie|hita|huaw|hutc|i\-20|i\-go|i\-ma|i230|iac|iac\-|iac/|ig01|im1k|inno|iris|jata|java|kddi|kgt|kgt/|kpt |kwc\-|klon|lexi|lg g|lg\-a|lg\-b|lg\-c|lg\-d|lg\-f|lg\-g|lg\-k|lg\-l|lg\-m|lg\-o|lg\-p|lg\-s|lg\-t|lg\-u|lg\-w|lg/k|lg/l|lg/u|lg50|lg54|lge\-|lge/|lynx|leno|m1\-w|m3ga|m50/|maui|mc01|mc21|mcca|medi|meri|mio8|mioa|mo01|mo02|mode|modo|mot |mot\-|mt50|mtp1|mtv |mate|maxo|merc|mits|mobi|motv|mozz|n100|n101|n102|n202|n203|n300|n302|n500|n502|n505|n700|n701|n710|nec\-|nem\-|newg|neon)'
+ device_replacement: 'Generic Feature Phone'
+ - regex: '^(netf|noki|nzph|o2 x|o2\-x|opwv|owg1|opti|oran|ot\-s|p800|pand|pg\-1|pg\-2|pg\-3|pg\-6|pg\-8|pg\-c|pg13|phil|pn\-2|pt\-g|palm|pana|pire|pock|pose|psio|qa\-a|qc\-2|qc\-3|qc\-5|qc\-7|qc07|qc12|qc21|qc32|qc60|qci\-|qwap|qtek|r380|r600|raks|rim9|rove|s55/|sage|sams|sc01|sch\-|scp\-|sdk/|se47|sec\-|sec0|sec1|semc|sgh\-|shar|sie\-|sk\-0|sl45|slid|smb3|smt5|sp01|sph\-|spv |spv\-|sy01|samm|sany|sava|scoo|send|siem|smar|smit|soft|sony|t\-mo|t218|t250|t600|t610|t618|tcl\-|tdg\-|telm|tim\-|ts70|tsm\-|tsm3|tsm5|tx\-9|tagt)'
+ device_replacement: 'Generic Feature Phone'
+ - regex: '^(talk|teli|topl|tosh|up.b|upg1|utst|v400|v750|veri|vk\-v|vk40|vk50|vk52|vk53|vm40|vx98|virg|vite|voda|vulc|w3c |w3c\-|wapj|wapp|wapu|wapm|wig |wapi|wapr|wapv|wapy|wapa|waps|wapt|winc|winw|wonu|x700|xda2|xdag|yas\-|your|zte\-|zeto|aste|audi|avan|blaz|brew|brvw|bumb|ccwa|cell|cldc|cmd\-|dang|eml2|fetc|hipt|http|ibro|idea|ikom|ipaq|jbro|jemu|jigs|keji|kyoc|kyok|libw|m\-cr|midp|mmef|moto|mwbp|mywa|newt|nok6|o2im|pant|pdxg|play|pluc|port|prox|rozo|sama|seri|smal|symb|treo|upsi|vx52|vx53|vx60|vx61|vx70|vx80|vx81|vx83|vx85|wap\-|webc|whit|wmlb|xda\-|xda\_)'
+ device_replacement: 'Generic Feature Phone'
+
+ ##########
+ # Spiders (this is a hack...)
+ ##########
+ - regex: '(bot|borg|google(^tv)|yahoo|slurp|msnbot|msrbot|openbot|archiver|netresearch|lycos|scooter|altavista|teoma|gigabot|baiduspider|blitzbot|oegp|charlotte|furlbot|http%20client|polybot|htdig|ichiro|mogimogi|larbin|pompos|scrubby|searchsight|seekbot|semanticdiscovery|silk|snappy|speedy|spider|voila|vortex|voyager|zao|zeal|fast\-webcrawler|converacrawler|dataparksearch|findlinks)'
+ device_replacement: 'Spider'
+
+
+mobile_user_agent_families:
+ - 'Firefox Mobile'
+ - 'Opera Mobile'
+ - 'Opera Mini'
+ - 'Mobile Safari'
+ - 'webOS'
+ - 'IE Mobile'
+ - 'Playstation Portable'
+ - 'Nokia'
+ - 'Blackberry'
+ - 'Palm'
+ - 'Silk'
+ - 'Android'
+ - 'Maemo'
+ - 'Obigo'
+ - 'Netfront'
+ - 'AvantGo'
+ - 'Teleca'
+ - 'SEMC-Browser'
+ - 'Bolt'
+ - 'Iris'
+ - 'UP.Browser'
+ - 'Symphony'
+ - 'Minimo'
+ - 'Bunjaloo'
+ - 'Jasmine'
+ - 'Dolfin'
+ - 'Polaris'
+ - 'BREW'
+ - 'Chrome Mobile'
+ - 'UC Browser'
+ - 'Tizen Browser'
+
+# Some select mobile OSs report a desktop browser.
+# make sure we note they're mobile
+mobile_os_families:
+ - 'Windows Phone 6.5'
+ - 'Windows CE'
+ - 'Symbian OS'
View
48 test/src/TestSuite.hs
@@ -1,6 +1,6 @@
-{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TemplateHaskell #-}
module Main where
@@ -34,11 +34,11 @@ $(derives [makeNFData] [''UAResult])
-main = benchMain
- -- arg <- getArgs
- -- case arg of
- -- ["bench"] -> benchMain
- -- _ -> testMain
+main = do
+ arg <- getArgs
+ case arg of
+ ["bench"] -> benchMain
+ _ -> testMain
@@ -56,20 +56,20 @@ benchMain = do
ua = bench "UA Parsing" $ nf (map (parseUA conf . uatcString)) allC
print $ show (length allC) ++ " strings being parsed."
C.defaultMain [ua]
-
+
-------------
-- Testing --
-------------
-
+
testMain = T.defaultMain tests
- where
- tests =
- [ uaTests
+ where
+ tests =
+ [ uaTests
, osTests ]
-
+
----------------
-- UA Testing --
@@ -83,7 +83,7 @@ uaTests = buildTest $ do
cases2 <- loadTests "resources/firefox_user_agent_strings.yaml"
let allC = cases ++ cases2
return $ testGroup "UA Parsing Tests" $ map (testUAParser conf) allC
-
+
-------------------------------------------------------------------------------
@@ -96,7 +96,7 @@ testUAParser config UATC{..} = testCase tn $ do
-- assertEqual "v1 is the same" uatcV1 uarV1
-- assertEqual "v2 is the same" uatcV2 uarV2
-- assertEqual "v3 is the same" uatcV3 uarV3
- where
+ where
parsed = parseUA config uatcString
tn = T.unpack $ T.intercalate "/" ["UA Test: ", uatcFamily, m uatcV1, m uatcV2, m uatcV3]
m x = maybe "-" id x
@@ -113,7 +113,7 @@ osTests = buildTest $ do
conf <- loadConfig "../resources/user_agent_parser.yaml"
cases <- loadTests "resources/test_user_agent_parser_os.yaml"
return $ testGroup "OS Parsing Tests" $ map (testOSParser conf) cases
-
+
-------------------------------------------------------------------------------
@@ -126,9 +126,9 @@ testOSParser config OSTC{..} = testCase tn $ do
-- assertEqual "v1 is the same" uatcV1 uarV1
-- assertEqual "v2 is the same" uatcV2 uarV2
-- assertEqual "v3 is the same" uatcV3 uarV3
- where
+ where
parsed = parseOS config ostcString
- tn = T.unpack $ T.intercalate "/"
+ tn = T.unpack $ T.intercalate "/"
["OS Test: ", ostcFamily, m ostcV1, m ostcV2, m ostcV3, m ostcV4]
m x = maybe "-" id x
@@ -143,7 +143,7 @@ loadTests fp = do
case xs of
Nothing -> error "Can't load test file"
Just xs' -> return xs'
-
+
-------------------------------------------------------------------------------
data UserAgentTestCase = UATC {
@@ -153,11 +153,11 @@ data UserAgentTestCase = UATC {
, uatcV2 :: Maybe Text
, uatcV3 :: Maybe Text
} deriving (Show)
-
+
-------------------------------------------------------------------------------
instance FromJSON UserAgentTestCase where
- parseJSON (Object v) =
+ parseJSON (Object v) =
UATC <$> v .: "user_agent_string"
<*> (v .: "family" <|> return "")
<*> (v .:? "v1" <|> return Nothing)
@@ -178,12 +178,12 @@ data OSTestCase = OSTC {
-------------------------------------------------------------------------------
instance FromJSON OSTestCase where
- parseJSON (Object v) =
+ parseJSON (Object v) =
OSTC <$> (v .: "user_agent_string" <|> return "")
<*> (v .: "os" <|> return "")
<*> (v .:? "os_v1" <|> return Nothing)
<*> (v .:? "os_v2" <|> return Nothing)
<*> (v .:? "os_v3" <|> return Nothing)
<*> (v .:? "os_v4" <|> return Nothing)
-
-
+
+
View
4 ua-parser.cabal
@@ -1,5 +1,5 @@
Name: ua-parser
-Version: 0.1
+Version: 0.1.1
Synopsis: A user agent string parser in Haskell
License: BSD3
License-file: LICENSE
@@ -7,7 +7,7 @@ Author: Ozgun Ataman
Maintainer: ozgun.ataman@soostone.com
Category: Web
Build-type: Simple
-Cabal-version: >=1.2
+Cabal-version: >=1.6
data-files:
Please sign in to comment.
Something went wrong with that request. Please try again.