This repository has been archived by the owner on Dec 1, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 38
/
impl.py
133 lines (102 loc) · 3.72 KB
/
impl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import logging
import re
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class UnableToParse(Exception):
    """
    Raised by a parser to signal that it cannot parse the user agent it was given.
    """
class UserAgentParser(metaclass=abc.ABCMeta):
    """
    Abstract interface for user agent parsers.

    Concrete parsers must provide both a ``name`` property and a ``__call__``
    implementation before they can be instantiated.
    """

    @property
    @abc.abstractmethod
    def name(self):
        """
        The name of this parser; handy for logging and similar diagnostics.
        """

    @abc.abstractmethod
    def __call__(self, ua):
        """
        Parse the user agent string ``ua`` and return a dictionary holding all of
        the relevant information extracted from it.

        An implementation that cannot handle the given user agent may raise
        ``UnableToParse`` to say so.
        """
class CallbackUserAgentParser(UserAgentParser):
    """
    A ``UserAgentParser`` that delegates all parsing to a plain callable.

    :param callback: Callable invoked with the user agent string; whatever it
                     returns (or raises) is the result of the parser.
    :param name: Optional, keyword-only name for the parser. When omitted, the
                 callback's ``__name__`` is used, falling back to the name of the
                 callback's type if it has no ``__name__``.
    """

    def __init__(self, callback, *, name=None):
        if name is None:
            # Not every callable has a ``__name__`` (``functools.partial`` objects
            # and instances defining ``__call__`` do not), so fall back to the type
            # name rather than failing with an AttributeError.
            name = getattr(callback, "__name__", type(callback).__name__)

        self._callback = callback
        self._name = name

    @property
    def name(self):
        """
        Returns the name of this parser.
        """
        return self._name

    def __call__(self, ua):
        """
        Passes ``ua`` to the wrapped callback and returns its result unchanged.
        """
        return self._callback(ua)
def ua_parser(fn):
    """
    Decorator that turns the function ``fn`` into a ``CallbackUserAgentParser``.

    No explicit name is passed along, so the resulting parser takes whatever
    default name ``CallbackUserAgentParser`` derives from ``fn``.
    """
    parser = CallbackUserAgentParser(fn)
    return parser
class RegexUserAgentParser(UserAgentParser):
    """
    A ``UserAgentParser`` that searches the user agent with a series of regular
    expressions and passes the groups captured by the first one that matches to a
    handler callable.

    :param regexes: Iterable of regular expressions. Strings are compiled with
                    ``re.compile``; anything else is used as given and is expected
                    to behave like a compiled pattern.
    :param handler: Callable that receives the captured groups: unnamed groups as
                    positional arguments (in order) and named groups as keyword
                    arguments.
    :param name: Optional, keyword-only name for the parser. When omitted, the
                 handler's ``__name__`` is used, falling back to the name of the
                 handler's type if it has no ``__name__``.
    """

    def __init__(self, regexes, handler, *, name=None):
        if name is None:
            # Not every callable has a ``__name__`` (``functools.partial`` objects
            # and instances defining ``__call__`` do not), so fall back to the type
            # name rather than failing with an AttributeError.
            name = getattr(handler, "__name__", type(handler).__name__)

        self._regexes = [
            re.compile(regex) if isinstance(regex, str) else regex for regex in regexes
        ]
        self._handler = handler
        self._name = name

    @property
    def name(self):
        """
        Returns the name of this parser.
        """
        return self._name

    def __call__(self, user_agent):
        """
        Parses ``user_agent`` with the first regex that matches anywhere in it and
        returns whatever the handler returns for the captured groups.

        Raises ``UnableToParse`` if none of the regexes match.
        """
        for regex in self._regexes:
            matched = regex.search(user_agent)

            # If we've matched this particular regex, then we'll break the loop here
            # and go onto finishing parsing.
            if matched is not None:
                break
        else:
            # None of our regexes matched.
            raise UnableToParse

        # We need to build up the args, and kwargs of our function, we call any
        # unnamed group an arg, and pass them in, in order, and we call any named
        # group a kwarg and we pass them in by name.
        kwargs = matched.groupdict()
        named_indexes = set(matched.re.groupindex.values())
        args = [
            value
            for index, value in enumerate(matched.groups(), start=1)
            if index not in named_indexes
        ]

        # Finally, we'll call our handler with our parsed arguments, and return
        # whatever result it gives us.
        return self._handler(*args, **kwargs)
def regex_ua_parser(*regexes):
    """
    Decorator factory: ``@regex_ua_parser(r"...", r"...")`` replaces the decorated
    function with a ``RegexUserAgentParser`` built from the given regexes, using
    the function as its handler.
    """

    def decorator(fn):
        parser = RegexUserAgentParser(regexes, fn)
        return parser

    return decorator
class ParserSet:
    """
    A collection of parsers that are tried one after another, in the order in which
    they were registered, until one of them manages to parse the user agent.
    """

    def __init__(self):
        # A dict is used as an insertion-ordered set: a parser registered twice is
        # only kept once, and iteration follows registration order so that the
        # outcome is reproducible when more than one parser could handle the same
        # user agent. (A plain ``set`` iterates in hash order, which can differ
        # from run to run.)
        self._parsers = {}

    def register(self, parser):
        """
        Adds ``parser`` to this set (a no-op if it is already registered) and
        returns it, so that this method can be used as a decorator.
        """
        self._parsers.setdefault(parser, None)
        return parser

    def __call__(self, user_agent):
        """
        Returns the result of the first registered parser that parses
        ``user_agent`` without raising.

        A parser raising ``UnableToParse`` is skipped silently; any other exception
        is logged and the next parser is tried. Raises ``UnableToParse`` if no
        parser produced a result.
        """
        for parser in self._parsers:
            try:
                return parser(user_agent)
            except UnableToParse:
                continue
            except Exception:
                # Registered callables are not guaranteed to expose ``name``; fall
                # back to the parser itself so that reporting one error can never
                # raise another one and abort the remaining parsers.
                logger.error(
                    "Error parsing %r as a %s.",
                    user_agent,
                    getattr(parser, "name", parser),
                    exc_info=True,
                )

        raise UnableToParse