Skip to content

Commit

Permalink
Use the API to get external links, correct UTM regex
Browse files Browse the repository at this point in the history
  • Loading branch information
theopolisme committed Jul 31, 2013
1 parent 9815530 commit 05d3c4d
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions nomoretracking.py
Expand Up @@ -17,13 +17,12 @@

# CC-BY-SA Theopolisme

URL = re.compile(r"""((?:\w+:)?\/\/[^<>\[\]\s"]+)""",flags=re.UNICODE|re.DOTALL)
-UTM = re.compile(r"""[\?&]utm_.*?=.*?(?=\s|&|$)""",flags=re.UNICODE|re.DOTALL)
+UTM = re.compile(r"""[\?&]utm_.*?=.*?(?=\s|&|$|])""",flags=re.UNICODE|re.DOTALL)

def process(page):
contents = page.edit()
contents_compare = contents
-links = re.findall(URL,contents)
+links = site.api('parse',text=contents,prop="externallinks")['parse']['externallinks']
for link in links:
if link.find("utm") != -1:
html_doc = requests.get(link).text
Expand Down

0 comments on commit 05d3c4d

Please sign in to comment.