Skip to content

Commit

Permalink
implement a basic adblocker
Browse files Browse the repository at this point in the history
Note it requires a dependency that is not yet listed (since I need a setup.py
file!) and it would be really good to improve its speed, not sure how yet.
  • Loading branch information
parkouss committed Jun 29, 2017
1 parent 4a09a8b commit 22b5230
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 0 deletions.
63 changes: 63 additions & 0 deletions webmacs/adblock.py
@@ -0,0 +1,63 @@
import logging
import os
import time
import urllib.request
from concurrent.futures import ThreadPoolExecutor

from adblockparser import AdblockRules, AdblockRule
from PyQt5.QtCore import QRegExp

# Filter-list URLs that callers register with an Adblocker.
EASYLIST = (
    "https://easylist.to/easylist/easylist.txt",
    "https://easylist.to/easylist/easyprivacy.txt",
    "https://easylist.to/easylist/fanboy-annoyance.txt",
)


class BlockRule(AdblockRule):
    """AdblockRule variant that matches URLs with a Qt ``QRegExp``."""

    def _url_matches(self, url):
        """Return True when the rule's regular expression is found in *url*.

        The ``QRegExp`` is built from ``self.regex`` on first use and kept in
        ``self.regex_re`` for later calls.
        """
        # NOTE(review): QRegExp and Python's ``re`` do not share the exact
        # same syntax -- confirm the generated patterns behave the same.
        matcher = self.regex_re
        if matcher is None:
            matcher = QRegExp(self.regex)
            self.regex_re = matcher
        position = matcher.indexIn(url)
        return position != -1


class Adblocker(object):
    """Download, cache and parse adblock filter lists.

    Filter lists are registered with :meth:`register_filter_url`, stored as
    files below *cache_path*, and turned into an ``AdblockRules`` object by
    :meth:`generate_rules`.
    """

    # Maximum age (in seconds) of a cached list before it is downloaded again.
    CACHE_MAX_AGE = 3600

    def __init__(self, cache_path):
        """Create the blocker, making sure *cache_path* exists."""
        # exist_ok avoids the race of a separate isdir() check + makedirs().
        os.makedirs(cache_path, exist_ok=True)
        self._cache_path = cache_path
        # Maps each registered filter URL to its cache file path.
        self._urls = {}

    def register_filter_url(self, url, destfile=None):
        """Register a filter list *url* to be cached as *destfile*.

        When *destfile* is None, the last path component of *url* is used as
        the cache file name.
        """
        if destfile is None:
            destfile = url.rsplit("/", 1)[-1]
        self._urls[url] = os.path.join(self._cache_path, destfile)

    def _download_file(self, url, path):
        """Download *url* (decoded as UTF-8) and store it at *path*.

        Raises whatever ``urlopen``, the UTF-8 decoding or the file writes
        raise; *path* is only replaced once the whole content was fetched.
        """
        headers = {'User-Agent': "Magic Browser"}
        req = urllib.request.Request(url, None, headers)
        with urllib.request.urlopen(req, timeout=5) as conn:
            text = conn.read().decode("utf-8")

        # Write to a sibling file first so that a failed or partial download
        # never leaves a truncated list that would look like a valid cache.
        tmp_path = path + ".part"
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(text)
        os.replace(tmp_path, path)

    def _is_outdated(self, path):
        """Return True if *path* is missing or older than CACHE_MAX_AGE."""
        if not os.path.isfile(path):
            return True
        return time.time() - os.path.getmtime(path) > self.CACHE_MAX_AGE

    def _fetch_urls(self):
        """Download every registered list whose cache is missing or outdated.

        Downloads run in a thread pool. A failed download is logged and
        otherwise ignored, so a previously cached copy (if any) stays in use.
        """
        to_download = [(url, path) for url, path in self._urls.items()
                       if self._is_outdated(path)]
        if not to_download:
            return

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = {
                executor.submit(self._download_file, url, path): url
                for url, path in to_download
            }

        # Leaving the ``with`` block waits for all downloads to finish, so
        # exception() returns immediately here.
        for future, url in futures.items():
            error = future.exception()
            if error is not None:
                logging.warning("could not download %s: %s", url, error)

    def generate_rules(self):
        """Refresh the cached lists and return the resulting AdblockRules.

        Blank lines are dropped. Lists that could not be downloaded and have
        no cached copy are skipped with a warning.
        """
        self._fetch_urls()
        rules = []
        for path in self._urls.values():
            logging.debug("loading adblock rules from %s", path)
            try:
                with open(path, encoding="utf-8") as f:
                    stripped = (line.strip() for line in f)
                    rules.extend(line for line in stripped if line)
            except FileNotFoundError:
                logging.warning("adblock list %s is not available", path)
        return AdblockRules(rules, rule_cls=BlockRule)
46 changes: 46 additions & 0 deletions webmacs/application.py
@@ -1,16 +1,35 @@
import os
import logging

from PyQt5.QtWebEngineWidgets import QWebEngineProfile, QWebEngineScript, \
QWebEngineSettings
from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor
from PyQt5.QtWidgets import QApplication

from . import require
from .websocket import WebSocketClientWrapper
from .keyboardhandler import KEY_EATER
from .adblock import EASYLIST, Adblocker


THIS_DIR = os.path.dirname(os.path.realpath(__file__))


class UrlInterceptor(QWebEngineUrlRequestInterceptor):
    """Request interceptor that blocks URLs matched by the adblock rules."""

    def __init__(self, app):
        """Build the adblock rules from the EASYLIST filter URLs.

        The filter lists are cached under ``app.adblock_path()``.
        """
        QWebEngineUrlRequestInterceptor.__init__(self)
        blocker = Adblocker(app.adblock_path())
        for filter_url in EASYLIST:
            blocker.register_filter_url(filter_url)
        self._adblock = blocker.generate_rules()

    def interceptRequest(self, request):
        """Block *request* when its URL matches one of the adblock rules."""
        target = request.requestUrl().toString()
        if not self._adblock.should_block(target):
            return
        logging.info("filtered: %s", target)
        request.block(True)


class Application(QApplication):
INSTANCE = None

Expand All @@ -21,6 +40,10 @@ def __init__(self, args):
with open(os.path.join(THIS_DIR, "app_style.css")) as f:
self.setStyleSheet(f.read())
self._setup_websocket()

self._setup_conf_paths()

self._interceptor = UrlInterceptor(self)
self._setup_default_profile(self.sock_client.port)

self.installEventFilter(KEY_EATER)
Expand All @@ -46,6 +69,25 @@ def __init__(self, args):

require(".default_webjumps")

def _setup_conf_paths(self):
    """Set the configuration directory and create the needed directories.

    The configuration root is ``~/.webmacs``; it and the profiles
    directory are created when they do not exist yet.
    """
    self._conf_path = os.path.join(os.path.expanduser("~"), ".webmacs")

    # exist_ok=True replaces the racy "isdir() then makedirs()" sequence.
    for path in (self.conf_path(), self.profiles_path()):
        os.makedirs(path, exist_ok=True)

def conf_path(self):
    """Return the configuration root directory stored in ``_conf_path``."""
    root = self._conf_path
    return root

def profiles_path(self):
    """Return the ``profiles`` directory inside the configuration root."""
    root = self.conf_path()
    return os.path.join(root, "profiles")

def adblock_path(self):
    """Return the ``adblock`` directory inside the configuration root."""
    root = self.conf_path()
    return os.path.join(root, "adblock")

def _setup_websocket(self):
"""
An internal websocket is used to communicate between web page content
Expand All @@ -55,6 +97,10 @@ def _setup_websocket(self):

def _setup_default_profile(self, port):
default_profile = QWebEngineProfile.defaultProfile()
default_profile.setRequestInterceptor(self._interceptor)
path = self.profiles_path()
default_profile.setPersistentStoragePath(os.path.join(path, "default"))
default_profile.setCachePath(os.path.join(path, "cache"))

def inject_js(src, ipoint=QWebEngineScript.DocumentCreation,
iid=QWebEngineScript.ApplicationWorld):
Expand Down

0 comments on commit 22b5230

Please sign in to comment.