Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Use an ad-block C++ library to speed up URL matching
- Loading branch information
Showing
8 changed files
with
205 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
[submodule "vendor/hashset-cpp"] | ||
path = vendor/hashset-cpp | ||
url = https://github.com/bbondy/hashset-cpp | ||
[submodule "vendor/bloom-filter-cpp"] | ||
path = vendor/bloom-filter-cpp | ||
url = https://github.com/bbondy/bloom-filter-cpp | ||
[submodule "vendor/ad-block"] | ||
path = vendor/ad-block | ||
url = https://github.com/brave/ad-block |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
#include <Python.h>
#include "structmember.h"

#include <new>

#include "ad_block_client.h"
|
||
typedef struct { | ||
PyObject_HEAD | ||
|
||
AdBlockClient * client; | ||
} AdBlock; | ||
|
||
|
||
static void | ||
AdBlock_dealloc(AdBlock* self) | ||
{ | ||
delete self->client; | ||
Py_TYPE(self)->tp_free((PyObject*)self); | ||
} | ||
|
||
/*
 * tp_new-style constructor: allocates an instance through the type's
 * tp_alloc and returns it (NULL when the allocation fails).
 * `args` and `kwds` are not used.
 */
static PyObject *
AdBlock_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    /* tp_alloc already yields a PyObject *, so it can be returned as is. */
    return type->tp_alloc(type, 0);
}
|
||
static int | ||
AdBlock_init(AdBlock *self, PyObject *args, PyObject *kwds) | ||
{ | ||
self->client = new AdBlockClient; | ||
return 0; | ||
} | ||
|
||
/*
 * AdBlock.parse(data): feeds a filter-list string to the client.
 *
 * Expects one string argument. Returns None on success, or NULL with a
 * Python exception set when the arguments are invalid or the instance was
 * never initialised.
 */
static PyObject *
AdBlock_parse(AdBlock* self, PyObject *args)
{
    const char *data;

    if (!PyArg_ParseTuple(args, "s", &data))
        return NULL;

    /* client is only assigned by AdBlock_init; an object created without
     * running __init__ (e.g. a subclass that overrides it) would otherwise
     * dereference a null pointer here. */
    if (self->client == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "AdBlock.__init__() was not called");
        return NULL;
    }

    /* NOTE(review): `data` points into the argument string and is only
     * guaranteed valid during this call -- confirm AdBlockClient::parse
     * copies whatever it needs to keep. */
    self->client->parse(data);

    Py_RETURN_NONE;
}
|
||
/*
 * AdBlock.matches(url, domain): asks the client whether `url`, requested in
 * the context of `domain`, matches the loaded filters (queried with
 * FONoFilterOption).
 *
 * Expects two string arguments. Returns True or False, or NULL with a
 * Python exception set when the arguments are invalid or the instance was
 * never initialised.
 */
static PyObject *
AdBlock_matches(AdBlock* self, PyObject *args)
{
    const char *url, *domain;

    if (!PyArg_ParseTuple(args, "ss", &url, &domain))
        return NULL;

    /* client is only assigned by AdBlock_init; guard against instances
     * whose __init__ never ran instead of dereferencing a null pointer. */
    if (self->client == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "AdBlock.__init__() was not called");
        return NULL;
    }

    if (self->client->matches(url, FONoFilterOption, domain)) {
        Py_RETURN_TRUE;
    } else {
        Py_RETURN_FALSE;
    }
}
|
||
/* Method table of the AdBlock type: name, C implementation, calling
 * convention and docstring of each Python-visible method. */
static PyMethodDef AdBlock_methods[] = {
    {
        "parse",
        (PyCFunction)AdBlock_parse,
        METH_VARARGS,
        "Parse adblock data string, like the content of an easylist."
    },
    {
        "matches",
        (PyCFunction)AdBlock_matches,
        METH_VARARGS,
        "matches an url, returns True if it should be filtered."
    },
    {NULL, NULL, 0, NULL}  /* Sentinel: marks the end of the table */
};
|
||
/*
 * Type object for adblock.AdBlock.
 *
 * The initialiser is positional, so the order of the entries must follow
 * the PyTypeObject field order exactly; each entry is labelled with the
 * slot it fills. The type is subclassable (Py_TPFLAGS_BASETYPE).
 *
 * NOTE(review): the module init function in this file reassigns tp_new to
 * PyType_GenericNew before calling PyType_Ready, so the AdBlock_new entry
 * below looks like it is never used -- confirm.
 */
static PyTypeObject AdBlockType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "adblock.AdBlock",                          /* tp_name */
    sizeof(AdBlock),                            /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)AdBlock_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    "Adblock objects",                          /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    AdBlock_methods,                            /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)AdBlock_init,                     /* tp_init */
    0,                                          /* tp_alloc */
    AdBlock_new,                                /* tp_new */
};
|
||
|
||
/*
 * Module definition handed to PyModule_Create.
 *
 * NOTE(review): m_name is "adblock" while the init function is
 * PyInit__adblock (i.e. the extension is imported as `_adblock`) --
 * confirm the mismatch is intentional.
 */
static PyModuleDef adblockmodule = {
    PyModuleDef_HEAD_INIT,
    "adblock",                              /* m_name */
    "Module to speed up ad filtering.",     /* m_doc */
    -1,                                     /* m_size */
    NULL,                                   /* m_methods */
    NULL,                                   /* m_slots */
    NULL,                                   /* m_traverse */
    NULL,                                   /* m_clear */
    NULL                                    /* m_free */
};
|
||
/*
 * Module initialisation entry point for the `_adblock` extension.
 *
 * Finalises the AdBlock type, creates the module and publishes the type as
 * its `AdBlock` attribute. Returns the new module, or NULL with a Python
 * exception set on failure.
 */
PyMODINIT_FUNC
PyInit__adblock(void)
{
    PyObject* m;

    /* Replaces whatever tp_new the static initialiser provided. */
    AdBlockType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&AdBlockType) < 0)
        return NULL;

    m = PyModule_Create(&adblockmodule);
    if (m == NULL)
        return NULL;

    /* PyModule_AddObject steals the reference only when it succeeds; on
     * failure both the extra type reference and the module must be released
     * here, and the error reported to the importer. */
    Py_INCREF(&AdBlockType);
    if (PyModule_AddObject(m, "AdBlock", (PyObject *)&AdBlockType) < 0) {
        Py_DECREF(&AdBlockType);
        Py_DECREF(m);
        return NULL;
    }
    return m;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import os | ||
|
||
from distutils.core import setup, Extension | ||
|
||
|
||
# Directory that contains this setup script, with symlinks resolved.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# Locations of the vendored C++ libraries, all under <THIS_DIR>/vendor.
bloom_dir, hashset_dir, adblock_dir = (
    os.path.join(THIS_DIR, "vendor", vendored)
    for vendored in ("bloom-filter-cpp", "hashset-cpp", "ad-block")
)
|
||
|
||
# Default the compiler to g++ unless the caller already picked one through
# the CC environment variable. Without this gcc ends up being used and the
# code does not compile (the reason has not been investigated).
os.environ.setdefault("CC", "g++")
|
||
# The `_adblock` extension module: the Python binding compiled together with
# the vendored bloom-filter, hashset and ad-block sources.
adblocker = Extension(
    '_adblock',
    sources=[
        os.path.join(directory, filename)
        for directory, filenames in (
            (bloom_dir, ("BloomFilter.cpp", "hashFn.cpp")),
            (hashset_dir, ("HashSet.cpp",)),
            (adblock_dir, ("ad_block_client.cc", "filter.cc",
                           "cosmetic_filter.cc")),
            (os.path.join(THIS_DIR, "c"), ("adblock.c",)),
        )
        for filename in filenames
    ],
    include_dirs=[bloom_dir, hashset_dir, adblock_dir],
    define_macros=[],
    language="c++",
    # -g0 removes the debug symbols; whether it helps speed is unverified.
    extra_compile_args=["-g0"],
)
|
||
# Package metadata, kept separate from the setup() call for readability.
_metadata = dict(
    name='webmacs',
    version='1.0',
    description='Keyboard driven web browser, emacs-like',
    author='Julien Pagès',
    author_email='j.parkouss@gmail.com',
    url='todo',
    long_description='''
Work in progress.
''',
)

# Run the build/installation with the native ad-block extension attached.
setup(ext_modules=[adblocker], **_metadata)
Submodule bloom-filter-cpp
added at
5e5a53
Submodule hashset-cpp
added at
6a6ca1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters