Skip to content

Commit

Permalink
add basic search server for #2
Browse files Browse the repository at this point in the history
  • Loading branch information
mdamien committed May 20, 2016
1 parent edb08a2 commit 2b93a87
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 3 deletions.
38 changes: 38 additions & 0 deletions extract_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
import shlex
from tqdm import tqdm
import shutil
import subprocess
from subprocess import STDOUT, check_output

from extstats.CONSTS import CRX_DIRECTORY as DIR
from distutils.version import LooseVersion

# Path template for the directory one extension version is unpacked into;
# filled in with str.format(ext_id=..., version=...) by the extraction loop.
DESTINATION = 'crawled/sources/{ext_id}/{version}'

def sort_semverfiles(files):
    """Return *files* as a new list sorted in ascending version order.

    Each entry is expected to be a file name made of a dotted version
    followed by ``.zip`` (for example ``'1.10.2.zip'``). Only a trailing
    ``.zip`` is ignored when comparing; the returned names are unchanged.

    Numeric components compare as integers, so ``1.10`` sorts after
    ``1.9``. Non-numeric components compare as strings and always sort
    after numeric ones, which keeps mixed versions such as ``1.0a`` and
    ``1.0.1`` comparable instead of raising ``TypeError``.
    """
    # Local import so the function does not depend on distutils, which is
    # no longer shipped with recent Python releases.
    import re

    suffix = '.zip'

    def keyfunc(filename):
        if filename.endswith(suffix):
            version = filename[:-len(suffix)]
        else:
            version = filename
        # Split into runs of digits, runs of lowercase letters and
        # "anything else"; the dots themselves carry no ordering information.
        tokens = [
            tok
            for tok in re.split(r'(\d+|[a-z]+|\.)', version)
            if tok and tok != '.'
        ]
        # Tag every component so ints and strings are never compared
        # directly: (0, number, '') for numeric, (1, 0, text) otherwise.
        return [
            (0, int(tok), '') if tok.isdecimal() else (1, 0, tok)
            for tok in tokens
        ]

    return sorted(files, key=keyfunc)

# For every extension directory under DIR, unpack its newest archive (as
# ordered by sort_semverfiles) into a fresh DESTINATION directory.
for ext in os.listdir(DIR):
    ext_dir = os.path.join(DIR, ext)
    if not os.path.isdir(ext_dir):
        # Stray files next to the extension directories are not archives sets.
        continue
    files = os.listdir(ext_dir)
    if not files:
        # Nothing has been downloaded for this extension; there is no
        # "latest" archive to pick.
        continue
    latest = sort_semverfiles(files)[-1]
    fullpath = os.path.join(ext_dir, latest)
    size = os.path.getsize(fullpath)
    if size > 100000000:  # 100 MB: skip very large archives
        continue
    print(fullpath, size)
    dest = DESTINATION.format(ext_id=ext, version=latest.replace('.zip', ''))
    # Remove any previous extraction so stale files never survive a re-run.
    try:
        shutil.rmtree(dest)
    except FileNotFoundError:
        pass
    os.makedirs(dest, exist_ok=True)
    try:
        # Argument list instead of a shell string: no quoting is needed and
        # the timeout terminates unzip itself rather than an intermediate
        # shell process.
        check_output(['unzip', fullpath, '-d', dest], timeout=60)
    except subprocess.CalledProcessError as e:
        print('error:', e.returncode, ' - ', e.cmd)
    except subprocess.TimeoutExpired:
        # Best effort: report the archive and carry on with the next one.
        print('error: timeout - ', fullpath)
    except OSError as e:
        # e.g. the unzip executable is not installed; keep the loop going.
        print('error:', e)
5 changes: 3 additions & 2 deletions extstats/download_crx.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import os.path
import sys

# URL template for fetching a .crx file; the {ID} placeholder is presumably
# filled with the extension id (see down(ext_id, filename)) - TODO confirm.
# NOTE(review): DOWNLOAD_URL and DESTINATION are each assigned twice in this
# view (it looks like the old and new lines of a diff shown together); as
# written, the later assignment is the one that takes effect.
DOWNLOAD_URL = "https://clients2.google.com/service/update2/crx?response=redirect&prodversion=49.0.2623.108&x=id%3D{ID}%26uc"
DESTINATION = "crx/{ID}.crx"
# DOWNLOAD_URL = "https://clients2.google.com/service/update2/crx?response=redirect&os=cros&prodversion=9999&x=id%3D{ID}%26uc"
DOWNLOAD_URL = "https://clients2.google.com/service/update2/crx?response=redirect&os=cros&arch=x86-64&nacl_arch=x86-64&prod=chromiumcrx&prodchannel=unknown&prodversion=9999&x=id%3D{ID}%26uc"
DESTINATION = "{ID}.crx"


def down(ext_id, filename):
Expand Down
3 changes: 2 additions & 1 deletion req.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ tqdm
jinja2
arrow
beautifulsoup4
elasticsearch_dsl
elasticsearch_dsl
flask
21 changes: 21 additions & 0 deletions source_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import flask
from flask import request
import subprocess
import time

# Flask application object; the search route is registered on it and it is
# the object started by app.run() at the bottom of the file.
app = flask.Flask(__name__)

@app.route('/')
def index():
    """Stream ``ag`` search results for the ``q`` query parameter.

    Runs ``ag`` over ``crawled/sources/`` and streams its output back to
    the client line by line as plain text.

    Returns:
        A streaming ``flask.Response`` with the matching lines, or a
        400 response when ``q`` is missing or empty.
    """
    q = request.args.get('q')
    if not q:
        # Without a pattern there is nothing meaningful to search for.
        return flask.Response(
            'missing "q" query parameter\n',
            status=400,
            mimetype='text/plain',
        )

    def inner():
        proc = subprocess.Popen(
            # '--' ends option parsing so a query that starts with '-' is
            # treated as the search pattern, never as an ag option.
            ['ag', '--', q, 'crawled/sources/'],
            stdout=subprocess.PIPE,
        )
        try:
            # The pipe yields bytes; iterating it stops cleanly at EOF.
            for line in proc.stdout:
                # Searched files may contain bytes that are not valid UTF-8;
                # replace them rather than aborting the stream.
                yield line.rstrip().decode('utf8', errors='replace') + '\n'
        finally:
            # Also reached when the client disconnects mid-stream: make sure
            # the child is stopped and reaped.
            proc.stdout.close()
            if proc.poll() is None:
                proc.kill()
            proc.wait()

    # 'text/plain' is the registered MIME type for plain text.
    return flask.Response(inner(), mimetype='text/plain')

# Start Flask's built-in server on port 5000 as soon as this module is
# executed (there is no __main__ guard, so importing the module starts it too).
# NOTE(review): debug=True enables Flask's debug mode, which is meant for
# local development only - confirm this server is never exposed publicly.
app.run(debug=True, port=5000)

0 comments on commit 2b93a87

Please sign in to comment.