Skip to content

Commit

Permalink
Merge pull request #189 from sakurai-youhei/PR/static_dep-with-proxy
Browse files Browse the repository at this point in the history
Replace ftplib with urllib to pick up ftp_proxy when building lxml with STATIC_DEPS=true
  • Loading branch information
scoder committed Mar 8, 2016
2 parents 3585b57 + d8abcf7 commit 0334372
Showing 1 changed file with 33 additions and 10 deletions.
43 changes: 33 additions & 10 deletions buildlibxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from distutils import log, sysconfig, version

try:
from urlparse import urlsplit, urljoin
from urllib import urlretrieve
from urlparse import urlsplit, urljoin, unquote
from urllib import urlretrieve, urlopen
except ImportError:
from urllib.parse import urlsplit, urljoin
from urllib.request import urlretrieve
from urllib.parse import urlsplit, urljoin, unquote
from urllib.request import urlretrieve, urlopen

multi_make_options = []
try:
Expand Down Expand Up @@ -100,14 +100,37 @@ def get_prebuilt_libxml2xslt(download_dir, static_include_dirs, static_library_d
match_libfile_version = re.compile('^[^-]*-([.0-9-]+)[.].*').match

def ftp_listdir(url):
    """Return an iterable of file names listed at the FTP directory *url*.

    Uses urlopen() rather than ftplib so that an ``ftp_proxy`` environment
    variable is honoured.  A proxy may answer either with a plain-text FTP
    listing or with an HTML index page; both formats are handled.
    """
    assert url.lower().startswith('ftp://')
    from email.message import Message
    res = urlopen(url)
    try:
        # Determine the listing's charset from the Content-Type header,
        # falling back to UTF-8 when the header is absent or has no charset.
        content_type = res.headers.get('Content-Type')
        if content_type:
            msg = Message()
            msg.add_header('Content-Type', content_type)
            charset = msg.get_content_charset('utf-8')
        else:
            charset = 'utf-8'
        listing = res.read().decode(charset)
    finally:
        # Close the connection even if reading or decoding fails.
        res.close()
    if content_type and content_type.startswith('text/html'):
        # A proxy typically renders the directory as an HTML index page.
        return parse_html_ftplist(listing)
    return parse_text_ftplist(listing)

def parse_text_ftplist(s):
    """Yield the file names found in a plain-text FTP directory listing *s*.

    Directory entries (mode string starting with 'd') are skipped.  Blank
    lines are skipped too — previously ``''.split(None, 9)[-1]`` raised an
    IndexError on them.
    """
    for line in s.splitlines():
        if line and not line.startswith('d'):
            # -rw-r--r-- 1 ftp ftp 476 Sep 1 2011 md5sum.txt
            # Last (9th) element is 'md5sum.txt' in the above example.
            yield line.split(None, 9)[-1]

def parse_html_ftplist(s):
    """Yield the (percent-decoded) file names linked from an HTML index page *s*.

    Links whose target ends in '/' are directory entries and are skipped.
    Duplicate links are reported only once.
    """
    href_pattern = re.compile(r'<a\s+(?:[^>]*?\s+)?href=["\'](.*?)[;\?"\']', re.I|re.M)
    for target in set(href_pattern.findall(s)):
        if target.endswith('/'):
            continue  # directory entry
        yield unquote(target)

def tryint(s):
try:
return int(s)
Expand Down

0 comments on commit 0334372

Please sign in to comment.