From d91c3555352f5fbf72ad44587496f8dc1f933f92 Mon Sep 17 00:00:00 2001 From: Luke Lee Date: Tue, 3 May 2016 11:02:07 +0200 Subject: [PATCH] Properly encode URLs passed to github contents endpoint - This fixes a bug where a URL could come in with 2 '?'. We'd pass that down to the github API, and it would be ignored by the API. Then, the API would return a different return type that expected in rendered_markdown_from_github. - Now these URLs will return a 404 because the requested guide can not be found. In addition, this will allow us to support guides with special URL characters in them now like '?'. - To reproduce and test see below. - For example, request this URL: - /front-end-javascript/introduction-to-test-driven-development-in-javascript?status=in-review?branch=adityavarshney - This would result in us passing the following 'path' to the github contents API: - repos/durden/articles/contents/in-review?branch=adityavarshney/front-end-javascript/introduction-to-test-driven-development-in-javascript/article.md - This would in turn return the contents of the contents/in-review/ directory, not a single file. This would finally result in a TypeError because the response was not text, but a list of values for each file in the directory. --- pskb_website/remote.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pskb_website/remote.py b/pskb_website/remote.py index d420788..5644df7 100644 --- a/pskb_website/remote.py +++ b/pskb_website/remote.py @@ -5,6 +5,7 @@ import base64 import collections import json +import urllib from flask_oauthlib.client import OAuth from flask import session @@ -269,7 +270,7 @@ def read_file_from_github(path, branch=u'master', rendered_text=True, # seems like a safe enough bet at this point. owner, repo, file_path = split_full_file_path(path) url = u'https://github.com/%s/%s/blob/%s/%s' % (owner, repo, branch, - file_path) + urllib.pathname2url(file_path)) details = file_details(path, branch, None, None, url, text) else: @@ -295,11 +296,7 @@ def rendered_markdown_from_github(path, branch=u'master', allow_404=False): resp = github.get(url, headers=headers, data={'ref': branch}) if resp.status == 200: - try: - return unicode(resp.data, encoding='utf-8') - except TypeError: - app.logger.error('Failed parsing response, url: "%s", branch: "%s"', url, branch) - raise + return unicode(resp.data, encoding='utf-8') if resp.status != 404 or not allow_404: log_error('Failed reading rendered markdown', url, resp, branch=branch) @@ -517,7 +514,8 @@ def contents_url_from_path(path): """ owner, repo, file_path = split_full_file_path(path) - return 'repos/%s/%s/contents/%s' % (owner, repo, file_path) + return urllib.pathname2url('repos/%s/%s/contents/%s' % (owner, repo, + file_path)) def read_branch(repo_path, name):