Skip to content

Commit

Permalink
Merge branch 'release/0.1.7'
Browse files (browse the repository at this point in the history)
  • Loading branch information
ladyrassilon committed Oct 4, 2014
2 parents fe4bcd1 + 4c7c78e commit d8f2d07
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 19 deletions.
13 changes: 8 additions & 5 deletions presser/exceptions.py
@@ -1,15 +1,18 @@

class PresserJavaScriptParseError(Exception):
class PresserError(Exception):
pass

class PresserJavaScriptParseError(PresserError):
pass

class PresserInvalidVineIdError(Exception):
class PresserInvalidVineIdError(PresserError):
pass

class PresserURLError(Exception):
class PresserURLError(PresserError):
pass

class Presser404Error(Exception):
class Presser404Error(PresserError):
pass

class PresserRequestError(Exception):
class PresserRequestError(PresserError):
pass
13 changes: 6 additions & 7 deletions presser/presser.py
Expand Up @@ -7,17 +7,14 @@
from .exceptions import PresserJavaScriptParseError, PresserURLError, Presser404Error, PresserRequestError, PresserInvalidVineIdError

class Presser:
def get_data_for_vine_id(self, vine_id):
def get_data_for_vine_id(self, vine_id, timeout=30):
try:
page = requests.get("https://vine.co/v/{}".format(vine_id))
page = requests.get("https://vine.co/v/{}".format(vine_id), timeout=timeout)
except requests.exceptions.RequestException as e:
error_message = "Problem with comminicating with vine page - {}".format(e)
raise PresserRequestError(error_message)
if page.ok:
content = BeautifulSoup(page.content)
if content.find("title").text == u'Vine':
if not content.find("body").text.count(u"Video:"):
raise Presser404Error("Could not find Vine Id {}".format(vine_id))
all_script_tags = content.find_all("script")
potential_script_tags = [script for script in all_script_tags if not script.has_attr("src")]
script_lines = []
Expand All @@ -38,16 +35,18 @@ def get_data_for_vine_id(self, vine_id):
except execjs.RuntimeError as e:
error_message = "Problem with parsing, check parsing logic. {}".format(e)
raise PresserJavaScriptParseError(error_message)
elif page.status_code == 404:
raise Presser404Error("{} could not be found".format(page.url))
else:
raise PresserURLError("{} could not be accessed {} - {}".format(page.url, page.status_code,page.content))

def get_data_for_vine_from_url(self, url):
def get_data_for_vine_from_url(self, url, timeout=30):
parsed_url = urllib.parse.urlparse(url)
if parsed_url.netloc == "vine.co":
results = re.search('/v/(?P<vine_id>\w+)',parsed_url.path)
if results:
vine_id = results.group("vine_id")
return self.get_data_for_vine_id(vine_id)
return self.get_data_for_vine_id(vine_id, timeout=timeout)
else:
raise PresserInvalidVineIdError("{} does not contain a valid vine id".format(parsed_url.path))
else:
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Expand Up @@ -4,7 +4,7 @@
long_description = readme_file.read()

setup(name="Presser",
version="0.1.6",
version="0.1.7",
packages=["presser",],
license="GNU GPL v3.0",
description="Extracts data from vine, in lieu of an API",
Expand Down Expand Up @@ -38,9 +38,9 @@
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: Implementation",
# "Programming Language :: Python :: Implementation",
"Programming Language :: Python :: Implementation :: CPython",
# "Programming Language :: Python :: Implementation :: PyPy",
"Programming Language :: Python :: Implementation :: PyPy",
"Programming Language :: Python",
"Intended Audience :: Developers",
]
Expand Down
14 changes: 10 additions & 4 deletions tests/unit.py
Expand Up @@ -2,7 +2,7 @@
import responses
import requests

from mock import patch
from mock import patch, MagicMock

from presser.presser import Presser
from presser.exceptions import Presser404Error, PresserURLError, PresserInvalidVineIdError, PresserJavaScriptParseError, PresserRequestError
Expand Down Expand Up @@ -30,7 +30,7 @@ def test_not_a_valid_vine_id(self):
@patch('presser.presser.Presser.get_data_for_vine_id')
def test_vine_id_extraction(self, vine_response):
vine = self.presser.get_data_for_vine_from_url(VINE_URL)
self.presser.get_data_for_vine_id.assert_called_with(VINE_ID)
self.presser.get_data_for_vine_id.assert_called_with(VINE_ID, timeout=30)

@responses.activate
def test_vine_data_extraction(self):
Expand All @@ -47,7 +47,7 @@ def test_404_detection_logic(self):
with open("tests/404.html") as not_found_html:
body = not_found_html.read()
responses.add(responses.GET, NOT_FOUND_URL,
body=body, status=200,
body=body, status=404,
content_type='text/html')
self.assertRaises(Presser404Error, self.presser.get_data_for_vine_from_url, NOT_FOUND_URL)

Expand Down Expand Up @@ -84,4 +84,10 @@ def test_error_request(self):

@patch("requests.models.Response.ok", False)
def test_page_not_okay(self):
self.assertRaises(PresserURLError, self.presser.get_data_for_vine_from_url, VINE_URL)
self.assertRaises(PresserURLError, self.presser.get_data_for_vine_from_url, VINE_URL)

@patch("requests.get")
def test_timeout_passed_through(self, request_mock):
#Yes this is hacky, BUT responses doesn't record the timeout parameter
self.assertRaises(TypeError, self.presser.get_data_for_vine_from_url, VINE_URL, timeout=5)
requests.get.assert_called_with(VINE_URL, timeout=5)

0 comments on commit d8f2d07

Please sign in to comment.