Changed all providers to use minidom rather than elementTree to parse XML, which should solve namespace problems (for Newzbin specifically)
midgetspy committed Feb 26, 2012
1 parent f5b3af0 commit 97b2df7
Showing 10 changed files with 122 additions and 112 deletions.
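
For context, here is a minimal standalone sketch (not part of the commit; the feed snippet is invented) of the namespace behaviour that motivated the switch:

# Sketch: ElementTree expands namespace prefixes into "{uri}tag" names, so
# lookups must spell out the full namespace URI. minidom keeps the qualified
# name exactly as it appears in the feed, which sidesteps the Newzbin problem.
import xml.etree.ElementTree as etree
from xml.dom.minidom import parseString

FEED = """<?xml version="1.0"?>
<rss xmlns:report="http://www.newzbin2.es/DTD/2007/feeds/report/">
<channel><item>
<title>Some.Show.S01E01</title>
<report:postdate>Sun, 26 Feb 2012 12:00:00 GMT</report:postdate>
</item></channel></rss>"""

# ElementTree: must use the fully-qualified form
item = etree.fromstring(FEED).find('channel/item')
print(item.find('{http://www.newzbin2.es/DTD/2007/feeds/report/}postdate').text)

# minidom: the prefixed tag name works as-is
dom = parseString(FEED)
print(dom.getElementsByTagName('report:postdate')[0].firstChild.data)
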
9 changes: 9 additions & 0 deletions sickbeard/helpers.py
@@ -24,6 +24,8 @@
import re, socket
import shutil

from xml.dom.minidom import Node

import sickbeard

from sickbeard.exceptions import MultipleShowObjectsException
@@ -536,3 +538,10 @@ def create_https_certificates(ssl_cert, ssl_key):
if __name__ == '__main__':
    import doctest
    doctest.testmod()

def get_xml_text(node):
    text = ""
    for child_node in node.childNodes:
        if child_node.nodeType in (Node.CDATA_SECTION_NODE, Node.TEXT_NODE):
            text += child_node.data
    return text.strip()
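
A quick usage sketch of the new helper (document invented): it concatenates TEXT and CDATA children and strips the surrounding whitespace, which is exactly what RSS titles wrapped in CDATA need.

# Sketch: exercising get_xml_text on a node with mixed CDATA and text children
from xml.dom.minidom import parseString

node = parseString('<title> <![CDATA[Some.Show]]> S01E01 </title>').documentElement
print(get_xml_text(node))  # -> "Some.Show S01E01"
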
34 changes: 15 additions & 19 deletions sickbeard/providers/ezrss.py
@@ -18,16 +18,15 @@

import urllib
import re

import xml.etree.cElementTree as etree
from xml.dom.minidom import parseString

import sickbeard
import generic

from sickbeard.common import Quality
from sickbeard import logger
from sickbeard import tvcache
from sickbeard.helpers import sanitizeSceneName
from sickbeard.helpers import sanitizeSceneName, get_xml_text
from sickbeard.exceptions import ex

class EZRSSProvider(generic.TorrentProvider):
@@ -41,8 +40,6 @@ def __init__(self):
        self.cache = EZRSSCache(self)

        self.url = 'https://www.ezrss.it/'

        self.ezrss_ns = 'http://xmlns.ezrss.it/0.1/'

    def isEnabled(self):
        return sickbeard.EZRSS
@@ -52,8 +49,12 @@ def imageName(self):

    def getQuality(self, item):

        filename = item.findtext('{%s}torrent/{%s}fileName' %(self.ezrss_ns,self.ezrss_ns))
        torrent_node = item.getElementsByTagName('torrent')[0]
        filename_node = torrent_node.getElementsByTagName('filename')[0]
        filename = get_xml_text(filename_node)

        quality = Quality.nameQuality(filename)

        return quality

    def findSeasonResults(self, show, season):
@@ -115,8 +116,8 @@ def _doSearch(self, search_params, show=None):
            return []

        try:
            responseSoup = etree.ElementTree(etree.XML(data))
            items = responseSoup.getiterator('item')
            parsedXML = parseString(data)
            items = parsedXML.getElementsByTagName('item')
        except Exception, e:
            logger.log(u"Error trying to load EZRSS RSS feed: "+ex(e), logger.ERROR)
            logger.log(u"RSS data: "+data, logger.DEBUG)
@@ -137,9 +138,11 @@ def _get_title_and_url(self, item):
        return results

    def _get_title_and_url(self, item):
        title = item.findtext('title')
        url = item.findtext('link').replace('&amp;','&')
        filename = item.findtext('{%s}torrent/{%s}fileName' %(self.ezrss_ns,self.ezrss_ns))
        (title, url) = generic.TorrentProvider._get_title_and_url(self, item)

        torrent_node = item.getElementsByTagName('torrent')[0]
        filename_node = torrent_node.getElementsByTagName('filename')[0]
        filename = get_xml_text(filename_node)

        new_title = self._extract_name_from_filename(filename)
        if new_title:
@@ -178,14 +181,7 @@ def _getRSSData(self):

    def _parseItem(self, item):

        title = item.findtext('title')
        url = item.findtext('link')
        filename = item.findtext('{%s}torrent/{%s}fileName' %(self.provider.ezrss_ns,self.provider.ezrss_ns))

        new_title = self.provider._extract_name_from_filename(filename)
        if new_title:
            title = new_title
            logger.log(u"Extracted the name "+title+" from the torrent link", logger.DEBUG)
        (title, url) = self.provider._get_title_and_url(item)

        if not title or not url:
            logger.log(u"The XML returned from the EZRSS RSS feed is incomplete, this result is unusable", logger.ERROR)
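
One caveat with the new lookups above: indexing getElementsByTagName(...)[0] raises IndexError when a feed item omits the expected tag. A defensive variant (an illustration only, not what the commit does; get_xml_text is the helper added above) could look like this:

# Hypothetical sketch: tolerate items missing <torrent> or <filename>
def get_torrent_filename(item):
    torrent_nodes = item.getElementsByTagName('torrent')
    if not torrent_nodes:
        return None
    filename_nodes = torrent_nodes[0].getElementsByTagName('filename')
    if not filename_nodes:
        return None
    return get_xml_text(filename_nodes[0])
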
20 changes: 17 additions & 3 deletions sickbeard/providers/generic.py
@@ -180,7 +180,14 @@ def searchRSS(self):
        return self.cache.findNeededEpisodes()

    def getQuality(self, item):
        title = item.findtext('title')
        """
        Figures out the quality of the given RSS item node
        item: An xml.dom.minidom.Node representing the <item> tag of the RSS feed
        Returns a Quality value obtained from the node's data
        """
        (title, url) = self._get_title_and_url(item) #@UnusedVariable
        quality = Quality.nameQuality(title)
        return quality

@@ -194,8 +201,15 @@ def _get_episode_search_strings(self, ep_obj):
        return []

    def _get_title_and_url(self, item):
        title = item.findtext('title')
        url = item.findtext('link')
        """
        Retrieves the title and URL data from the item XML node
        item: An xml.dom.minidom.Node representing the <item> tag of the RSS feed
        Returns: A tuple containing two strings representing title and URL respectively
        """
        title = helpers.get_xml_text(item.getElementsByTagName('title')[0])
        url = helpers.get_xml_text(item.getElementsByTagName('link')[0])
        if url:
            url = url.replace('&amp;','&')

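
To see the shared _get_title_and_url pattern end to end, here is a small sketch (item XML invented; get_xml_text is the helper from above): minidom already decodes &amp; once during parsing, so the extra replace only matters for feeds that double-encode their links.

# Sketch: the title/link extraction used by the generic provider
from xml.dom.minidom import parseString

ITEM = '<item><title>Some.Show.S01E01</title><link>http://example.com/nzb?id=1&amp;key=2</link></item>'
item = parseString(ITEM).documentElement
title = get_xml_text(item.getElementsByTagName('title')[0])
url = get_xml_text(item.getElementsByTagName('link')[0]).replace('&amp;', '&')
print(title)  # Some.Show.S01E01
print(url)    # http://example.com/nzb?id=1&key=2
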
32 changes: 14 additions & 18 deletions sickbeard/providers/newzbin.py
@@ -22,7 +22,7 @@
import time
import urllib

import xml.etree.cElementTree as etree
from xml.dom.minidom import parseString
from datetime import datetime, timedelta

import sickbeard
Expand Down Expand Up @@ -75,26 +75,22 @@ def __init__(self):

        self.url = 'https://www.newzbin2.es/'

        self.NEWZBIN_NS = 'http://www.newzbin2.es/DTD/2007/feeds/report/'

        self.NEWZBIN_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %Z'

    def _report(self, name):
        return '{'+self.NEWZBIN_NS+'}'+name

    def isEnabled(self):
        return sickbeard.NEWZBIN

    def getQuality(self, item):
        attributes = item.find(self._report('attributes'))
        attributes = item.getElementsByTagName('report:attributes')[0]
        attr_dict = {}

        for attribute in attributes.getiterator(self._report('attribute')):
            cur_attr = attribute.attrib['type']
        for attribute in attributes.getElementsByTagName('report:attribute'):
            cur_attr = attribute.getAttribute('type')
            cur_attr_value = helpers.get_xml_text(attribute)
            if cur_attr not in attr_dict:
                attr_dict[cur_attr] = [attribute.text]
                attr_dict[cur_attr] = [cur_attr_value]
            else:
                attr_dict[cur_attr].append(attribute.text)
                attr_dict[cur_attr].append(cur_attr_value)

        logger.log("Finding quality of item based on attributes "+str(attr_dict), logger.DEBUG)

@@ -276,25 +272,26 @@ def _doSearch(self, searchStr, show=None):
        item_list = []

        try:
            responseSoup = etree.ElementTree(etree.XML(data))
            items = responseSoup.getiterator('item')
            parsedXML = parseString(data)
            items = parsedXML.getElementsByTagName('item')
        except Exception, e:
            logger.log("Error trying to load Newzbin RSS feed: "+ex(e), logger.ERROR)
            return []

        for cur_item in items:
            title = cur_item.findtext('title')
            title = helpers.get_xml_text(cur_item.getElementsByTagName('title')[0])
            if title == 'Feeds Error':
                raise exceptions.AuthException("The feed wouldn't load, probably because of invalid auth info")
            if sickbeard.USENET_RETENTION is not None:
                try:
                    dateString = cur_item.findtext('{http://www.newzbin2.es/DTD/2007/feeds/report/}postdate')
                    dateString = helpers.get_xml_text(cur_item.getElementsByTagName('report:postdate')[0])
                    # use the parse (imported as parseDate) function from the dateutil lib
                    # and we have to remove the timezone info from it because the retention_date will not have one
                    # and a comparison of them is not possible
                    post_date = parseDate(dateString).replace(tzinfo=None)
                    retention_date = datetime.now() - timedelta(days=sickbeard.USENET_RETENTION)
                    if post_date < retention_date:
                        logger.log(u"Date "+str(post_date)+" is out of retention range, skipping", logger.DEBUG)
                        continue
                except Exception, e:
                    logger.log("Error parsing date from Newzbin RSS feed: " + str(e), logger.ERROR)
@@ -350,7 +347,7 @@ def __init__(self, provider):
        tvcache.TVCache.__init__(self, provider)

        # only poll Newzbin every 10 mins max
        self.minTime = 10
        self.minTime = 1

    def _getRSSData(self):

@@ -360,8 +357,7 @@ def _getRSSData(self):

    def _parseItem(self, item):

        title = item.findtext('title')
        url = item.findtext('link')
        (title, url) = self.provider._get_title_and_url(item)

        if title == 'Feeds Error':
            logger.log("There's an error in the feed, probably bad auth info", logger.DEBUG)
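
The getQuality loop above turns Newzbin's report attributes into a dict of lists keyed by their "type" attribute; a functionally equivalent standalone sketch (attribute names and values invented):

# Sketch: accumulating report:attribute elements into {type: [values]}
from xml.dom.minidom import parseString

XML = """<item xmlns:report="http://www.newzbin2.es/DTD/2007/feeds/report/">
<report:attributes>
<report:attribute type="Source">HDTV</report:attribute>
<report:attribute type="Video Fmt">x264</report:attribute>
<report:attribute type="Video Fmt">720p</report:attribute>
</report:attributes>
</item>"""

attributes = parseString(XML).getElementsByTagName('report:attributes')[0]
attr_dict = {}
for attribute in attributes.getElementsByTagName('report:attribute'):
    attr_dict.setdefault(attribute.getAttribute('type'), []).append(attribute.firstChild.data)
print(attr_dict)  # {'Source': ['HDTV'], 'Video Fmt': ['x264', '720p']}
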
67 changes: 29 additions & 38 deletions sickbeard/providers/newznab.py
@@ -23,7 +23,7 @@
import re
import os

import xml.etree.cElementTree as etree
from xml.dom.minidom import parseString

import sickbeard
import generic
@@ -150,7 +150,27 @@ def _get_episode_search_strings(self, ep_obj):
    def _doGeneralSearch(self, search_string):
        return self._doSearch({'q': search_string})

    #def _doSearch(self, show, season=None, episode=None, search=None):
    def _checkAuthFromData(self, data):

        try:
            parsedXML = parseString(data)
        except Exception:
            return False

        if parsedXML.documentElement.tagName == 'error':
            code = parsedXML.documentElement.getAttribute('code')
            if code == '100':
                raise exceptions.AuthException("Your API key for "+self.name+" is incorrect, check your config.")
            elif code == '101':
                raise exceptions.AuthException("Your account on "+self.name+" has been suspended, contact the administrator.")
            elif code == '102':
                raise exceptions.AuthException("Your account isn't allowed to use the API on "+self.name+", contact the administrator")
            else:
                logger.log(u"Unknown error given from "+self.name+": "+parsedXML.documentElement.getAttribute('description'), logger.ERROR)
                return False

        return True

    def _doSearch(self, search_params, show=None):

        params = {"t": "tvsearch",
@@ -178,41 +198,29 @@ def _doSearch(self, search_params, show=None):
            data = '<?xml version="1.0" encoding="ISO-8859-1" ?>' + data

        try:
            responseSoup = etree.ElementTree(etree.XML(data))
            items = responseSoup.getiterator('item')
            parsedXML = parseString(data)
            items = parsedXML.getElementsByTagName('item')
        except Exception, e:
            logger.log(u"Error trying to load "+self.name+" RSS feed: "+ex(e), logger.ERROR)
            logger.log(u"RSS data: "+data, logger.DEBUG)
            return []

        if responseSoup.getroot().tag == 'error':
            code = responseSoup.getroot().get('code')
            if code == '100':
                raise exceptions.AuthException("Your API key for "+self.name+" is incorrect, check your config.")
            elif code == '101':
                raise exceptions.AuthException("Your account on "+self.name+" has been suspended, contact the administrator.")
            elif code == '102':
                raise exceptions.AuthException("Your account isn't allowed to use the API on "+self.name+", contact the administrator")
            else:
                logger.log(u"Unknown error given from "+self.name+": "+responseSoup.getroot().get('description'), logger.ERROR)
                return []
        if not self._checkAuthFromData(data):
            return []

        if responseSoup.getroot().tag != 'rss':
        if parsedXML.documentElement.tagName != 'rss':
            logger.log(u"Resulting XML from "+self.name+" isn't RSS, not parsing it", logger.ERROR)
            return []

        results = []

        for curItem in items:
            title = curItem.findtext('title')
            url = curItem.findtext('link')
            (title, url) = self._get_title_and_url(curItem)

            if not title or not url:
                logger.log(u"The XML returned from the "+self.name+" RSS feed is incomplete, this result is unusable: "+data, logger.ERROR)
                continue

            url = url.replace('&amp;','&')

            results.append(curItem)

        return results
@@ -268,21 +276,4 @@ def _getRSSData(self):

    def _checkAuth(self, data):

        try:
            responseSoup = etree.ElementTree(etree.XML(data))
        except Exception:
            return True

        if responseSoup.getroot().tag == 'error':
            code = responseSoup.getroot().get('code')
            if code == '100':
                raise exceptions.AuthException("Your API key for "+self.provider.name+" is incorrect, check your config.")
            elif code == '101':
                raise exceptions.AuthException("Your account on "+self.provider.name+" has been suspended, contact the administrator.")
            elif code == '102':
                raise exceptions.AuthException("Your account isn't allowed to use the API on "+self.provider.name+", contact the administrator")
            else:
                logger.log(u"Unknown error given from "+self.provider.name+": "+responseSoup.getroot().get('description'), logger.ERROR)
                return False

        return True
        return self.provider._checkAuthFromData(data)
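
Factoring the auth check into _checkAuthFromData lets the search path and the cache share one implementation. A sketch of the newznab error envelope it inspects (response body invented, shape per the code above):

# Sketch: the error document a newznab API returns for a bad API key
from xml.dom.minidom import parseString

DATA = '<error code="100" description="Incorrect user credentials"/>'
parsedXML = parseString(DATA)
if parsedXML.documentElement.tagName == 'error':
    print(parsedXML.documentElement.getAttribute('code'))         # 100
    print(parsedXML.documentElement.getAttribute('description'))  # Incorrect user credentials
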
18 changes: 9 additions & 9 deletions sickbeard/providers/nzbmatrix.py
@@ -21,12 +21,12 @@
import urllib
import datetime

import xml.etree.cElementTree as etree
from xml.dom.minidom import parseString

import sickbeard
import generic

from sickbeard import classes, logger, show_name_helpers
from sickbeard import classes, logger, show_name_helpers, helpers
from sickbeard import tvcache
from sickbeard.exceptions import ex

@@ -95,17 +95,16 @@ def _doSearch(self, curString, quotes=False, show=None):
            return []

        try:
            responseSoup = etree.ElementTree(etree.XML(searchResult))
            items = responseSoup.getiterator('item')
            parsedXML = parseString(searchResult)
            items = parsedXML.getElementsByTagName('item')
        except Exception, e:
            logger.log(u"Error trying to load NZBMatrix RSS feed: "+ex(e), logger.ERROR)
            return []

        results = []

        for curItem in items:
            title = curItem.findtext('title')
            url = curItem.findtext('link')
            (title, url) = self._get_title_and_url(curItem)

            if title == 'Error: No Results Found For Your Search':
                continue
@@ -125,10 +124,11 @@ def findPropers(self, date=None):

        for curResult in self._doSearch("(PROPER,REPACK)"):

            title = curResult.findtext('title')
            url = curResult.findtext('link').replace('&amp;','&')
            (title, url) = self._get_title_and_url(curResult)

            description_node = curResult.getElementsByTagName('description')[0]
            descriptionStr = helpers.get_xml_text(description_node)

            descriptionStr = curResult.findtext('description')
            dateStr = re.search('<b>Added:</b> (\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)', descriptionStr).group(1)
            if not dateStr:
                logger.log(u"Unable to figure out the date for entry "+title+", skipping it")
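
The findPropers change reads the posting date out of the description text with a regex. One detail worth noting in a sketch (description string invented): re.search returns None when the marker is absent, so checking the match before calling .group() avoids an AttributeError, whereas the code above calls .group(1) first.

# Sketch: guarded version of the date extraction above
import re

descriptionStr = 'Category: TV > HD<br /><b>Added:</b> 2012-02-26 10:15:00<br />'
match = re.search(r'<b>Added:</b> (\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)', descriptionStr)
dateStr = match.group(1) if match else None
print(dateStr)  # 2012-02-26 10:15:00
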
