Added initial addon

topfs2 · Jun 8, 2012 · d6dbaed · d6dbaed
1 parent 15dd7f0
commit d6dbaed
Show file tree

Hide file tree

Showing 7 changed files with 482 additions and 0 deletions.
diff --git a/addon.xml b/addon.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<addon
+  id="statistics.gsoc.scraper"
+  version="0.0.1"
+  name="Statistics gathering for scraping GSoC 2012"
+  provider-name="topfs2, Team XBMC">
+  <requires>
+    <import addon="xbmc.python" version="2.0"/>
+  </requires>
+  <extension 
+    point="xbmc.python.script" library="default.py"/>
+
+  <extension point="xbmc.addon.metadata">
+    <language></language>
+    <summary>Statistics gathering for scraping GSoC 2012. All data gathered will be anonymous and contain no information about whom it is gathered from. The script will gather information about the files existing in your sources (without any passwords or usernames) and will couple that with what is scanned into the database (again no usernames or passwords). This will be posted to xbmc.org for further analysis.</summary>
+    <platform>all</platform>
+  </extension>
+</addon>
diff --git a/default.py b/default.py
@@ -0,0 +1,12 @@
+import xbmc, xbmcgui
+import xbmcjsonrpc
+import state
+import states
+
+# Action code for "previous menu"; the value is taken from keymap.xml.
+# NOTE(review): not referenced anywhere else in this script -- confirm it is still needed.
+ACTION_PREVIOUS_MENU = 10
+
+# The state manager runs one UI state at a time and queues any follow-up state.
+sm = state.StateManager()
+
+# Start with the selection window, then block until no state is left to run.
+sm.switchTo(states.InitialWindow())
+sm.doModal()
diff --git a/extraction.py b/extraction.py
@@ -0,0 +1,185 @@
+from url import removeFromStackAndRecurse
+import json
+
+from xbmcjsonrpc import *
+
+# Property names passed to getMovies() by extractMovies().
+movie_properties = ["title", "runtime", "imdbnumber", "year", "file"]
+
+# Property names passed to getEpisodes() by extractEpisodes().
+episode_properties = ["title", "season", "episode", "tvshowid", "file"]
+
+# Property names passed to getTVShows() by extractEpisodes().
+show_properties = ["title", "imdbnumber"]
+
+# Picture file extensions (lower case, leading dot), taken from XBMC.
+m_pictureExtensions = ".png|.jpg|.jpeg|.bmp|.gif|.ico|.tif|.tiff|.tga|.pcx|.cbz|.zip|.cbr|.rar|.m3u|.dng|.nef|.cr2|.crw|.orf|.arw|.erf|.3fr|.dcr|.x3f|.mef|.raf|.mrw|.pef|.sr2|.rss".split("|")
+
+# Music file extensions (lower case, leading dot), taken from XBMC.
+m_musicExtensions = ".nsv|.m4a|.flac|.aac|.strm|.pls|.rm|.rma|.mpa|.wav|.wma|.ogg|.mp3|.mp2|.m3u|.mod|.amf|.669|.dmf|.dsm|.far|.gdm|.imf|.it|.m15|.med|.okt|.s3m|.stm|.sfx|.ult|.uni|.xm|.sid|.ac3|.dts|.cue|.aif|.aiff|.wpl|.ape|.mac|.mpc|.mp+|.mpp|.shn|.zip|.rar|.wv|.nsf|.spc|.gym|.adx|.dsp|.adp|.ymf|.ast|.afc|.hps|.xsp|.xwav|.waa|.wvs|.wam|.gcm|.idsp|.mpdsp|.mss|.spt|.rsd|.mid|.kar|.sap|.cmc|.cmr|.dmc|.mpt|.mpd|.rmt|.tmc|.tm8|.tm2|.oga|.url|.pxml|.tta|.rss|.cm3|.cms|.dlt|.brstm|.wtv|.mka".split("|")
+
+# Video file extensions (lower case, leading dot), taken from XBMC.
+# extractVideoFilesFromDirectory() matches getExtension() results against this list.
+m_videoExtensions = ".m4v|.3g2|.3gp|.nsv|.tp|.ts|.ty|.strm|.pls|.rm|.rmvb|.m3u|.ifo|.mov|.qt|.divx|.xvid|.bivx|.vob|.nrg|.img|.iso|.pva|.wmv|.asf|.asx|.ogm|.m2v|.avi|.bin|.dat|.mpg|.mpeg|.mp4|.mkv|.avc|.vp3|.svq3|.nuv|.viv|.dv|.fli|.flv|.rar|.001|.wpl|.zip|.vdr|.dvr-ms|.xsp|.mts|.m2t|.m2ts|.evo|.ogv|.sdp|.avs|.rec|.url|.pxml|.vc1|.h264|.rcv|.rss|.mpls|.webm|.bdmv|.wtv".split("|")
+
+def extractEpisodes(files, onProgress, isInterrupted):
+	"""Fetch FILE, TVSHOW_TITLE, EPISODE_TITLE, SEASON and EPISODE for the episodes in the video library.
+
+	files         -- set that receives the resolved path of every episode visited.
+	onProgress    -- optional callable, given a percentage before each episode is handled.
+	isInterrupted -- callable polled after each episode; a true result stops the loop.
+
+	Returns the list of episode dicts collected so far.
+	"""
+	# tvshowid -> title lookup, so each episode can carry the title of its show.
+	showTitles = dict((s["tvshowid"], s["title"]) for s in getTVShows(show_properties))
+
+	libraryEpisodes = getEpisodes(episode_properties)
+	total = len(libraryEpisodes)
+	collected = []
+
+	for index, entry in enumerate(libraryEpisodes):
+		if onProgress:
+			onProgress(index * 100 / total)
+
+		resolved = removeFromStackAndRecurse(entry["file"])
+		files.add(resolved)
+
+		collected.append({
+			"file": resolved,
+			"tvshow_title": showTitles[entry["tvshowid"]],
+			"episode_title": entry["title"],
+			"season": entry["season"],
+			"episode": entry["episode"]
+		})
+
+		if isInterrupted():
+			break
+
+	return collected
+
+def extractMovies(files, onProgress, isInterrupted):
+	"""Fetch FILE, TITLE, YEAR, IMDB and RUNTIME for the movies in the video library.
+
+	files         -- set that receives the resolved path of every movie visited.
+	onProgress    -- optional callable, given a percentage before each movie is handled.
+	isInterrupted -- callable polled after each movie; a true result stops the loop.
+
+	Returns the list of movie dicts collected so far.
+	"""
+	libraryMovies = getMovies(movie_properties)
+	total = len(libraryMovies)
+	collected = []
+
+	for index, entry in enumerate(libraryMovies):
+		if onProgress:
+			onProgress(index * 100 / total)
+
+		resolved = removeFromStackAndRecurse(entry["file"])
+		files.add(resolved)
+
+		collected.append({
+			"file": resolved,
+			"title": entry["title"],
+			"year": entry["year"],
+			"imdb": entry["imdbnumber"],
+			"runtime": entry["runtime"]
+		})
+
+		if isInterrupted():
+			break
+
+	return collected
+
+def getExtension(path):
+	"""Return the lower-cased text of path from its last "." onward, or None when path contains no "."."""
+	dot = path.rfind(".")
+	if dot < 0:
+		return None
+	return path[dot:].lower()
+
+def extractVideoFilesFromDirectory(files, videoFiles, directory, isInterrupted, onProgress = None):
+	"""Walk directory recursively and append every video file not already in files to videoFiles.
+
+	files         -- collection of paths that must be skipped; it is only read here.
+	videoFiles    -- list that receives one {"file": path} dict per match.
+	directory     -- path handed to getDirectory().
+	isInterrupted -- callable polled after each entry; a true result stops this level's loop.
+	onProgress    -- optional callable, given a percentage for the top-level listing only.
+	"""
+	entries = getDirectory(directory)
+	total = len(entries)
+
+	for position, entry in enumerate(entries):
+		if onProgress:
+			onProgress(position * 100 / total)
+
+		kind = entry["filetype"]
+		if kind == "directory":
+			# Sub-directories are scanned without progress reporting.
+			extractVideoFilesFromDirectory(files, videoFiles, entry["file"], isInterrupted)
+		elif kind == "file":
+			path = removeFromStackAndRecurse(entry["file"])
+			if path not in files and getExtension(path) in m_videoExtensions:
+				# Here we could extract subtitles etc.
+				videoFiles.append({"file": path})
+
+		if isInterrupted():
+			break
+
+def extractVideoFiles(files, onProgress, isInterrupted):
+	"""Scan every source returned by getSources() for video files that are not already in files.
+
+	files         -- collection of paths to skip, passed through to extractVideoFilesFromDirectory().
+	onProgress    -- optional callable taking (source label, overall percentage).
+	isInterrupted -- callable polled after each source; a true result stops the loop.
+
+	Returns the list of {"file": path} dicts gathered so far.
+	"""
+	sources = getSources()
+
+	videoFiles = list()
+	nbrSources = len(sources)
+
+	for i, source in enumerate(sources):
+		if onProgress:
+			onProgress(source["label"], i * 100 // nbrSources)
+
+			# Source i owns the slice [i*100/n, (i+1)*100/n) of the overall bar, so the
+			# within-source percentage is scaled by the source count exactly once.
+			def midProgress(percentage, base = i * 100, label = source["label"]):
+				onProgress(label, (base + percentage) // nbrSources)
+		else:
+			midProgress = None
+
+		extractVideoFilesFromDirectory(files, videoFiles, source["file"], isInterrupted, midProgress)
+
+		if isInterrupted():
+			break
+
+	return videoFiles
+
+def main():
+	"""Run every extractor without progress reporting and dump the results as JSON.
+
+	Writes episodes.json, movies.json and videoFiles.json using relative paths.
+	"""
+	isInterrupted = lambda : False
+
+	files = set()
+	episodes = extractEpisodes(files, None, isInterrupted)
+	movies = extractMovies(files, None, isInterrupted)
+
+	videoFiles = extractVideoFiles(files, None, isInterrupted)
+
+	outputs = (
+		('episodes.json', episodes),
+		('movies.json', movies),
+		('videoFiles.json', videoFiles),
+	)
+
+	for name, data in outputs:
+		# 'with' closes each file, so the JSON is flushed and the handle is released.
+		with open(name, 'w') as f:
+			json.dump(data, f, sort_keys=True, indent=4)
+
+if __name__ == "__main__":
+	main()
+
diff --git a/state.py b/state.py
@@ -0,0 +1,21 @@
+class StateManager(object):
+	"""Runs UI states one after another.
+
+	A state is any object offering doModal() and close(). switchTo() stores
+	this manager on the state as `sm`, so a state can request its successor.
+	"""
+	def __init__(self):
+		self.stack = list()   # states waiting to run, oldest first
+		self.active = None    # state currently running, if any
+
+	def switchTo(self, state):
+		"""Make state active when idle; otherwise close the active state and queue state."""
+		state.sm = self
+
+		# Identity test: a state class is free to overload == and !=.
+		if self.active is not None:
+			self.active.close()
+			self.stack.append(state)
+		else:
+			self.active = state
+
+	def doModal(self):
+		"""Run the active state, then every queued state in FIFO order, until none is left."""
+		while self.active is not None:
+			self.active.doModal()
+			self.active = None
+
+			# A state may have queued its successor while it was running.
+			if self.stack:
+				self.active = self.stack.pop(0)
diff --git a/states.py b/states.py
@@ -0,0 +1,151 @@
+import xbmc, xbmcgui
+from xbmcjsonrpc import getSources
+import extraction
+import string
+import urllib2
+import json
+
+def post(address, d):
+	"""POST the JSON string d to address.
+
+	Errors raised by urllib2.urlopen() propagate to the caller. The response
+	body is not read; the response is only closed.
+	"""
+	h = {
+		"Content-Type": "application/json",
+
+		# Some extra headers for fun
+		"Accept": "*/*",   # curl does this
+		"User-Agent": "xbmc-gsoc2012-statistics", # otherwise it uses "Python-urllib/..."
+	}
+
+	req = urllib2.Request(address, headers = h, data = d)
+
+	f = urllib2.urlopen(req)
+	# Close explicitly so the connection is released after every upload.
+	f.close()
+
+class SubmitState(object):
+	"""State that asks for confirmation and then uploads the gathered data in three POST requests."""
+	def __init__(self, episodes, movies, videoFiles):
+		# Each argument is a list; its length is shown in the prompt and it is uploaded as JSON.
+		self.episodes = episodes
+		self.movies = movies
+		self.videoFiles = videoFiles
+
+	def doModal(self):
+		"""Show the yes/no prompt and, on yes, upload episodes, movies and video files in that order.
+
+		Cancelling the progress dialog stops before the next upload. Errors
+		raised by post() propagate after the dialog has been closed.
+		"""
+		dialog = xbmcgui.Dialog()
+		ret = dialog.yesno('Submit?', '{0} episodes'.format(len(self.episodes)), '{0} movies'.format(len(self.movies)), '{0} video files'.format(len(self.videoFiles)))
+
+		if not ret:
+			return
+
+		progress = xbmcgui.DialogProgress()
+		progress.create('GSoC 2012', 'Initializing upload...', "")
+
+		# try/finally closes the dialog on every exit path: completion, cancel or a failed upload.
+		try:
+			progress.update(1, "Uploading episodes")
+			post("http://127.0.0.1:8000/episodes", json.dumps(self.episodes))
+			if progress.iscanceled():
+				return
+
+			progress.update(34, "Uploading movies")
+			post("http://127.0.0.1:8000/movies", json.dumps(self.movies))
+			if progress.iscanceled():
+				return
+
+			progress.update(67, "Uploading unscraped video files")
+			post("http://127.0.0.1:8000/videofiles", json.dumps(self.videoFiles))
+
+			progress.update(100)
+		finally:
+			progress.close()
+
+	def close(self):
+		"""Nothing to release; present because StateManager calls close() on the active state."""
+		pass
+
+class GatherState(object):
+	"""State that runs the selected extractors behind a progress dialog, then switches to SubmitState."""
+	def __init__(self, extractionSteps):
+		# extractionSteps holds the selected choice IDs: "episodes", "movies" and/or source paths.
+		# NOTE(review): a "musicvideos" ID counts towards self.steps but has no extractor here.
+		self.gatherDialog = xbmcgui.DialogProgress()
+		self.extractionSteps = extractionSteps
+
+	def doModal(self):
+		"""Extract episodes, movies and unscraped video files as selected, updating the dialog throughout.
+
+		Errors raised by an extractor propagate after the dialog has been closed.
+		"""
+		self.gatherDialog.create('GSoC 2012', 'Initializing extractors...', "")
+
+		# try/finally closes the dialog even when an extractor raises.
+		try:
+			self.steps = len(self.extractionSteps)
+			files = set()
+
+			episodes = list()
+			if "episodes" in self.extractionSteps:
+				def episodeProgress(percentage):
+					self.gatherDialog.update(percentage // self.steps, "Extracting episodes", "", "")
+				episodes = extraction.extractEpisodes(files, episodeProgress, self.gatherDialog.iscanceled)
+
+			movies = list()
+			if "movies" in self.extractionSteps:
+				def movieProgress(percentage):
+					self.gatherDialog.update((100 + percentage) // self.steps, "Extracting movies", "", "")
+				movies = extraction.extractMovies(files, movieProgress, self.gatherDialog.iscanceled)
+
+			videoFiles = list()
+			sources = [s for s in getSources() if s["file"] in self.extractionSteps]
+			nbrSources = len(sources)
+
+			for i, source in enumerate(sources):
+				source["tick"] = 0
+				source["percentage"] = 0
+				# Each source owns an equal slice of the 100 points after the two library steps,
+				# so progress never moves backwards when the next source starts.
+				offset = 200 + i * 100 // nbrSources
+
+				def unscrapedIsCanceled():
+					# Cycle through 0..5 dots so the dialog visibly changes on every poll.
+					source["tick"] = source["tick"] + 1 if source["tick"] < 5 else 0
+					dots = " ".join(["."] * source["tick"])
+
+					self.gatherDialog.update((offset + source["percentage"]) // self.steps, "Extracting unscraped videos " + dots, source["label"], "")
+
+					return self.gatherDialog.iscanceled()
+
+				def midProgress(percentage):
+					# Scale the within-source percentage down to this source's slice.
+					source["percentage"] = percentage // nbrSources
+					unscrapedIsCanceled()
+
+				extraction.extractVideoFilesFromDirectory(files, videoFiles, source["file"], unscrapedIsCanceled, midProgress)
+
+		finally:
+			self.gatherDialog.close()
+
+		self.sm.switchTo(SubmitState(episodes, movies, videoFiles))
+
+	def close(self):
+		"""Nothing to release; present because StateManager calls close() on the active state."""
+		pass
+
+class InitialWindow(xbmcgui.Window):
+	"""First window: one radio button per thing that can be submitted, plus a "Next!" button."""
+	def __init__(self):
+		self.strActionInfo = xbmcgui.ControlLabel(0, 0, 300, 200, 'Push BACK to cancel', 'font13', '0xFFFFFFFF')
+		self.addControl(self.strActionInfo)
+
+		# Parallel lists: choiceButton[k] is the radio button for the ID in choiceID[k].
+		self.choiceButton = list()
+		self.choiceID = list()
+
+		self.gather = xbmcgui.ControlButton(800, 50, 200, 100, "Next!")
+		self.addControl(self.gather)
+
+		self.addChoice("Submit scraped movies", "movies")
+		self.addChoice("Submit scraped episodes", "episodes")
+		self.addChoice("Submit scraped music videos", "musicvideos")
+
+		# One extra choice per source; the source path doubles as the choice ID.
+		for source in getSources():
+			self.addChoice(u'Submit unscraped videos from "' + source["label"] + u'"', source["file"])
+
+		self.setFocus(self.choiceButton[0])
+		self.gather.controlLeft(self.choiceButton[0])
+
+	def addChoice(self, label, ID):
+		"""Append a pre-selected radio button below the existing ones and register ID for it."""
+		button = xbmcgui.ControlRadioButton(50, 50 + 50 * len(self.choiceButton), 600, 40, label)
+		self.addControl(button)
+		button.setSelected(True)
+
+		# Link up/down navigation with the previously added button, if there is one.
+		if self.choiceButton:
+			last = self.choiceButton[-1]
+			last.controlDown(button)
+			button.controlUp(last)
+
+		button.controlRight(self.gather)
+
+		self.choiceID.append(ID)
+		self.choiceButton.append(button)
+
+	def onControl(self, control):
+		"""When "Next!" is activated, switch to GatherState with the IDs of the selected choices."""
+		if control is self.gather:
+			# Pair the parallel lists directly instead of searching for each button again.
+			steps = [ID for ID, button in zip(self.choiceID, self.choiceButton) if button.isSelected()]
+			self.sm.switchTo(GatherState(steps))