-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
482 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<addon | ||
id="statistics.gsoc.scraper" | ||
version="0.0.1" | ||
name="Statistics gathering for scraping GSoC 2012" | ||
provider-name="topfs2, Team XBMC"> | ||
<requires> | ||
<import addon="xbmc.python" version="2.0"/> | ||
</requires> | ||
<extension | ||
point="xbmc.python.script" library="default.py"/> | ||
|
||
<extension point="xbmc.addon.metadata"> | ||
<language></language> | ||
<summary>Statistics gathering for scraping GSoC 2012. All data gathered will be anonymous and will contain no information about who it was gathered from. The script will gather information about the files existing in your sources (without any passwords or usernames) and will couple that with what is scanned into the database (again, no usernames or passwords). This will be posted to xbmc.org for further analysis.</summary> | ||
<platform>all</platform> | ||
</extension> | ||
</addon> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import xbmc, xbmcgui | ||
import xbmcjsonrpc | ||
import state | ||
import states | ||
|
||
# Action code taken from keymap.xml.
ACTION_PREVIOUS_MENU = 10

# Create the state manager, hand it the first window and run it; doModal()
# returns once the manager has no active state left.
sm = state.StateManager()
initialWindow = states.InitialWindow()
sm.switchTo(initialWindow)
sm.doModal()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
from url import removeFromStackAndRecurse | ||
import json | ||
|
||
from xbmcjsonrpc import * | ||
|
||
# Properties passed to getMovies() by extractMovies().
movie_properties = ["title", "runtime", "imdbnumber", "year", "file"]

# Properties passed to getEpisodes() by extractEpisodes().
episode_properties = ["title", "season", "episode", "tvshowid", "file"]

# Properties passed to getTVShows() by extractEpisodes().
show_properties = ["title", "imdbnumber"]

# The three extension tables below are taken from XBMC.  Each one is a list of
# lower-case extensions including the leading dot.

# Taken from XBMC
m_pictureExtensions = ".png|.jpg|.jpeg|.bmp|.gif|.ico|.tif|.tiff|.tga|.pcx|.cbz|.zip|.cbr|.rar|.m3u|.dng|.nef|.cr2|.crw|.orf|.arw|.erf|.3fr|.dcr|.x3f|.mef|.raf|.mrw|.pef|.sr2|.rss".split("|")

# Taken from XBMC
m_musicExtensions = ".nsv|.m4a|.flac|.aac|.strm|.pls|.rm|.rma|.mpa|.wav|.wma|.ogg|.mp3|.mp2|.m3u|.mod|.amf|.669|.dmf|.dsm|.far|.gdm|.imf|.it|.m15|.med|.okt|.s3m|.stm|.sfx|.ult|.uni|.xm|.sid|.ac3|.dts|.cue|.aif|.aiff|.wpl|.ape|.mac|.mpc|.mp+|.mpp|.shn|.zip|.rar|.wv|.nsf|.spc|.gym|.adx|.dsp|.adp|.ymf|.ast|.afc|.hps|.xsp|.xwav|.waa|.wvs|.wam|.gcm|.idsp|.mpdsp|.mss|.spt|.rsd|.mid|.kar|.sap|.cmc|.cmr|.dmc|.mpt|.mpd|.rmt|.tmc|.tm8|.tm2|.oga|.url|.pxml|.tta|.rss|.cm3|.cms|.dlt|.brstm|.wtv|.mka".split("|")

# Taken from XBMC
m_videoExtensions = ".m4v|.3g2|.3gp|.nsv|.tp|.ts|.ty|.strm|.pls|.rm|.rmvb|.m3u|.ifo|.mov|.qt|.divx|.xvid|.bivx|.vob|.nrg|.img|.iso|.pva|.wmv|.asf|.asx|.ogm|.m2v|.avi|.bin|.dat|.mpg|.mpeg|.mp4|.mkv|.avc|.vp3|.svq3|.nuv|.viv|.dv|.fli|.flv|.rar|.001|.wpl|.zip|.vdr|.dvr-ms|.xsp|.mts|.m2t|.m2ts|.evo|.ogv|.sdp|.avs|.rec|.url|.pxml|.vc1|.h264|.rcv|.rss|.mpls|.webm|.bdmv|.wtv".split("|")
|
||
def extractEpisodes(files, onProgress, isInterrupted):
    """Fetch FILE, TVSHOW_TITLE, EPISODE_TITLE, SEASON and EPISODE for the
    episodes in the video library.

    files         -- set; the path produced by removeFromStackAndRecurse() for
                     every visited episode is added to it
    onProgress    -- callable taking a percentage, or a falsy value to disable
                     progress reporting
    isInterrupted -- callable polled after each episode; a true result ends
                     the scan early

    Returns a list with one dict per visited episode.
    """
    # tvshowid -> title, so every episode can be labelled with its show.
    showTitles = dict(
        (show["tvshowid"], show["title"]) for show in getTVShows(show_properties)
    )

    libraryEpisodes = getEpisodes(episode_properties)
    total = len(libraryEpisodes)
    collected = []

    for index, entry in enumerate(libraryEpisodes):
        if onProgress:
            onProgress(index * 100 / total)

        resolved = removeFromStackAndRecurse(entry["file"])
        files.add(resolved)

        collected.append({
            "file": resolved,
            "tvshow_title": showTitles[entry["tvshowid"]],
            "episode_title": entry["title"],
            "season": entry["season"],
            "episode": entry["episode"],
        })

        # Polled after the episode is recorded, so an interrupted scan still
        # returns the entry it was working on.
        if isInterrupted():
            break

    return collected
|
||
def extractMovies(files, onProgress, isInterrupted):
    """Fetch FILE, TITLE, YEAR, IMDB and RUNTIME for the movies in the video
    library.

    files         -- set; the path produced by removeFromStackAndRecurse() for
                     every visited movie is added to it
    onProgress    -- callable taking a percentage, or a falsy value to disable
                     progress reporting
    isInterrupted -- callable polled after each movie; a true result ends the
                     scan early

    Returns a list with one dict per visited movie.
    """
    libraryMovies = getMovies(movie_properties)
    total = len(libraryMovies)
    collected = []

    for index, entry in enumerate(libraryMovies):
        if onProgress:
            onProgress(index * 100 / total)

        resolved = removeFromStackAndRecurse(entry["file"])
        files.add(resolved)

        collected.append({
            "file": resolved,
            "title": entry["title"],
            "year": entry["year"],
            "imdb": entry["imdbnumber"],
            "runtime": entry["runtime"],
        })

        # Polled after the movie is recorded, so an interrupted scan still
        # returns the entry it was working on.
        if isInterrupted():
            break

    return collected
|
||
def getExtension(path):
    """Return the lower-cased text of ``path`` from its last "." onwards, or
    None when ``path`` contains no "." at all."""
    lastDot = path.rfind(".")
    if lastDot < 0:
        return None
    return path[lastDot:].lower()
|
||
def extractVideoFilesFromDirectory(files, videoFiles, directory, isInterrupted, onProgress = None):
    """Walk ``directory`` recursively and append unseen video files to
    ``videoFiles``.

    files         -- container of already known paths; files found in it are
                     skipped
    videoFiles    -- list that receives one {"file": path} dict per match
    directory     -- location handed to getDirectory()
    isInterrupted -- callable polled after each entry; a true result stops
                     the walk of the current directory
    onProgress    -- optional callable taking a percentage; it is reported for
                     this directory only and is not passed to sub-directories
    """
    entries = getDirectory(directory)
    total = len(entries)

    for index, entry in enumerate(entries):
        if onProgress:
            onProgress(index * 100 / total)

        if entry["filetype"] == "directory":
            extractVideoFilesFromDirectory(files, videoFiles, entry["file"], isInterrupted)
        elif entry["filetype"] == "file":
            candidate = removeFromStackAndRecurse(entry["file"])
            alreadyKnown = candidate in files

            if not alreadyKnown and getExtension(candidate) in m_videoExtensions:
                # Here we could extract subtitles etc.
                videoFiles.append({"file": candidate})

        if isInterrupted():
            break
|
||
def extractVideoFiles(files, onProgress, isInterrupted):
    """Scan every source returned by getSources() for unseen video files.

    files         -- container of already known paths; forwarded to
                     extractVideoFilesFromDirectory() so those are skipped
    onProgress    -- callable taking (source label, overall percentage), or a
                     falsy value to disable progress reporting
    isInterrupted -- callable polled after each source; a true result stops
                     the scan

    Returns the list of {"file": path} dicts that was collected.
    """
    sources = getSources()

    videoFiles = list()
    nbrSources = len(sources)

    for i, source in enumerate(sources):
        if onProgress:
            onProgress(source["label"], i * 100 / nbrSources)

            # Every source owns an equal slice of the overall range, so the
            # overall value is (finished sources * 100 + progress inside the
            # current source) / number of sources.  Label and base are bound
            # as defaults so the callback does not depend on loop variables.
            def midProgress(percentage, label = source["label"], base = i * 100):
                onProgress(label, (base + percentage) / nbrSources)

            extractVideoFilesFromDirectory(files, videoFiles, source["file"], isInterrupted, midProgress)
        else:
            extractVideoFilesFromDirectory(files, videoFiles, source["file"], isInterrupted, None)

        if isInterrupted():
            break

    return videoFiles
|
||
def main():
    """Run all three extractors without progress reporting and dump the
    results to episodes.json, movies.json and videoFiles.json in the current
    working directory."""
    isInterrupted = lambda : False

    files = set()
    episodes = extractEpisodes(files, None, isInterrupted)
    movies = extractMovies(files, None, isInterrupted)

    videoFiles = extractVideoFiles(files, None, isInterrupted)

    # Each file is written inside a ``with`` block so it is flushed and closed
    # even if serialisation fails part-way through.
    with open('episodes.json', 'w') as f:
        json.dump(episodes, f, sort_keys=True, indent=4)

    with open('movies.json', 'w') as f:
        json.dump(movies, f, sort_keys=True, indent=4)

    with open('videoFiles.json', 'w') as f:
        json.dump(videoFiles, f, sort_keys=True, indent=4)
|
||
if __name__ == "__main__": | ||
main() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
class StateManager(object):
    """Runs one modal state at a time and queues the states requested while
    another one is active.

    A state is any object providing ``doModal()`` and ``close()``; the manager
    stores itself on the state as ``state.sm`` so the state can request the
    next one through ``switchTo``.
    """

    def __init__(self):
        # States waiting to run, oldest first.
        self.stack = list()
        # State currently inside doModal(), or None.
        self.active = None

    def switchTo(self, state):
        """Make ``state`` the next state to run.

        With no active state, ``state`` becomes active immediately.
        Otherwise the active state is asked to close and ``state`` is queued
        until the active state's doModal() returns.
        """
        state.sm = self

        # Identity check: a state object must never be able to pass for
        # "no state" through its own comparison methods.
        if self.active is not None:
            self.active.close()
            self.stack.append(state)
        else:
            self.active = state

    def doModal(self):
        """Run the active state, then each queued state in FIFO order, until
        nothing is left to run."""
        while self.active is not None:
            self.active.doModal()
            self.active = None

            if self.stack:
                self.active = self.stack.pop(0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
import xbmc, xbmcgui | ||
from xbmcjsonrpc import getSources | ||
import extraction | ||
import string | ||
import urllib2 | ||
import json | ||
|
||
def post(address, d):
    """Send ``d`` to ``address`` as the body of an HTTP request with a JSON
    content type.

    address -- URL to send the request to
    d       -- request body, already serialised

    The response body is not read; the response is closed right away so the
    connection is released.  Errors raised by urllib2 propagate to the caller.
    """
    h = {
        "Content-Type": "application/json",

        # Some extra headers for fun
        "Accept": "*/*", # curl does this
        "User-Agent": "xbmc-gsoc2012-statistics", # otherwise it uses "Python-urllib/..."
    }

    req = urllib2.Request(address, headers = h, data = d)

    response = urllib2.urlopen(req)
    response.close()
|
||
class SubmitState(object):
    """State that asks for confirmation and then uploads the gathered data."""

    def __init__(self, episodes, movies, videoFiles):
        """Remember the three collections that doModal() will upload."""
        self.episodes = episodes
        self.movies = movies
        self.videoFiles = videoFiles

    def doModal(self):
        """Show the item counts, and on confirmation post episodes, movies and
        video files in that order.

        Cancellation is checked after the first two uploads.  The progress
        dialog is closed on every way out: normal completion, cancellation,
        or an exception raised by post().
        """
        dialog = xbmcgui.Dialog()
        ret = dialog.yesno('Submit?', '{0} episodes'.format(len(self.episodes)), '{0} movies'.format(len(self.movies)), '{0} video files'.format(len(self.videoFiles)))

        if not ret:
            return

        progress = xbmcgui.DialogProgress()
        progress.create('GSoC 2012', 'Initializing upload...', "")

        try:
            progress.update(1, "Uploading episodes")
            post("http://127.0.0.1:8000/episodes", json.dumps(self.episodes))
            if progress.iscanceled():
                return

            progress.update(34, "Uploading movies")
            post("http://127.0.0.1:8000/movies", json.dumps(self.movies))
            if progress.iscanceled():
                return

            progress.update(67, "Uploading unscraped video files")
            post("http://127.0.0.1:8000/videofiles", json.dumps(self.videoFiles))

            progress.update(100)
        finally:
            # Without this the dialog stayed on screen after a cancel or a
            # failed upload.
            progress.close()

    def close(self):
        """Nothing to tear down; present because StateManager calls it."""
        pass
|
||
class GatherState(object):
    """State that runs the selected extractors behind a progress dialog and
    then hands the results to a SubmitState."""

    def __init__(self, extractionSteps):
        """extractionSteps -- collection of selected step ids: "episodes",
        "movies" and/or the "file" value of sources to scan."""
        self.gatherDialog = xbmcgui.DialogProgress()
        self.extractionSteps = extractionSteps

    def doModal(self):
        """Run each selected extraction step, close the dialog, then switch to
        SubmitState with whatever was gathered.  An exception from a step
        propagates after the dialog has been closed."""
        dialog = self.gatherDialog
        dialog.create('GSoC 2012', 'Initializing extractors...', "")

        try:
            self.steps = len(self.extractionSteps)
            files = set()

            episodes = []
            if "episodes" in self.extractionSteps:
                def episodeProgress(percentage):
                    dialog.update(percentage / self.steps, "Extracting episodes", "", "")
                episodes = extraction.extractEpisodes(files, episodeProgress, dialog.iscanceled)

            movies = []
            if "movies" in self.extractionSteps:
                def movieProgress(percentage):
                    dialog.update((100 + percentage) / self.steps, "Extracting movies", "", "")
                movies = extraction.extractMovies(files, movieProgress, dialog.iscanceled)

            videoFiles = []
            selectedSources = [s for s in getSources() if s["file"] in self.extractionSteps]
            nbrSources = len(selectedSources)

            for position, source in enumerate(selectedSources):
                # Mutable per-source state lives in the source dict so the
                # nested callbacks below can update it.
                source["tick"] = 0
                source["percentage"] = 0

                def unscrapedIsCanceled():
                    # Cycle the tick 0..5 to animate the dots on each poll.
                    previous = source["tick"]
                    source["tick"] = previous + 1 if previous < 5 else 0
                    dots = " ".join(["."] * source["tick"])

                    # NOTE(review): offset uses position * nbrSources rather
                    # than a multiple of 100 -- confirm the intended scaling.
                    offset = 200 + position * nbrSources

                    dialog.update((offset + source["percentage"]) / self.steps, "Extracting unscraped videos " + dots, source["label"], "")

                    return dialog.iscanceled()

                def midProgress(percentage):
                    source["percentage"] = percentage / nbrSources
                    unscrapedIsCanceled()

                extraction.extractVideoFilesFromDirectory(files, videoFiles, source["file"], unscrapedIsCanceled, midProgress)
        finally:
            dialog.close()

        self.sm.switchTo(SubmitState(episodes, movies, videoFiles))

    def close(self):
        """Nothing to tear down; present because StateManager calls it."""
        pass
|
||
class InitialWindow(xbmcgui.Window):
    """First screen: one radio button per possible extraction step plus a
    "Next!" button that starts gathering."""

    def __init__(self):
        self.strActionInfo = xbmcgui.ControlLabel(0, 0, 300, 200, 'Push BACK to cancel', 'font13', '0xFFFFFFFF')
        self.addControl(self.strActionInfo)

        # Parallel lists: choiceID[n] is the step id behind choiceButton[n].
        self.choiceButton = []
        self.choiceID = []

        self.gather = xbmcgui.ControlButton(800, 50, 200, 100, "Next!")
        self.addControl(self.gather)

        for label, ID in (
            ("Submit scraped movies", "movies"),
            ("Submit scraped episodes", "episodes"),
            ("Submit scraped music videos", "musicvideos"),
        ):
            self.addChoice(label, ID)

        for source in getSources():
            self.addChoice(u'Submit unscraped videos from "' + source["label"] + u'"', source["file"])

        firstChoice = self.choiceButton[0]
        self.setFocus(firstChoice)
        self.gather.controlLeft(firstChoice)

    def addChoice(self, label, ID):
        """Add a pre-selected radio button below the existing ones and wire
        its up/down/right navigation."""
        row = len(self.choiceButton)
        button = xbmcgui.ControlRadioButton(50, 50 + 50 * row, 600, 40, label)
        self.addControl(button)
        button.setSelected(True)

        if row > 0:
            previous = self.choiceButton[-1]
            previous.controlDown(button)
            button.controlUp(previous)

        button.controlRight(self.gather)

        self.choiceID.append(ID)
        self.choiceButton.append(button)

    def onControl(self, control):
        """When "Next!" is activated, switch to GatherState with the ids of
        all selected choices."""
        if control is not self.gather:
            return

        steps = [ID for ID, button in zip(self.choiceID, self.choiceButton) if button.isSelected()]
        self.sm.switchTo(GatherState(steps))
Oops, something went wrong.