mps-youtube · ids1024 · Jun 16, 2017 · Jun 13, 2017 · Jun 13, 2017 · Jun 13, 2017
diff --git a/mps_youtube/commands/generate_playlist.py b/mps_youtube/commands/generate_playlist.py
@@ -1,23 +1,32 @@
+"""
+    Playlist Generation
+"""
 from os import path
-import pafy
 from random import choice
 import string
+import pafy
 
-from .. import content, g, playlists, screen, util
+from .. import content, g, playlists, screen, util, listview
 from ..playlist import Playlist
 from . import command, search, album_search
 
 
 @command(r'mkp\s*(.{1,100})')
 def generate_playlist(sourcefile):
     """Generate a playlist from video titles in sourcefile"""
+
+    # Divert to the description-based generator only when an argument starts
+    # with the flag; a plain substring test would also fire on file paths
+    # that merely contain "-d" (e.g. "my-dir/list.txt").
+    if any(arg.startswith(("--description", "-d"))
+           for arg in sourcefile.split()):
+        description_generator(sourcefile)
+        return
+
     expanded_sourcefile = path.expanduser(sourcefile)
     if not check_sourcefile(expanded_sourcefile):
         g.message = util.F('mkp empty') % expanded_sourcefile
     else:
         queries = read_sourcefile(expanded_sourcefile)
         g.message = util.F('mkp parsed') % (len(queries), sourcefile)
-        if len(queries) > 0:
+        if queries:
             create_playlist(queries)
             g.message = util.F('pl help')
             g.content = content.playlists_display()
@@ -40,13 +49,13 @@ def check_sourcefile(filename):
     return path.isfile(filename) and path.getsize(filename) > 0
 
 
-def create_playlist(queries):
+def create_playlist(queries, title=None):
     """Add a new playlist
 
     Create playlist with a random name, get the first
     match for each title in queries and append it to the playlist
     """
-    plname = random_plname()
+    # title defaults to None (generate_playlist passes none) and None has no
+    # .replace(); only slug a real title, otherwise fall back to a random name.
+    plname = title.replace(" ", "-") if title else random_plname()
     if not g.userpl.get(plname):
         g.userpl[plname] = Playlist(plname)
     for query in queries:
@@ -55,7 +64,7 @@ def create_playlist(queries):
         qresult = find_best_match(query)
         if qresult:
             g.userpl[plname].songs.append(qresult)
-    if len(g.userpl[plname]) > 0:
+    if g.userpl[plname]:
         playlists.save()
 
 
@@ -76,3 +85,46 @@ def random_plname():
     n_chars = 6
     return ''.join(choice(string.ascii_lowercase + string.digits)
                    for _ in range(n_chars))
+
+
+def description_generator(text):
+    """ Offer the tracks listed in a video's description as a playlist.
+
+        `text` is the raw `mkp` argument: the flag (-d / --description)
+        plus the number of an item in the current result list. Only the
+        first number given is used. The video's snippet is fetched, its
+        description is handed to util.fetch_songs, and any songs found
+        are shown in a ListView; otherwise a message is set instead.
+    """
+    if not isinstance(g.model, Playlist):
+        g.message = util.F("mkp desc unknown")
+        return
+
+    # Drop the flag so that only the item number(s) remain
+    selection = text.replace("--description", "").replace("-d", "")
+    # NOTE(review): assumes number_string_to_list yields indexes usable
+    # directly on g.model - confirm against util
+    index = util.number_string_to_list(selection)[0]
+
+    query = {
+        'id': g.model[index].ytid,
+        'part': 'snippet',
+        'maxResults': '1',
+    }
+    snippet = pafy.call_gdata('videos', query)['items'][0]['snippet']
+    title = "mkp %s" % snippet['title']
+    songs = util.fetch_songs(snippet['description'], snippet['title'])
+
+    if not songs:
+        g.message = util.F("mkp no valid")
+        return
+
+    def run_m(idx):
+        """ Create playlist based on the
+            results selected
+        """
+        create_playlist(idx, title)
+
+    columns = [
+        {"name": "idx", "size": 3, "heading": "Num"},
+        {"name": "artist", "size": 30, "heading": "Artist"},
+        {"name": "title", "size": "remaining", "heading": "Title"},
+    ]
+    rows = [listview.ListSongtitle(song) for song in songs]
+    g.content = listview.ListView(columns, rows, run_m)
+    g.message = util.F("mkp desc which data")
diff --git a/mps_youtube/description_parser.py b/mps_youtube/description_parser.py
@@ -0,0 +1,162 @@
+"""
+    Module for trying to parse and retrieve song data from descriptions
+"""
+import re
+import random
+import pafy
+
+
+def calculate_certainty(line):
+    """ Score how strongly a line resembles a tracklist entry.
+
+        Three patterns are tried against the start of the line: a
+        timestamp (weight 1), an "<artist> - <track>" pair (weight 0.75)
+        and a track number (weight 1). The weights of the patterns that
+        match are summed and divided by the number of patterns, so the
+        result is a float between 0.0 and 2.75 / 3.
+    """
+    certainty_indexes = [
+        # A timestamp needs digits on both sides of the colon (the same
+        # shape strip_string removes); allowing zero digits made a bare
+        # ":" at the start of a line count as a timestamp.
+        {'regex': r"(?:\(?(?:\d{0,4}:)?\d{1,2}:\d{1,2}\)?(?: - )?){1,2}",
+         'weight': 1},
+        {'regex': r"(([\w&()\[\]'\.\/ ]+)([ ]?[-]+[ ]?)([\w&()\[\]'\.\/ ]+))+",
+         'weight': 0.75},
+        {'regex': r"^([\d]+[. ]+)",
+         'weight': 1}
+    ]
+
+    # Start from 0.0 so the result is always a float
+    certainty = sum((method['weight'] for method in certainty_indexes
+                     if re.match(method['regex'], line)), 0.0)
+
+    return certainty / len(certainty_indexes)
+
+
+def has_artist(text):
+    """ Report whether the text LACKS an "<artist> - <track>" pair.
+
+        Despite the name, the result is True when the start of the text
+        does not match the artist/track pattern and False when it does.
+    """
+    combo = re.match(
+        r"(?:([\w&()\[\]'\.\/ ]+)(?:[ ]?[-]+[ ]?)([\w&()\[\]'\.\/ ]+))+",
+        text)
+    return combo is None
+
+
+def strip_string(text, single=False):
+    """ Strip decorations from a tracklist line and split it up.
+
+        Timestamps, a leading track number and a single leading character
+        outside the allowed set are removed first. With single=False the
+        artist and track are taken from the first "<artist> - <track>"
+        match in what remains; with single=True the whole remainder is
+        the track.
+
+        Returns a tuple (artist, track). artist is None when single is
+        True, or when no artist/track pair is left after stripping.
+    """
+    # Removes timestamps
+    ts_reg = r"(?:\(?(?:\d{0,4}:)?\d{1,2}:\d{1,2}\)?(?: - )?){1,2}"
+    text = re.sub(ts_reg, "", text)
+
+    # Removes Tracknumbers.
+    text = re.sub(r"^([\d]+[. ]+)", "", text)
+
+    # Removes starting with non words
+    text = re.sub(r"^[^\w&()\[\]'\.\/]", "", text, flags=re.MULTILINE)
+
+    if not single:
+        rgex = r"(?:([\w&()\[\]'\.\/ ]+)(?:[ ]?[-]+[ ]?)([\w&()\[\]'\.\/ ]+))+"
+        # Stripping can remove the very dash that made the line look like
+        # a pair (e.g. "1. - Track"); indexing the empty result would
+        # raise IndexError, so such a line is treated as title-only.
+        pairs = re.findall(rgex, text)
+        if pairs:
+            return pairs[0]
+
+    return None, text
+
+
+def long_substr(data):
+    """ Find the longest substring of data[0] shared by every entry.
+
+        Returns '' when data holds fewer than two entries or when the
+        first entry is empty. Adapted from
+        https://stackoverflow.com/a/2894073
+    """
+    longest = ''
+    if len(data) <= 1 or len(data[0]) == 0:
+        return longest
+
+    reference = data[0]
+    total = len(reference)
+    for start in range(total):
+        for width in range(total - start + 1):
+            # Only candidates longer than the current best are of interest
+            if width <= len(longest):
+                continue
+            candidate = reference[start:start + width]
+            if is_substr(candidate, data):
+                longest = candidate
+    return longest
+
+
+def is_substr(find, data):
+    """ Tell whether `find` occurs in every element of `data`.
+
+        Returns False when both `find` and `data` are empty; an empty
+        `data` with a non-empty `find` yields True.
+    """
+    if not len(data) and not len(find):
+        return False
+    return all(find in entry for entry in data)
+
+
+def _pick_artist(title, counts):
+    """ Choose the candidate (a key of counts) that best fits the title.
+
+        The candidate found earliest in the upper-cased title wins. When
+        no candidate occurs in the title at all, the one seen most often
+        is used instead.
+    """
+    haystack = title.upper()
+    located = [(haystack.find(key), key) for key in counts]
+    # find() returns -1 for a miss; without this filter a candidate that
+    # is absent from the title would beat every real position.
+    located = [item for item in located if item[0] >= 0]
+    if located:
+        return min(located, key=lambda item: item[0])[1]
+    return max(counts, key=counts.get)
+
+
+def artist_from_title(title):
+    """ Try to determine an artist by doing a search on the video
+        and try to find the most common element by n number of times looking
+        for the most common substring in a subset of the results from youtube
+
+        Returns the guessed artist with non-word characters trimmed from
+        both ends, or an empty string when the search results share no
+        substring longer than three characters.
+    """
+    query = {}
+    query['q'] = title
+    query['type'] = 'video'
+    query['fields'] = "items(snippet(title))"
+    query['maxResults'] = 50
+    query['part'] = "snippet"
+
+    results = pafy.call_gdata('search', query)['items']
+    titles = [x['snippet']['title'].upper() for x in results]
+
+    # Count how often each common substring shows up across 100 random
+    # ten-title samples of the results
+    alts = {}
+    for _ in range(100):
+        random.shuffle(titles)
+        string = long_substr(titles[:10]).strip()
+        if len(string) > 3:
+            alts[string] = alts.get(string, 0) + 1
+
+    # Nothing in common: there is no candidate to clean up, and passing
+    # None to re.sub would raise TypeError.
+    if not alts:
+        return ""
+
+    best_string = _pick_artist(title, alts).capitalize()
+    best_string = re.sub(r"([^\w]+)$", "", best_string)
+    best_string = re.sub(r"^([^\w]+)", "", best_string)
+    return best_string
+
+
+def parse(text, title="Unknown"):
+    """ Extract (artist, track) tuples from a video description.
+
+        Every line of `text` is scored with calculate_certainty and the
+        lines scoring at least the average (and above zero) are kept as
+        tracklist candidates. Each candidate is split by strip_string.
+        If most candidates carry an artist, only those are returned;
+        otherwise the artist-less ones are returned with an artist guessed
+        from `title` by artist_from_title.
+
+        Returns a list of (artist, track) tuples, empty when no line
+        looks like a tracklist entry.
+    """
+    # Determine a certainty index for each line
+    scored = [(calculate_certainty(line), line) for line in text.split('\n')]
+
+    # Get average from all strings
+    certainty_average = sum(score for score, _ in scored) / len(scored)
+
+    # Keep lines scoring at least the average. A strict "above average"
+    # test drops every line of a description in which all lines score the
+    # same; the "> 0" guard keeps text without any tracklist lines out.
+    candidates = [line for score, line in scored
+                  if score > 0 and score >= certainty_average]
+    if not candidates:
+        # Nothing to return, so skip the search done by artist_from_title
+        return []
+
+    # Determine if they are artist combo strings or only title
+    cmbs = [strip_string(line, has_artist(line)) for line in candidates]
+
+    # Few tracklists omit the artist, or add it, on only a handful of
+    # tracks, so the less common kind of entry is treated as an anomaly
+    # and dropped. A tie counts as "without artist".
+    with_artist = [combo for combo in cmbs if combo[0] is not None]
+    without_artist = [combo for combo in cmbs if combo[0] is None]
+
+    if len(with_artist) > len(without_artist):
+        return with_artist
+
+    arti = artist_from_title(title)
+    return [(arti, track) for _, track in without_artist]
diff --git a/mps_youtube/g.py b/mps_youtube/g.py
@@ -165,4 +165,10 @@
     'mkp parsed': "*&&* entries found in *&&*",
     'mkp parsed_': (c.g, c.w, c.b, c.w),
     'mkp finding': "Finding the best match for *&&* ...",
-    'mkp finding_': (c.y, c.w)}
+    'mkp finding_': (c.y, c.w),
+    'mkp desc unknown': "Unknown tabletype, *do a new search*",
+    'mkp desc unknown_': (c.y, c.w),
+    'mkp desc which data': "Which *tracks* to include?",
+    'mkp desc which data_': (c.y, c.w),
+    'mkp no valid': "*No valid tracks found in that description*",
+    'mkp no valid_': (c.y, c.w)}
diff --git a/mps_youtube/helptext.py b/mps_youtube/helptext.py
@@ -1,3 +1,6 @@
+"""
+    Holds all help text
+"""
 from . import c, g
 from .util import get_near_name, F
 
@@ -54,6 +57,9 @@ def helptext():
     {2}mkp <fullfilepath>{1} - Creates a playlist automatically with video titles from fullfilepath
     <fullfilepath>: Full path of text file with one title per line
 
+    {2}mkp -d <search result number>{1} - Create a playlist based on tracks
+    listed in that videos description. (Alternatively one can use {2}--description{1})
+
     {2}user <username>{1} - list YouTube uploads by <username>.
     {2}user <username>/<query>{1} - as above, but matches <query>.
     {2}userpl <username>{1} - list YouTube playlists created by <username>.
@@ -292,7 +298,7 @@ def get_help(choice):
              "invoke": "command commands mpsyt invocation".split(),
 
              "search": ("user userpl pl pls r n p url album "
-                        "editing result results related remove swop".split()),
+                        "editing result results related remove swop mkp --description".split()),
 
              "edit": ("editing manupulate manipulating rm mv sw edit move "
                       "swap shuffle".split()),

diff --git a/mps_youtube/listview.py → mps_youtube/listview/__init__.py b/mps_youtube/listview.py → mps_youtube/listview/__init__.py
@@ -4,83 +4,13 @@
 import re
 import math
 
-from . import c, g, util, content
+from .. import c, g, util, content
+from .base import ListViewItem
+from .user import ListUser
+from .livestream import ListLiveStream
+from .songtitle import ListSongtitle
 
 
-class ListViewItem:
-    """ TODO
-    """
-    data = None
-
-    def __init__(self, data):
-        self.data = data
-
-    def __getattr__(self, key):
-        return self.data[key] if key in self.data.keys() else None
-
-    def length(self, _=0):
-        """ Returns length of ListViewItem
-            A LVI has to return something for length
-            even if the item does not have one.
-        """
-        return 0
-
-
-class ListUser(ListViewItem):
-    """ Describes a user
-    """
-    # pylint: disable=unused-argument
-    def id(self, length=0):
-        """ Returns YTID """
-        return self.data.get("id").get("channelId")
-
-    def name(self, length=10):
-        """ Returns channel name """
-        return util.uea_pad(length, self.data.get("snippet").get("title"))
-
-    def description(self, length=10):
-        """ Channel description"""
-        return util.uea_pad(length, self.data.get("snippet").get("description"))
-
-    def kind(self, length=10):
-        """ Returns the youtube datatype
-            Example: youtube#channel, youtube#video
-        """
-        return self.data.get("id").get("kind")
-
-    def ret(self):
-        """ Used in the ListView play function """
-        return (self.data.get("snippet").get("title"), self.id(), "")
-
-    @staticmethod
-    def return_field():
-        """ Determines which function will be called on selected items """
-        return "ret"
-
-
-class ListLiveStream(ListViewItem):
-    """ Class exposing necessary components of a live stream """
-    # pylint: disable=unused-argument
-    def ytid(self, lngt=10):
-        """ Exposes ytid(string) """
-        return self.data.get("id").get("videoId")
-
-    def ret(self):
-        """ Returns content.video compatible tuple """
-        return (self.ytid(), self.title(), self.length())
-
-    def title(self, lngt=10):
-        """ exposes title """
-        return util.uea_pad(lngt, self.data.get("snippet").get("title"))
-    def description(self, lngt=10):
-        """ exposes description """
-        return util.uea_pad(lngt, self.data.get("snippet").get("description"))
-
-    @staticmethod
-    def return_field():
-        """ ret """
-        return "ret"
-
 class ListView(content.PaginatedContent):
     """ Content Agnostic Numbered List