Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate playlists from songs listed in video descriptions #642 #649

Merged
merged 8 commits into from
Jun 16, 2017
64 changes: 58 additions & 6 deletions mps_youtube/commands/generate_playlist.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,32 @@
"""
Playlist Generation
"""
from os import path
import pafy
from random import choice
import string
import pafy

from .. import content, g, playlists, screen, util
from .. import content, g, playlists, screen, util, listview
from ..playlist import Playlist
from . import command, search, album_search


@command(r'mkp\s*(.{1,100})')
def generate_playlist(sourcefile):
"""Generate a playlist from video titles in sourcefile"""

# Hooks into this, check if the argument --description is present
if "--description" in sourcefile or "-d" in sourcefile:
description_generator(sourcefile)
return

expanded_sourcefile = path.expanduser(sourcefile)
if not check_sourcefile(expanded_sourcefile):
g.message = util.F('mkp empty') % expanded_sourcefile
else:
queries = read_sourcefile(expanded_sourcefile)
g.message = util.F('mkp parsed') % (len(queries), sourcefile)
if len(queries) > 0:
if queries:
create_playlist(queries)
g.message = util.F('pl help')
g.content = content.playlists_display()
Expand All @@ -40,13 +49,13 @@ def check_sourcefile(filename):
return path.isfile(filename) and path.getsize(filename) > 0


def create_playlist(queries):
def create_playlist(queries, title=None):
"""Add a new playlist

Create playlist with a random name, get the first
match for each title in queries and append it to the playlist
"""
plname = random_plname()
plname = title.replace(" ", "-") or random_plname()
if not g.userpl.get(plname):
g.userpl[plname] = Playlist(plname)
for query in queries:
Expand All @@ -55,7 +64,7 @@ def create_playlist(queries):
qresult = find_best_match(query)
if qresult:
g.userpl[plname].songs.append(qresult)
if len(g.userpl[plname]) > 0:
if g.userpl[plname]:
playlists.save()


Expand All @@ -76,3 +85,46 @@ def random_plname():
n_chars = 6
return ''.join(choice(string.ascii_lowercase + string.digits)
for _ in range(n_chars))


def description_generator(text):
    """Offer the tracks listed in a video's description as a playlist.

    ``text`` is the raw ``mkp`` argument, e.g. ``"-d 3"`` or
    ``"--description 3"``.  The flag is removed and the remainder is
    handed to ``util.number_string_to_list``; only the first number it
    yields is used.  The snippet of the corresponding entry in
    ``g.model`` is fetched, its description is passed to
    ``util.fetch_songs`` and the result is shown in a ``ListView`` whose
    callback creates the playlist.

    Nothing is returned; the outcome is reported through ``g.message``
    and ``g.content``.
    """
    if not isinstance(g.model, Playlist):
        g.message = util.F("mkp desc unknown")
        return

    # Use only the first result, for now
    num = text.replace("--description", "")
    num = num.replace("-d", "")
    num = util.number_string_to_list(num)[0]

    query = {
        'id': g.model[num].ytid,
        'part': 'snippet',
        'maxResults': '1',
    }
    # The API answers with an empty item list when the id cannot be
    # resolved; report that instead of failing on ``[0]``.
    items = pafy.call_gdata('videos', query).get('items')
    if not items:
        g.message = util.F("mkp no valid")
        return

    snippet = items[0]['snippet']
    title = "mkp %s" % snippet['title']
    songs = util.fetch_songs(snippet['description'], snippet['title'])
    if not songs:
        g.message = util.F("mkp no valid")
        return

    columns = [
        {"name": "idx", "size": 3, "heading": "Num"},
        {"name": "artist", "size": 30, "heading": "Artist"},
        {"name": "title", "size": "remaining", "heading": "Title"},
    ]

    def run_m(idx):
        """Create the playlist from whatever the list view hands over.

        NOTE(review): presumably ``idx`` holds the entries the user
        selected - confirm against ListView.
        """
        create_playlist(idx, title)

    rows = [listview.ListSongtitle(song) for song in songs]
    g.content = listview.ListView(columns, rows, run_m)
    g.message = util.F("mkp desc which data")
162 changes: 162 additions & 0 deletions mps_youtube/description_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
"""
Module for trying to parse and retrieve song data from descriptions
"""
import re
import random
import pafy


def calculate_certainty(line):
    """Score how strongly a line resembles a tracklist entry.

    Three patterns are tried against the start of ``line`` (``re.match``):
    a timestamp, an ``<artist> - <track>`` combination and a leading
    track number.  The weights of the patterns that match are added up
    and divided by the number of patterns.

    Returns a float; 0.0 when no pattern matches.
    """
    weighted_patterns = (
        # Timestamp such as "1:02:03", "(02:03)" or "00:00 - 02:03"
        (r"(?:\(?(?:\d{0,4}:)?\d{0,2}:\d{0,2}\)?(?: - )?){1,2}", 1),
        # "<artist> - <track>" combination
        (r"(([\w&()\[\]'\.\/ ]+)([ ]?[-]+[ ]?)([\w&()\[\]'\.\/ ]+))+", 0.75),
        # Leading track number such as "1. " or "12 "
        (r"^([\d]+[. ]+)", 1),
    )

    score = sum(
        (weight for pattern, weight in weighted_patterns
         if re.match(pattern, line)),
        0.0)
    return score / len(weighted_patterns)


def has_artist(text):
    """Report whether ``text`` lacks an ``<artist> - <track>`` pattern.

    Despite the name, the result is True when the start of ``text`` does
    NOT match the artist/track pattern and False when it does.
    """
    combo_pattern = (
        r"(?:([\w&()\[\]'\.\/ ]+)(?:[ ]?[-]+[ ]?)([\w&()\[\]'\.\/ ]+))+")
    return re.match(combo_pattern, text) is None


def strip_string(text, single=False):
    """Clean a tracklist line and split it into artist and track.

    Timestamps, a leading track number and one leading non-word
    character per line are removed first.

    text   -- the raw line
    single -- True when the line holds a track title only

    Returns an ``(artist, track)`` tuple.  ``artist`` is None when
    ``single`` is true, or when no ``<artist> - <track>`` pair is left
    after cleaning (for instance "1 - Song name", where removing the
    track number also removes everything in front of the dash).
    """
    # Removes timestamps
    ts_reg = r"(?:\(?(?:\d{0,4}:)?\d{1,2}:\d{1,2}\)?(?: - )?){1,2}"
    text = re.sub(ts_reg, "", text)

    # Removes track numbers
    text = re.sub(r"^([\d]+[. ]+)", "", text)

    # Removes a leading character that cannot start a name
    text = re.sub(r"^[^\w&()\[\]'\.\/]", "", text, flags=re.MULTILINE)

    if single:
        return None, text

    rgex = r"(?:([\w&()\[\]'\.\/ ]+)(?:[ ]?[-]+[ ]?)([\w&()\[\]'\.\/ ]+))+"
    pairs = re.findall(rgex, text)
    if not pairs:
        # Cleaning can remove the part before the dash; treat what is
        # left as a title instead of failing on ``[0]``.
        return None, text

    artist, track = pairs[0]
    return artist, track


def long_substr(data):
    """Return the longest substring shared by every string in ``data``.

    Adapted from https://stackoverflow.com/a/2894073

    Candidates are taken from the first string; of several equally long
    common substrings the one found first wins.  An empty string is
    returned when ``data`` has fewer than two entries, when the first
    entry is empty, or when nothing is shared.
    """
    best = ''
    if len(data) < 2 or len(data[0]) == 0:
        return best

    reference = data[0]
    for start in range(len(reference)):
        # Only candidates longer than the current best are of interest
        for size in range(len(best) + 1, len(reference) - start + 1):
            candidate = reference[start:start + size]
            if all(candidate in item for item in data):
                best = candidate
    return best


def is_substr(find, data):
    """Check whether ``find`` occurs in every entry of ``data``.

    Returns False when both ``data`` and ``find`` are empty, otherwise
    True exactly when each entry of ``data`` contains ``find``.
    """
    if not (len(data) or len(find)):
        return False
    return all(find in entry for entry in data)


def artist_from_title(title):
    """Guess an artist name from YouTube search results for ``title``.

    Up to 50 video titles are fetched for the query.  One hundred times
    the titles are shuffled and the longest substring common to the
    first ten is recorded (when longer than three characters), counting
    how often each substring turns up.  The candidate that appears
    earliest in ``title`` is chosen; candidates that do not appear in
    ``title`` at all rank last, and ties are broken by how often the
    candidate was seen.

    Returns the chosen candidate, capitalized and with leading/trailing
    non-word characters removed, or an empty string when no candidate
    was found.
    """
    query = {
        'q': title,
        'type': 'video',
        'fields': "items(snippet(title))",
        'maxResults': 50,
        'part': "snippet",
    }

    results = pafy.call_gdata('search', query)['items']
    titles = [x['snippet']['title'].upper() for x in results]

    votes = {}
    for _ in range(100):
        random.shuffle(titles)
        common = long_substr(titles[:10]).strip()
        if len(common) > 3:
            votes[common] = votes.get(common, 0) + 1

    if not votes:
        # Nothing in common between the results: no guess possible
        return ""

    upper_title = title.upper()

    def rank(item):
        """Sort key: earliest position in the title, then most votes."""
        candidate, count = item
        position = upper_title.find(candidate)
        if position < 0:
            # str.find signals "absent" with -1; that must not win
            position = float('inf')
        return position, -count

    best_string = min(votes.items(), key=rank)[0].capitalize()
    best_string = re.sub(r"([^\w]+)$", "", best_string)
    best_string = re.sub(r"^([^\w]+)", "", best_string)
    return best_string


def parse(text, title="Unknown"):
    """Extract ``(artist, track)`` tuples from a video description.

    text  -- the description, one candidate per line
    title -- the video title; passed to ``artist_from_title`` when the
             tracklist itself carries no artist information

    Returns a list of ``(artist, track)`` tuples, empty when no line
    stands out as a tracklist entry.
    """
    # Determine a certainty index for each line
    scored = [(calculate_certainty(line), line) for line in text.split('\n')]

    # Get average from all strings
    certainty_average = sum(score for score, _ in scored) / len(scored)

    # Single out lines with above average certainty index
    candidates = [line for score, line in scored if score > certainty_average]
    if not candidates:
        # Nothing to parse, so skip the artist lookup further down
        return []

    # Determine if they are artist combo strings or only title
    cmbs = [strip_string(line, has_artist(line)) for line in candidates]

    # No or very few tracklists will omit artists or add artist information
    # on only a select few tracks, therefore we count entries with and
    # without artist, and remove the anomalies IF the number of anomalies
    # is small enough
    with_artist = sum(1 for combo in cmbs if combo[0])
    without_artist = len(cmbs) - with_artist

    # NOTE(review): an absolute difference can never exceed the sum of the
    # two counts, so this guard is currently unreachable; the intended
    # threshold needs confirming.
    if abs(with_artist - without_artist) > with_artist + without_artist:
        print("Too many anomalities detected")
        return []

    if with_artist > without_artist:
        return [combo for combo in cmbs if combo[0] is not None]

    arti = artist_from_title(title)
    return [(arti, combo[1]) for combo in cmbs if combo[0] is None]
8 changes: 7 additions & 1 deletion mps_youtube/g.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,10 @@
'mkp parsed': "*&&* entries found in *&&*",
'mkp parsed_': (c.g, c.w, c.b, c.w),
'mkp finding': "Finding the best match for *&&* ...",
'mkp finding_': (c.y, c.w)}
'mkp finding_': (c.y, c.w),
'mkp desc unknown': "Unknown tabletype, *do a new search*",
'mkp desc unknown_': (c.y, c.w),
'mkp desc which data': "Which *tracks* to include?",
'mkp desc which data_': (c.y, c.w),
'mkp no valid': "*No valid tracks found in that description*",
'mkp no valid_': (c.y, c.w)}
8 changes: 7 additions & 1 deletion mps_youtube/helptext.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
Holds all help text
"""
from . import c, g
from .util import get_near_name, F

Expand Down Expand Up @@ -54,6 +57,9 @@ def helptext():
{2}mkp <fullfilepath>{1} - Creates a playlist automatically with video titles from fullfilepath
<fullfilepath>: Full path of text file with one title per line

{2}mkp -d <search result number>{1} - Create a playlist based on tracks
listed in that videos description. (Alternatively one can use {2}--description{1})

{2}user <username>{1} - list YouTube uploads by <username>.
{2}user <username>/<query>{1} - as above, but matches <query>.
{2}userpl <username>{1} - list YouTube playlists created by <username>.
Expand Down Expand Up @@ -292,7 +298,7 @@ def get_help(choice):
"invoke": "command commands mpsyt invocation".split(),

"search": ("user userpl pl pls r n p url album "
"editing result results related remove swop".split()),
"editing result results related remove swop mkp --description".split()),

"edit": ("editing manupulate manipulating rm mv sw edit move "
"swap shuffle".split()),
Expand Down
80 changes: 5 additions & 75 deletions mps_youtube/listview.py → mps_youtube/listview/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,83 +4,13 @@
import re
import math

from . import c, g, util, content
from .. import c, g, util, content
from .base import ListViewItem
from .user import ListUser
from .livestream import ListLiveStream
from .songtitle import ListSongtitle


class ListViewItem:
    """Base class for one row of a list view.

    Wraps a mapping and exposes its keys as attributes; names that are
    not keys of the mapping read as None.
    """
    data = None

    def __init__(self, data):
        self.data = data

    def __getattr__(self, key):
        # Only consulted when regular attribute lookup fails
        if key not in self.data:
            return None
        return self.data[key]

    def length(self, _=0):
        """Return the length of the item.

        Every item has to report a length, even one that has none, so
        the base class reports 0.
        """
        return 0


class ListUser(ListViewItem):
    """List item describing a user."""
    # pylint: disable=unused-argument

    def id(self, length=0):
        """Return the channel id stored under ``id``."""
        identifiers = self.data.get("id")
        return identifiers.get("channelId")

    def name(self, length=10):
        """Return the channel name, padded to ``length``."""
        snippet = self.data.get("snippet")
        return util.uea_pad(length, snippet.get("title"))

    def description(self, length=10):
        """Return the channel description, padded to ``length``."""
        snippet = self.data.get("snippet")
        return util.uea_pad(length, snippet.get("description"))

    def kind(self, length=10):
        """Return the YouTube datatype.

        Example: youtube#channel, youtube#video
        """
        identifiers = self.data.get("id")
        return identifiers.get("kind")

    def ret(self):
        """Return the tuple used by the ListView play function."""
        channel_title = self.data.get("snippet").get("title")
        return (channel_title, self.id(), "")

    @staticmethod
    def return_field():
        """Name the method that is called on selected items."""
        return "ret"


class ListLiveStream(ListViewItem):
    """List item exposing the necessary components of a live stream."""
    # pylint: disable=unused-argument

    def ytid(self, lngt=10):
        """Return the video id (string)."""
        identifiers = self.data.get("id")
        return identifiers.get("videoId")

    def ret(self):
        """Return a content.video compatible tuple."""
        return (self.ytid(), self.title(), self.length())

    def title(self, lngt=10):
        """Return the title, padded to ``lngt``."""
        snippet = self.data.get("snippet")
        return util.uea_pad(lngt, snippet.get("title"))

    def description(self, lngt=10):
        """Return the description, padded to ``lngt``."""
        snippet = self.data.get("snippet")
        return util.uea_pad(lngt, snippet.get("description"))

    @staticmethod
    def return_field():
        """Name the method that is called on selected items."""
        return "ret"

class ListView(content.PaginatedContent):
""" Content Agnostic Numbered List

Expand Down
Loading