diff --git a/patacrep/authors.py b/patacrep/authors.py index 73f3fc7a..a1feda2e 100644 --- a/patacrep/authors.py +++ b/patacrep/authors.py @@ -64,7 +64,7 @@ def split_author_names(string): brace_count += 1 if char == "{": brace_count -= 1 - return string[:last_space], string[last_space:] + return string[last_space:], string[:last_space] def split_sep_author(string, sep): @@ -162,23 +162,6 @@ def processauthors_clean_authors(authors_list): if author.lstrip() ] -def processauthors_invert_names(authors_list): - """Move first names after last names - - See docstring of processauthors() for more information. - """ - dest = [] - for author in authors_list: - first, last = split_author_names(author) - if first: - dest.append(ur"\indexauthor{{{first}}}{{{last}}}".format( - first=first.strip(), - last=last.strip(), - )) - else: - dest.append(last.lstrip()) - return dest - def processauthors(authors_string, after=None, ignore=None, sep=None): r"""Return a list of authors @@ -210,10 +193,12 @@ def processauthors(authors_string, after=None, ignore=None, sep=None): 4) Strings containing words of "ignore" are dropped. # ["William Blake", "Hubert Parry", The Royal\ Choir~of~Nowhere"] - 5) First and last names are processed through LaTeX command \indexauthor - (which will, by default, invert first and last names). 
- # ["\indexauthor{William}{Blake}", "\indexauthor{Hubert}{Parry}", - # \indexthaor{The}{Royal\ Choir~of~Nowhere}"] + 5) First and last names are split + # [ + # ("Blake", "William"), + # ("Parry", "Hubert"), + # ("Royal\ Choir~of~Nowhere", "The"), + # ] """ if not sep: @@ -223,8 +208,10 @@ def processauthors(authors_string, after=None, ignore=None, sep=None): if not ignore: ignore = [] - return processauthors_invert_names( - processauthors_clean_authors( + return [ + split_author_names(author) + for author + in processauthors_clean_authors( processauthors_ignore_authors( processauthors_remove_after( processauthors_split_string( @@ -235,5 +222,4 @@ def processauthors(authors_string, after=None, ignore=None, sep=None): after), ignore) ) - ) - + ] diff --git a/patacrep/build.py b/patacrep/build.py index dc5cdda0..13a173ab 100755 --- a/patacrep/build.py +++ b/patacrep/build.py @@ -13,6 +13,7 @@ from patacrep import __DATADIR__, authors, content, errors from patacrep.index import process_sxd from patacrep.templates import TexRenderer +from patacrep.songs import DataSubpath LOGGER = logging.getLogger(__name__) EOL = "\n" @@ -75,7 +76,7 @@ def _set_datadir(self): self.config['datadir'] = abs_datadir self.config['_songdir'] = [ - os.path.join(path, 'songs') + DataSubpath(path, 'songs') for path in self.config['datadir'] ] diff --git a/patacrep/content/cwd.py b/patacrep/content/cwd.py index fff4c10e..338adb76 100755 --- a/patacrep/content/cwd.py +++ b/patacrep/content/cwd.py @@ -3,9 +3,8 @@ """Change base directory before importing songs.""" -import os - from patacrep.content import process_content +from patacrep.songs import DataSubpath #pylint: disable=unused-argument def parse(keyword, config, argument, contentlist): @@ -28,8 +27,8 @@ def parse(keyword, config, argument, contentlist): """ old_songdir = config['_songdir'] config['_songdir'] = ( - [argument] + - [os.path.join(path, argument) for path in config['_songdir']] + + [DataSubpath("", argument)] + 
[path.clone().join(argument) for path in config['_songdir']] + config['_songdir'] ) processed_content = process_content(contentlist, config) diff --git a/patacrep/content/include.py b/patacrep/content/include.py index 0b4a8967..45540db9 100644 --- a/patacrep/content/include.py +++ b/patacrep/content/include.py @@ -17,6 +17,10 @@ LOGGER = logging.getLogger(__name__) def load_from_datadirs(path, config=None): + """Load 'path' from one of the datadirs. + + Raise an exception if it was found in none of the datadirs of 'config'. + """ for datadir in config.get("datadir", []): filepath = os.path.join(datadir, path) if os.path.exists(filepath): diff --git a/patacrep/content/song.py b/patacrep/content/song.py index 9965c861..18dbc252 100755 --- a/patacrep/content/song.py +++ b/patacrep/content/song.py @@ -35,7 +35,7 @@ def end_block(self, __context): def render(self, context): """Return the string that will render the song.""" return ur'\input{{{}}}'.format(files.relpath( - self.path, + self.fullpath, os.path.dirname(context['filename']) )) @@ -59,21 +59,28 @@ def parse(keyword, argument, contentlist, config): if contentlist: break contentlist = [ - files.relpath(filename, songdir) + filename for filename in ( - files.recursive_find(songdir, "*.sg") - + files.recursive_find(songdir, "*.is") + files.recursive_find(songdir.fullpath, "*.sg") + + files.recursive_find(songdir.fullpath, "*.is") ) ] for elem in contentlist: before = len(songlist) for songdir in config['_songdir']: - for filename in glob.iglob(os.path.join(songdir, elem)): - LOGGER.debug('Parsing file "{}"…'.format(filename)) - song = SongRenderer(filename, config) - songlist.append(song) - config["_languages"].update(song.languages) + if songdir.datadir and not os.path.isdir(songdir.datadir): + continue + with files.chdir(songdir.datadir): + for filename in glob.iglob(os.path.join(songdir.subpath, elem)): + LOGGER.debug('Parsing file "{}"…'.format(filename)) + song = SongRenderer( + songdir.datadir, + filename, + 
config, + ) + songlist.append(song) + config["_languages"].update(song.languages) if len(songlist) > before: break if len(songlist) == before: diff --git a/patacrep/content/sorted.py b/patacrep/content/sorted.py index 12c63189..e9a5e677 100755 --- a/patacrep/content/sorted.py +++ b/patacrep/content/sorted.py @@ -33,8 +33,8 @@ def normalize_field(field): """Return a normalized field, it being a string or a list of strings.""" if isinstance(field, basestring): return normalize_string(field) - elif isinstance(field, list): - return [normalize_string(string) for string in field] + elif isinstance(field, list) or isinstance(field, tuple): + return [normalize_field(string) for string in field] def key_generator(sort): """Return a function that returns the list of values used to sort the song. @@ -50,7 +50,7 @@ def ordered_song_keys(song): if key == "@title": field = song.unprefixed_titles elif key == "@path": - field = song.path + field = song.fullpath elif key == "by": field = song.authors else: @@ -60,7 +60,7 @@ def ordered_song_keys(song): LOGGER.debug( "Ignoring unknown key '{}' for song {}.".format( key, - files.relpath(song.path), + files.relpath(song.fullpath), ) ) field = u"" diff --git a/patacrep/content/tex.py b/patacrep/content/tex.py index 1e18ecfd..ad109126 100755 --- a/patacrep/content/tex.py +++ b/patacrep/content/tex.py @@ -41,8 +41,11 @@ def parse(keyword, argument, contentlist, config): for filename in contentlist: checked_file = None for path in config['_songdir']: - if os.path.exists(os.path.join(path, filename)): - checked_file = os.path.relpath(os.path.join(path, filename)) + if os.path.exists(os.path.join(path.fullpath, filename)): + checked_file = os.path.relpath(os.path.join( + path.fullpath, + filename, + )) break if not checked_file: LOGGER.warning( diff --git a/patacrep/data/examples/.gitignore b/patacrep/data/examples/.gitignore new file mode 100644 index 00000000..8c36c429 --- /dev/null +++ b/patacrep/data/examples/.gitignore @@ -0,0 +1 @@ 
+/.cache diff --git a/patacrep/files.py b/patacrep/files.py index 31cc4e2e..5ba23df2 100644 --- a/patacrep/files.py +++ b/patacrep/files.py @@ -4,6 +4,7 @@ """File system utilities.""" +from contextlib import contextmanager import fnmatch import os @@ -12,10 +13,14 @@ def recursive_find(root_directory, pattern): Return a list of files matching the pattern. """ + if not os.path.isdir(root_directory): + return [] + matches = [] - for root, _, filenames in os.walk(root_directory): - for filename in fnmatch.filter(filenames, pattern): - matches.append(os.path.join(root, filename)) + with chdir(root_directory): + for root, _, filenames in os.walk(os.curdir): + for filename in fnmatch.filter(filenames, pattern): + matches.append(os.path.join(root, filename)) return matches def relpath(path, start=None): @@ -26,3 +31,22 @@ def relpath(path, start=None): return os.path.relpath(path, start) else: return os.path.abspath(path) + +@contextmanager +def chdir(path): + """Locally change dir + + Can be used as: + + with chdir("some/directory"): + do_stuff() + + """ + olddir = os.getcwd() + if path: + os.chdir(path) + yield + os.chdir(olddir) + else: + yield + diff --git a/patacrep/index.py b/patacrep/index.py index 27fa1eb1..63f3058d 100755 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -22,18 +22,6 @@ FIRST_LETTER_PATTERN = re.compile(ur"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE) -def sortkey(value): - """From a title, return something usable for sorting. - - It handles locale (but - don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles - the sort with latex escape sequences. - """ - return locale.strxfrm( - encoding.unidecode(simpleparse(value).replace(' ', 'A')).lower() - ) - - def process_sxd(filename): """Parse sxd file. @@ -115,12 +103,18 @@ def _raw_add(self, key, number, link): No processing is done on data. It is added raw. See add() for a similar method with processing. 
""" - first = self.get_first_letter(key) + first = self.get_first_letter(key[0]) if not first in self.data.keys(): self.data[first] = dict() if not key in self.data[first].keys(): - self.data[first][key] = [] - self.data[first][key].append({'num': number, 'link': link}) + self.data[first][key] = { + 'sortingkey': [ + encoding.unidecode(simpleparse(item)).lower() + for item in key + ], + 'entries': [], + } + self.data[first][key]['entries'].append({'num': number, 'link': link}) def add(self, key, number, link): """Add a song to the list. @@ -133,15 +127,15 @@ def add(self, key, number, link): match = pattern.match(key) if match: self._raw_add( - ur"\indextitle{{{}}}{{{}}}".format( + ( match.group(1).strip(), - (match.group(2) + match.group(3)).strip(), - ), + (match.group(2) + match.group(3)).strip() + ), number, link ) return - self._raw_add(key, number, link) + self._raw_add((key, ""), number, link) if self.indextype == "AUTHOR": # Processing authors @@ -155,10 +149,26 @@ def ref_to_str(ref): """Return the LaTeX code corresponding to the reference.""" return ur'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) + def key_to_str(self, key): + """Convert the key (title or author) to the LaTeX command rendering it. + + """ + if self.indextype == "AUTHOR": + if key[1]: + return ur"\indexauthor{{{first}}}{{{last}}}".format( + first=key[1], + last=key[0], + ) + else: + return key[0] + + if self.indextype == "TITLE": + return ur"\indextitle{{{0[0]}}}{{{0[1]}}}".format(key) + def entry_to_str(self, key, entry): """Return the LaTeX code corresponding to the entry.""" return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format( - key, + self.key_to_str(key), ur'\\'.join([self.ref_to_str(ref) for ref in entry]), ) @@ -168,9 +178,16 @@ def idxblock_to_str(self, letter, entries): Here, an index block is a letter, and all data beginning with this letter. 
""" + def sortkey(key): + """Return something sortable for `entries[key]`.""" + return [ + locale.strxfrm(item) + for item + in entries[key]['sortingkey'] + ] string = ur'\begin{idxblock}{' + letter + '}' + EOL - for key in sorted(entries.keys(), key=sortkey): - string += self.entry_to_str(key, entries[key]) + for key in sorted(entries, key=sortkey): + string += self.entry_to_str(key, entries[key]['entries']) string += ur'\end{idxblock}' + EOL return string diff --git a/patacrep/plastex.py b/patacrep/plastex.py index b1c906b2..ecfa2d00 100644 --- a/patacrep/plastex.py +++ b/patacrep/plastex.py @@ -39,6 +39,7 @@ def simpleparse(text): """Parse a simple LaTeX string. """ tex = TeX() + tex.disableLogging() tex.input(text) doc = tex.parse() return process_unbr_spaces(doc.textContent) diff --git a/patacrep/songs.py b/patacrep/songs.py index a3adc856..5f3a7d8c 100755 --- a/patacrep/songs.py +++ b/patacrep/songs.py @@ -3,19 +3,121 @@ """Song management.""" +import errno +import hashlib +import logging +import os import re +try: + import cPickle as pickle +except ImportError: + import pickle + from patacrep.authors import processauthors from patacrep.plastex import parsetex -# pylint: disable=too-few-public-methods +LOGGER = logging.getLogger(__name__) + +def cached_name(datadir, filename): + """Return the filename of the cache version of the file.""" + fullpath = os.path.abspath(os.path.join(datadir, '.cache', filename)) + directory = os.path.dirname(fullpath) + try: + os.makedirs(directory) + except OSError as error: + if error.errno == errno.EEXIST and os.path.isdir(directory): + pass + else: + raise + return fullpath + +class DataSubpath(object): + """A path divided in two path: a datadir, and its subpath. + + - This object can represent either a file or directory. + - If the datadir part is the empty string, it means that the represented + path does not belong to a datadir. 
+ """ + + def __init__(self, datadir, subpath): + if os.path.isabs(subpath): + self.datadir = "" + else: + self.datadir = datadir + self.subpath = subpath + + def __str__(self): + return os.path.join(self.datadir, self.subpath) + + @property + def fullpath(self): + """Return the full path represented by self.""" + return os.path.join(self.datadir, self.subpath) + + def clone(self): + """Return a cloned object.""" + return DataSubpath(self.datadir, self.subpath) + + def join(self, path): + """Join "path" argument to self path. + + Return self for commodity. + """ + self.subpath = os.path.join(self.subpath, path) + return self + +# pylint: disable=too-few-public-methods, too-many-instance-attributes class Song(object): """Song management""" - def __init__(self, filename, config): + # Version format of cached song. Increment this number if we update + # information stored in cache. + CACHE_VERSION = 0 + + # List of attributes to cache + cached_attributes = [ + "titles", + "unprefixed_titles", + "args", + "datadir", + "fullpath", + "subpath", + "languages", + "authors", + "_filehash", + "_version", + ] + + def __init__(self, datadir, subpath, config): + self.fullpath = os.path.join(datadir, subpath) + if datadir: + # Only songs in datadirs are cached + self._filehash = hashlib.md5( + open(self.fullpath, 'rb').read() + ).hexdigest() + if os.path.exists(cached_name(datadir, subpath)): + try: + cached = pickle.load(open( + cached_name(datadir, subpath), + 'rb', + )) + if ( + cached['_filehash'] == self._filehash + and cached['_version'] == self.CACHE_VERSION + ): + for attribute in self.cached_attributes: + setattr(self, attribute, cached[attribute]) + return + except: # pylint: disable=bare-except + LOGGER.warning("Could not use cached version of {}.".format( + self.fullpath + )) + # Data extraction from the song with plastex - data = parsetex(filename) + data = parsetex(self.fullpath) self.titles = data['titles'] + self.datadir = datadir self.unprefixed_titles = [ 
unprefixed_title( title, @@ -25,7 +127,7 @@ def __init__(self, filename, config): in self.titles ] self.args = data['args'] - self.path = filename + self.subpath = subpath self.languages = data['languages'] if "by" in self.args.keys(): self.authors = processauthors( @@ -35,8 +137,30 @@ def __init__(self, filename, config): else: self.authors = [] + self._version = self.CACHE_VERSION + self._write_cache() + + def _write_cache(self): + """If relevant, write a dumbed down version of self to the cache.""" + if self.datadir: + cached = {} + for attribute in self.cached_attributes: + if attribute == "args": + cached[attribute] = dict([ + (key, u"{}".format(value)) # Force conversion to unicode + for (key, value) + in self.args.iteritems() + ]) + else: + cached[attribute] = getattr(self, attribute) + pickle.dump( + cached, + open(cached_name(self.datadir, self.subpath), 'wb'), + protocol=-1 + ) + def __repr__(self): - return repr((self.titles, self.args, self.path)) + return repr((self.titles, self.args, self.fullpath)) def unprefixed_title(title, prefixes): """Remove the first prefix of the list in the beginning of title (if any).