From b695d962d82c208396bcfc6e14d5f1ed048dae03 Mon Sep 17 00:00:00 2001 From: Stephen Mackenzie Date: Thu, 15 Oct 2015 16:06:12 -0400 Subject: [PATCH 1/2] Replaced newline-dependent CMS parser with more robust regex parser --- cdl_convert/parse.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/cdl_convert/parse.py b/cdl_convert/parse.py index 1c59e86..7070ea8 100644 --- a/cdl_convert/parse.py +++ b/cdl_convert/parse.py @@ -469,10 +469,9 @@ def parse_cmx(input_file): # pylint: disable=R0912,R0914 cdls = [] with open(input_file, 'rb') as edl: - lines = edl.readlines() + lines = '\n'.join(edl.readlines()) filename = os.path.basename(input_file).split('.')[0] - def parse_cmx_clip(cmx_tuple): """Parses a three line cmx clip tuple.""" if len(cmx_tuple) != 3: @@ -500,18 +499,17 @@ def parse_cmx_clip(cmx_tuple): return cc - for i, line in enumerate(lines): - if line != '\r\n': - # We only care about newlines when reading CMX, because - # we use those to kick off parsing the next take. - continue - if i + 3 <= len(lines): - cc = parse_cmx_clip(lines[i + 1:i + 4]) + #This regex will avoid caring about extra stuff between the important lines we care about as long as the + #important lines we care about are in the right order + ccMatcher = re.compile(r'(\d*.+)(\n*.*)(\*FROM.+)(\n*.*)(\*ASC_SOP.+)(\n*.*)(\*ASC_SAT.+)(\n*.*)(\*SOURCE.+)') + clipEntries = ccMatcher.findall(lines) + for entry in clipEntries: + if entry[2] is not '' and entry[4] is not None and entry[6] is not None: + cc = parse_cmx_clip((entry[2],entry[4],entry[6])) + cdls.append(cc) else: continue - cdls.append(cc) - ccc = collection.ColorCollection() ccc.file_in = input_file ccc.append_children(cdls) From 6e9bc1f29499ca1d48e7004c2a90c2547f86d5c6 Mon Sep 17 00:00:00 2001 From: Stephen Mackenzie Date: Thu, 15 Oct 2015 17:45:14 -0400 Subject: [PATCH 2/2] adjustments to regex parsing in response to addtl edl examples --- cdl_convert/parse.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/cdl_convert/parse.py b/cdl_convert/parse.py index 7070ea8..93ded1a 100644 --- a/cdl_convert/parse.py +++ b/cdl_convert/parse.py @@ -477,7 +477,7 @@ def parse_cmx_clip(cmx_tuple): if len(cmx_tuple) != 3: print(cmx_tuple) return - title = cmx_tuple[0].split()[1] + title = cmx_tuple[0].split(': ')[1] sop = re.match( r'^\*ASC_SOP \(([\d\. -]+)\)\(([\d\. -]+)\)\(([\d\. -]+)\)', @@ -501,15 +501,26 @@ def parse_cmx_clip(cmx_tuple): #This regex will avoid caring about extra stuff between the important lines we care about as long as the #important lines we care about are in the right order - ccMatcher = re.compile(r'(\d*.+)(\n*.*)(\*FROM.+)(\n*.*)(\*ASC_SOP.+)(\n*.*)(\*ASC_SAT.+)(\n*.*)(\*SOURCE.+)') + ccMatcher = re.compile(r'(\d*.+)(\n*.*)(\*\ *FROM.+)(\n*.*)(\*\ *ASC_(SOP|SAT).+)(\n*.*)(\*\ *ASC_(SOP|SAT).+)') clipEntries = ccMatcher.findall(lines) for entry in clipEntries: - if entry[2] is not '' and entry[4] is not None and entry[6] is not None: - cc = parse_cmx_clip((entry[2],entry[4],entry[6])) + clip = entry[2] + sop = None + sat = None + i=0 + for group in entry: + if group == 'SOP': + sop = entry[i-1] + if group == 'SAT': + sat = entry[i-1] + i += 1 + if clip is not None and sop is not None and sat is not None: + cc = parse_cmx_clip((clip, sop, sat)) cdls.append(cc) else: continue + ccc = collection.ColorCollection() ccc.file_in = input_file ccc.append_children(cdls)