/
m3u.py
184 lines (142 loc) · 5.1 KB
/
m3u.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
r"""jc - JSON Convert M3U and M3U8 file parser
This parser will make a best-effort to parse extended field information. If
the extended fields cannot be successfully parsed, then an `unparsed_info`
field will be added to the object. Unless `--quiet` is used, a warning
message will also be printed to `STDERR`.
Usage (cli):
$ cat playlist.m3u | jc --m3u
Usage (module):
import jc
result = jc.parse('m3u', m3u_file_output)
Schema:
[
{
"runtime": integer,
"display": string,
"path": string,
<extended fields>: string, # [0]
"unparsed_info": string, # [1]
}
]
[0] Field names are pulled directly from the #EXTINF: line
[1] Only added if the extended information cannot be parsed
Examples:
$ cat playlist.m3u | jc --m3u -p
[
{
"runtime": 105,
"display": "Example artist - Example title",
"path": "C:\\Files\\My Music\\Example.mp3"
},
{
"runtime": 321,
"display": "Example Artist2 - Example title2",
"path": "C:\\Files\\My Music\\Favorites\\Example2.ogg"
}
]
$ cat playlist.m3u | jc --m3u -p -r
[
{
"runtime": "105",
"display": "Example artist - Example title",
"path": "C:\\Files\\My Music\\Example.mp3"
},
{
"runtime": "321",
"display": "Example Artist2 - Example title2",
"path": "C:\\Files\\My Music\\Favorites\\Example2.ogg"
}
]
"""
import shlex
from typing import List, Dict
import jc.utils
class info:
    """Provides parser metadata (version, author, etc.)"""
    version = '1.0'
    description = 'M3U and M3U8 file parser'
    author = 'Kelly Brazil'
    author_email = 'kellyjonbrazil@gmail.com'
    # platform identifiers handed to the compatibility check in parse()
    compatible = [
        'linux',
        'darwin',
        'cygwin',
        'win32',
        'aix',
        'freebsd',
    ]
    tags = ['file']


# expose the parser version at module level
__version__ = info.version
def _process(proc_data: List[Dict]) -> List[Dict]:
    """
    Apply the final type conversions so the output matches the schema.

    Parameters:

        proc_data:   (List of Dictionaries) raw structured data to process

    Returns:

        List of Dictionaries. Structured to conform to the schema. The
        entries are converted in place and the same list is returned.
    """
    # fields whose raw string values are converted to integers
    integer_fields = ('runtime',)

    for record in proc_data:
        for field in integer_fields:
            if field in record:
                record[field] = jc.utils.convert_to_int(record[field])

    return proc_data
def parse(
    data: str,
    raw: bool = False,
    quiet: bool = False
) -> List[Dict]:
    """
    Main text parsing function

    Each `#EXTINF:` line contributes `runtime`, `display` and any
    `key=value` extended fields to the entry that is completed by the next
    path line. Other lines starting with `#` are ignored. Every remaining
    non-blank line is treated as a path and closes the current entry.

    Parameters:

        data:        (string)  text data to parse
        raw:         (boolean) unprocessed output if True
        quiet:       (boolean) suppress warning messages if True

    Returns:

        List of Dictionaries. Raw or processed structured data.
    """
    jc.utils.compatibility(__name__, info.compatible, quiet)
    jc.utils.input_type_check(data)

    raw_output: List = []
    output_line: Dict = {}

    if jc.utils.has_data(data):

        for line in filter(None, data.splitlines()):
            # ignore any lines with only whitespace
            if not jc.utils.has_data(line):
                continue

            stripped = line.strip()

            # extended info fields
            if stripped.startswith('#EXTINF:'):
                ext_info = stripped.split(':', maxsplit=1)[1]

                # best-effort to parse additional extended fields
                # if a parsing error occurs, a warning message will be
                # printed to STDERR and `unparsed_info` added
                try:
                    extline = shlex.shlex(ext_info, posix=True)
                    extline.whitespace_split = True
                    extline.whitespace = ', '  # add comma to whitespace detection
                    extline.quotes = '"'  # remove single quotes
                    # shlex treats '#' as a comment starter by default, which
                    # would silently drop the rest of the line for titles
                    # such as "Track #1"; disable comment handling
                    extline.commenters = ''
                    extline_list = list(extline)

                    # first token is the runtime; an empty token list raises
                    # here and is reported as unparsed below
                    runtime = extline_list.pop(0)

                    display_list = []
                    for item in extline_list:
                        if '=' in item:
                            # key=value tokens become extended fields
                            k, v = item.split('=', maxsplit=1)
                            output_line.update({k: v})
                        else:
                            # everything else is part of the display text
                            display_list.append(item)

                    display = ' '.join(display_list)
                    output_line.update({
                        'runtime': runtime,
                        'display': display
                    })

                except Exception:
                    # deliberate catch-all: parsing is best-effort and must
                    # never abort the whole playlist
                    if not quiet:
                        jc.utils.warning_message([
                            'Not able to parse non-standard extensions in the following line:',
                            line
                        ])

                    output_line = {'unparsed_info': line}

                continue

            # ignore all other extension info (obsolete)
            if stripped.startswith('#'):
                continue

            # any lines left over are paths
            output_line.update(
                {'path': stripped}
            )
            raw_output.append(output_line)
            output_line = {}

    return raw_output if raw else _process(raw_output)