/
get_media_type_list.py
executable file
·86 lines (69 loc) · 2.23 KB
/
get_media_type_list.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python
# pylint: disable=missing-docstring
import csv
import json
import logging
from pathlib import Path
from tempfile import TemporaryDirectory
import click
import click_log
from libratom.cli import PATH_METAVAR
from libratom.cli.cli import set_log_level_from_verbose
from libratom.cli.utils import PathPath, validate_out_path
from libratom.lib.download import download_files
# Let both -h and --help bring up the usage message
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}

# Apply click_log's basic configuration to the root logger
click_log.basic_config(logging.getLogger())
def _read_media_types(csv_path: Path) -> list:
    """Return the media types listed in one downloaded registry CSV file.

    The registry part of each media type is the file stem (``text`` for
    ``text.csv``); the subtype part is the first whitespace-separated token
    of the first column of each data row. Blank lines, rows whose first cell
    is blank, and the ``Name`` header row are skipped.
    """
    registry = csv_path.stem
    media_types = []

    # Decode explicitly instead of relying on the platform's locale encoding.
    # utf-8-sig reads plain UTF-8 and also drops a leading byte order mark,
    # which would otherwise keep the header cell from matching "Name".
    # NOTE(review): assumes the registry files are published as UTF-8.
    with csv_path.open(newline="", encoding="utf-8-sig") as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields an empty list for a blank line
            if not row:
                continue

            name = row[0]

            # Skip the header row
            if name == "Name":
                continue

            # Keep only the first token: the split strips the
            # DEPRECATED/OBSOLETED/... mentions appended to the name.
            # A blank cell produces no token at all.
            tokens = name.split(maxsplit=1)
            if not tokens:
                continue

            media_types.append(f"{registry}/{tokens[0]}")

    return media_types


@click.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "-v",
    "--verbose",
    count=True,
    callback=set_log_level_from_verbose,
    help="Increase verbosity (can be repeated).",
    expose_value=False,
)
@click.option(
    "-o",
    "--out",
    metavar=PATH_METAVAR,
    default=Path("media_types.json"),
    callback=validate_out_path,
    type=PathPath(resolve_path=True),
    help=f"Write the output to {PATH_METAVAR}.",
)
def download_media_type_files(out) -> None:
    """Download media type files from https://www.iana.org/ and write a JSON file of all media types.
    """
    # NOTE: the docstring above doubles as the command's --help text, so
    # implementation details are documented in comments instead.
    #
    # ``out`` is the value of the -o/--out option after it went through
    # ``validate_out_path``; it is opened for writing below.

    media_type_registries = [
        "application",
        "audio",
        "font",
        "image",
        "message",
        "model",
        "multipart",
        "text",
        "video",
    ]

    # CSV files to download, one per registry
    urls = [
        f"https://www.iana.org/assignments/media-types/{registry}.csv"
        for registry in media_type_registries
    ]

    media_types = []

    # The downloaded files are only needed long enough to be parsed
    with TemporaryDirectory() as tmpdir:
        directory = Path(tmpdir)

        download_files(urls, directory, dry_run=False)

        for file in directory.glob("*.csv"):
            media_types.extend(_read_media_types(file))

    # A single sorted JSON array, regardless of download/glob order
    with out.open(mode="w") as f:
        json.dump(sorted(media_types), f, indent=4)
# Script entry point. The command is called without arguments on purpose:
# click reads the command line and supplies ``out`` itself.
if __name__ == "__main__":
    download_media_type_files()  # pylint: disable=no-value-for-parameter