-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_helper.py
68 lines (61 loc) · 2.18 KB
/
data_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import zipfile
from typing import List
from urllib.request import urlopen
def decompress_zip(filename, dest: str, verbose: bool = False) -> List[str]:
    """
    Unzips a zip file.

    An entry is skipped when a path with the same name already exists,
    either relative to the current working directory or below *dest*;
    the existing path is then returned instead of being overwritten.
    Folder entries (names ending with ``/``) are created but never listed
    in the result.

    :param filename: file to process
    :param dest: destination folder
    :param verbose: verbosity
    :return: return the list of decompressed files
    :raises RuntimeError: if *filename* is not a valid zip file, or if an
        entry would be written outside of *dest*
    """
    try:
        archive = zipfile.ZipFile(filename, "r")
    except zipfile.BadZipFile as e:
        raise RuntimeError(f"Unable to unzip {filename!r}") from e

    root = os.path.abspath(dest)
    files = []
    # The context manager releases the archive handle even when the
    # extraction fails half-way.
    with archive:
        for info in archive.infolist():
            if os.path.exists(info.filename):
                # Already available relative to the current directory.
                if not info.filename.endswith("/"):
                    files.append(info.filename)
                continue

            tos = os.path.join(dest, info.filename)
            if os.path.exists(tos):
                # Already extracted by a previous call.
                if not tos.endswith("/"):
                    files.append(tos)
                continue

            # Refuse entries such as "../x" or absolute names which would
            # be created outside of the destination folder.
            target = os.path.abspath(tos)
            if target != root and not target.startswith(os.path.join(root, "")):
                raise RuntimeError(
                    f"Unsafe path {info.filename!r} in {filename!r}, "
                    f"it would be extracted outside of {dest!r}"
                )

            finalfolder = os.path.split(tos)[0]
            # finalfolder is empty when dest is "" and the entry is at the
            # archive root: there is nothing to create in that case.
            if finalfolder and not os.path.exists(finalfolder):
                if verbose:
                    print(f"creating folder {finalfolder!r}")
                os.makedirs(finalfolder)

            if not info.filename.endswith("/"):
                # The content is read only once we know it must be written.
                with open(tos, "wb") as u:
                    u.write(archive.read(info.filename))
                files.append(tos)
                if verbose:
                    print(f"unzipped {info.filename!r} to {tos!r}")
    return files
def download_and_unzip(
    url: str, dest: str = ".", timeout: int = 10, verbose: bool = False
) -> List[str]:
    """
    Downloads a file and unzip it.

    The archive is stored in *dest* under the last path segment of *url*.
    If a file with that name is already there, the download is skipped and
    the existing archive is decompressed again.

    :param url: url
    :param dest: destination folder, created if it does not exist
    :param timeout: timeout passed to ``urlopen``
    :param verbose: display progress
    :return: list of unzipped files
    :raises ValueError: if no file name can be extracted from *url*
    """
    filename = url.split("/")[-1]
    if not filename:
        # A url ending with "/" would make the archive path point to the
        # destination folder itself.
        raise ValueError(f"Unable to guess a filename from url {url!r}")
    dest_zip = os.path.join(dest, filename)
    if not os.path.exists(dest_zip):
        if verbose:
            print(f"downloads into {dest_zip!r} from {url!r}")
        with urlopen(url, timeout=timeout) as u:
            content = u.read()
        if dest:
            os.makedirs(dest, exist_ok=True)
        # Write to a temporary name first: an interrupted write must not
        # leave a truncated archive which the next call would consider as
        # already downloaded.
        partial = f"{dest_zip}.part"
        with open(partial, "wb") as f:
            f.write(content)
        os.replace(partial, dest_zip)
    elif verbose:
        print(f"already downloaded {dest_zip!r}")
    return decompress_zip(dest_zip, dest, verbose=verbose)