Skip to content

Commit 83eda44

Browse files
authored
Unzip dataset if it is in .bz2 format (#47)
1 parent 8e99154 commit 83eda44

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

benchmark/dataset.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import os
22
import shutil
33
import tarfile
4+
import bz2
45
import urllib.request
56
import urllib.parse
67
from dataclasses import dataclass, field
@@ -201,6 +202,19 @@ def _extract_or_move_file(self, tmp_path, target_path):
201202
with tarfile.open(tmp_path) as file:
202203
file.extractall(target_path)
203204
os.remove(tmp_path)
205+
elif tmp_path.endswith(".bz2"):
206+
print(f"Extracting bz2: {tmp_path} -> {target_path}")
207+
Path(target_path).parent.mkdir(exist_ok=True)
208+
# Remove .bz2 extension from target path if present
209+
if str(target_path).endswith(".bz2"):
210+
final_target_path = str(target_path)[:-4] # Remove .bz2
211+
else:
212+
final_target_path = target_path
213+
214+
with bz2.BZ2File(tmp_path, 'rb') as f_in:
215+
with open(final_target_path, 'wb') as f_out:
216+
shutil.copyfileobj(f_in, f_out)
217+
os.remove(tmp_path)
204218
else:
205219
print(f"Moving: {tmp_path} -> {target_path}")
206220
Path(target_path).parent.mkdir(exist_ok=True)

0 commit comments

Comments
 (0)