In [2]:
import os
import shutil
from math import ceil
import zipfile

In [None]:


def split_folder_into_parts(source_dir, num_parts=50):
    files = sorted([f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))])
    total_files = len(files)
    chunk_size = ceil(total_files / num_parts)

    for i in range(num_parts):
        part_dir = os.path.join(source_dir, f'part_{i+1}')
        os.makedirs(part_dir, exist_ok=True)
        for f in files[i * chunk_size : (i + 1) * chunk_size]:
            shutil.move(os.path.join(source_dir, f), os.path.join(part_dir, f))

# Example usage:
split_folder_into_parts("train")


In [20]:
import os
import tarfile

def compress_all_subfolders(parent_dir):
    for item in os.listdir(parent_dir):
        full_path = os.path.join(parent_dir, item)
        if os.path.isdir(full_path):
            output_tar = os.path.join(parent_dir, f"train_{item}.tar.gz")
            with tarfile.open(output_tar, "w:gz") as tar:
                tar.add(full_path, arcname=item)
            print(f"Compressed: {item} → {output_tar}")

# Example usage:
compress_all_subfolders("train")


Compressed: part_37 → train/train_part_37.tar.gz
Compressed: part_10 → train/train_part_10.tar.gz
Compressed: part_42 → train/train_part_42.tar.gz
Compressed: part_12 → train/train_part_12.tar.gz
Compressed: part_35 → train/train_part_35.tar.gz
Compressed: part_29 → train/train_part_29.tar.gz
Compressed: part_46 → train/train_part_46.tar.gz
Compressed: part_7 → train/train_part_7.tar.gz
Compressed: part_13 → train/train_part_13.tar.gz
Compressed: part_21 → train/train_part_21.tar.gz
Compressed: part_45 → train/train_part_45.tar.gz
Compressed: part_22 → train/train_part_22.tar.gz
Compressed: part_6 → train/train_part_6.tar.gz
Compressed: part_34 → train/train_part_34.tar.gz
Compressed: part_19 → train/train_part_19.tar.gz
Compressed: part_31 → train/train_part_31.tar.gz
Compressed: part_1 → train/train_part_1.tar.gz
Compressed: part_26 → train/train_part_26.tar.gz
Compressed: part_17 → train/train_part_17.tar.gz
Compressed: part_28 → train/train_part_28.tar.gz
Compressed: part_43 → trai

In [7]:
import os
import tarfile

def extract_flat_to_data(source_dir, output_dir="data"):
    source_dir = os.path.abspath(source_dir)
    output_dir = os.path.abspath(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(source_dir):
        if filename.endswith(".tar.gz"):
            tar_path = os.path.join(source_dir, filename)
            with tarfile.open(tar_path, "r:gz") as tar:
                for member in tar.getmembers():
                    if "Zone.Identifier" in member.name or member.name.endswith(":Zone.Identifier"):
                        continue
                    if member.isfile():
                        # Flatten path: just use the base name
                        member.name = os.path.basename(member.name)
                        tar.extract(member, path=output_dir)
            print(f"Flatten-extracted: {filename} → {output_dir}")

# Example usage:
extract_flat_to_data("data_compressed/")


  tar.extract(member, path=output_dir)


Flatten-extracted: part_13.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_12.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_18.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_3.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_7.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_17.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_20.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_8.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_4.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_6.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_2.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_10.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_11.tar.gz → /home/roguchi/cse253-assignment2/data
Flatten-extracted: part_5.tar.gz → /home/rog