In [None]:
import urllib.request
import os
from zipfile import ZipFile
from tqdm import tqdm

# Download settings -- edit these three values to fetch a different archive.
folder_path = "./datasets"  # directory that receives the archive
file_name = "kitsune+network+attack+dataset.zip"  # name the archive is saved under
url = "https://archive.ics.uci.edu/static/public/516/kitsune+network+attack+dataset.zip"

def download_with_progress(url, file_path, chunk_size=1024 * 8):
    """Stream ``url`` into ``file_path`` while updating a tqdm progress bar.

    Args:
        url: Address passed to ``urllib.request.urlopen``.
        file_path: Destination path. It is opened in ``'wb'`` mode, so an
            existing file at that path is overwritten.
        chunk_size: Number of bytes requested per ``read`` call
            (defaults to 8 KiB).

    The bar's ``total`` is taken from the ``Content-Length`` response header
    when present; otherwise ``total=None`` is passed to tqdm.
    """
    # Open the URL exactly once: the same response supplies both the size
    # header and the body, so no second HTTP request is issued.
    with urllib.request.urlopen(url) as response:
        content_length = response.headers.get('Content-Length')
        total_bytes = int(content_length) if content_length is not None else None

        # Label the bar from the destination path instead of a module-level
        # global, so the function only depends on its own arguments.
        label = os.path.basename(file_path)

        with open(file_path, 'wb') as out_file, \
                tqdm(total=total_bytes, unit='B', unit_scale=True, desc=label) as pbar:
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    # An empty read means the response body is exhausted.
                    break
                out_file.write(chunk)
                pbar.update(len(chunk))

def download_and_extract(url, folder_path, file_name):
    """Download a ZIP archive into ``folder_path`` and unpack it there.

    Args:
        url: Address of the archive to download.
        folder_path: Directory that receives both the downloaded archive and
            its extracted contents; created if it does not exist.
        file_name: File name the archive is saved under inside
            ``folder_path``.
    """
    # exist_ok=True creates the directory when missing and does nothing when
    # it is already present, with no separate existence check.
    os.makedirs(folder_path, exist_ok=True)

    # Full destination path of the downloaded archive
    full_path = os.path.join(folder_path, file_name)

    # Download the file with progress
    download_with_progress(url, full_path)

    print(f"\nFile downloaded at: {full_path}")

    # Unpack every member of the archive next to the archive itself
    with ZipFile(full_path, 'r') as zip_ref:
        zip_ref.extractall(folder_path)

    print(f"File extracted to: {folder_path}")

# Run the download and extraction with the settings defined above
download_and_extract(url=url, folder_path=folder_path, file_name=file_name)


kitsune+network+attack+dataset.zip: 19.0GB [05:12, 60.7MB/s]



File downloaded at: ./datasets/kitsune+network+attack+dataset.zip


In [1]:
!pip install tqdm



In [2]:
import os
from zipfile import ZipFile
from tqdm import tqdm

def extract_all_files_with_progress(zip_file_path, extract_folder):
    """Extract every member of a ZIP archive, advancing a progress bar per member.

    Args:
        zip_file_path: Path of the ZIP archive to read.
        extract_folder: Directory the members are extracted into.
    """
    with ZipFile(zip_file_path, 'r') as archive:
        # Names of all members stored in the archive
        members = archive.namelist()

        # One tick of the bar per extracted member
        with tqdm(total=len(members), desc="Extracting files") as progress:
            for member in members:
                archive.extract(member, extract_folder)
                progress.update(1)

# Example usage:
zip_file_path = "./datasets/kitsune+network+attack+dataset.zip"  # Replace with the path to your ZIP file
extract_folder = "./datasets/extracted"  # Replace with the desired extraction folder

# Ensure the extraction folder exists. exist_ok=True makes this a no-op when
# the folder is already there, with no separate existence check.
os.makedirs(extract_folder, exist_ok=True)

# Call the function to extract all files with progress
extract_all_files_with_progress(zip_file_path, extract_folder)

Extracting files: 100%|██████████| 37/37 [02:35<00:00,  4.21s/it]


In [8]:
import os
from zipfile import ZipFile
from tqdm import tqdm

def extract_all_files_with_progress(zip_file_path, extract_folder):
    """Extract a ZIP archive and unpack compressed members found inside it.

    Every member is extracted into ``extract_folder``. After extraction:

    * a member ending in ``.zip`` is itself extracted (recursively) into a
      folder named after the member with its last extension removed;
    * a member ending in ``.gz`` is decompressed to a file named after the
      member with its last extension removed.

    Args:
        zip_file_path: Path of the ZIP archive to read.
        extract_folder: Directory the members are extracted into.
    """
    # Function-scope imports: this cell's import block provides neither name.
    import gzip
    import shutil

    with ZipFile(zip_file_path, 'r') as zip_ref:
        # Get the list of files in the ZIP archive
        file_list = zip_ref.namelist()

        # Create a progress bar for the outer extraction
        with tqdm(total=len(file_list), desc="Extracting files") as pbar_outer:
            for file in file_list:
                zip_ref.extract(file, extract_folder)
                pbar_outer.update(1)

                # `endswith('.zip' or '.gz')` only ever tested for '.zip'
                # because the `or` expression evaluates to its first operand.
                # The two suffixes are tested separately since a gzip file
                # cannot be opened with ZipFile.
                lowered = file.lower()
                is_zip = lowered.endswith('.zip')
                is_gz = lowered.endswith('.gz')
                if not (is_zip or is_gz):
                    continue

                nested_path = os.path.join(extract_folder, file)
                # Target path: the member path with its last extension removed
                nested_target = os.path.join(extract_folder, os.path.splitext(file)[0])

                if is_zip:
                    # Extract files from the nested ZIP archive recursively
                    extract_all_files_with_progress(nested_path, nested_target)
                else:
                    # Stream the decompressed data in chunks rather than
                    # reading the whole file into memory at once.
                    with gzip.open(nested_path, 'rb') as gz_file, open(nested_target, 'wb') as output_file:
                        shutil.copyfileobj(gz_file, output_file)

                # Update the progress bar for the nested extraction
                pbar_outer.set_postfix_str(f"Nested Extraction: {file}", refresh=True)

# Example usage:
zip_file_path = "./datasets/kitsune+network+attack+dataset.zip"  # Replace with the path to your ZIP file
extract_folder = "./datasets/extracted"  # Replace with the desired extraction folder

# Ensure the extraction folder exists. exist_ok=True makes this a no-op when
# the folder is already there, with no separate existence check.
os.makedirs(extract_folder, exist_ok=True)

# Call the function to extract all files with progress, including nested ZIP archives
extract_all_files_with_progress(zip_file_path, extract_folder)


Extracting files: 100%|██████████| 37/37 [02:47<00:00,  4.54s/it]


In [None]:
import os
import gzip
from tqdm import tqdm

def extract_all_gz_files(folder_path, extract_folder):
    """Decompress every ``.gz`` file directly inside ``folder_path``.

    Each output file is written to ``extract_folder`` under the input file
    name with its last extension removed. Only the top level of
    ``folder_path`` is scanned (``os.listdir``); sub-folders are not visited.

    Args:
        folder_path: Directory containing the ``.gz`` files.
        extract_folder: Directory that receives the decompressed files;
            created if it does not exist.
    """
    # Function-scope import: this cell's import block does not provide shutil.
    import shutil

    # Create the extraction folder if it doesn't exist
    os.makedirs(extract_folder, exist_ok=True)

    # Names of the .gz files in the folder (suffix match is case-insensitive)
    file_list = [file for file in os.listdir(folder_path) if file.lower().endswith('.gz')]

    # Create a progress bar, one tick per decompressed file
    with tqdm(total=len(file_list), desc="Extracting .gz files") as pbar:
        for file in file_list:
            input_path = os.path.join(folder_path, file)
            output_path = os.path.join(extract_folder, os.path.splitext(file)[0])

            # Copy in chunks: gz_file.read() would hold the entire
            # decompressed file in memory before anything is written.
            with gzip.open(input_path, 'rb') as gz_file, open(output_path, 'wb') as output_file:
                shutil.copyfileobj(gz_file, output_file)

            pbar.update(1)

# Example usage: decompress the .gz files of one folder into a sub-folder
output_folder = "./datasets/extracted/active_wiretap/extracted"  # Replace with the desired extraction folder
input_folder = "./datasets/extracted/active_wiretap"  # Replace with the path to your folder containing .gz files

# Extract all .gz files with progress
extract_all_gz_files(folder_path=input_folder, extract_folder=output_folder)

Extracting .gz files:  67%|██████▋   | 2/3 [00:03<00:01,  1.99s/it]