In [None]:
import os
import requests
import zipfile
from tqdm import tqdm

In [None]:
def download_and_unzip(url, target_path, timeout=30):
    """Download a zip archive from ``url`` into ``target_path`` and extract it there.

    The download is streamed to disk in small chunks while a tqdm progress bar
    tracks the bytes received. If the expected extracted directory is already
    present under ``target_path`` the call does nothing. The downloaded archive
    is deleted once the call finishes, whether or not it succeeded, so a
    truncated or corrupt file is never left behind.

    Args:
        url: Address of the zip archive; the text after the last ``/`` is used
            as the local file name.
        target_path: Directory that receives the archive and its extracted
            contents. Created if it does not exist.
        timeout: Value forwarded to ``requests.get(..., timeout=...)`` so a
            stalled server cannot block the call indefinitely.

    Returns:
        None. Progress and failures are reported with ``print``.

    Raises:
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
        OSError: If the archive cannot be written, extracted, or removed.

    Request failures (``requests.exceptions.RequestException``) are caught and
    printed rather than raised.
    """
    # Ensure the target directory exists
    os.makedirs(target_path, exist_ok=True)

    file_name = url.split('/')[-1]
    zip_path = os.path.join(target_path, file_name)

    # Skip the download when the extracted directory is already in place.
    # NOTE(review): these names are assumed to match the top-level folders
    # inside the two archives (including the "Flicker8k" spelling) - confirm.
    unzipped_dir_name = "Flicker8k_Dataset" if "Dataset" in file_name else "Flickr8k_text"
    if os.path.exists(os.path.join(target_path, unzipped_dir_name)):
        print(f"'{unzipped_dir_name}' already exists. Skipping download.")
        return

    print(f"Downloading {file_name}...")

    block_size = 1024  # 1 Kilobyte
    # Set once this call has opened the archive for writing, so cleanup never
    # deletes a file that this call did not create.
    archive_written = False

    try:
        # Context managers release the connection and close the progress bar
        # and the file even when the transfer fails part-way through.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Raise an exception for bad status codes
            total_size_in_bytes = int(response.headers.get('content-length', 0))

            with tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) as progress_bar:
                with open(zip_path, 'wb') as file:
                    archive_written = True
                    for data in response.iter_content(block_size):
                        progress_bar.update(len(data))
                        file.write(data)
                bytes_received = progress_bar.n

        # A declared size of 0 means the server sent no content-length header,
        # in which case the completeness check is skipped.
        if total_size_in_bytes != 0 and bytes_received != total_size_in_bytes:
            print("ERROR, something went wrong during download.")
            return

        # Unzip the file
        print(f"Extracting {file_name}...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(target_path)

        # Clean up the downloaded zip file
        os.remove(zip_path)
        archive_written = False
        print(f"Successfully downloaded and extracted to '{target_path}'.")

    except requests.exceptions.RequestException as e:
        print(f"Failed to download {file_name}. Error: {e}")

    finally:
        # Remove a partial or corrupt archive left by any failure path above.
        if archive_written and os.path.exists(zip_path):
            os.remove(zip_path)

In [None]:
# Where the downloaded data is unpacked.
dataset_path = "./Flickr8k_Data"

# Release archives: the images first, then the text files (captions).
images_url = "https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_Dataset.zip"
text_url = "https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_text.zip"

# Fetch and extract each archive in turn into the same target directory.
for archive_url in (images_url, text_url):
    download_and_unzip(archive_url, dataset_path)

print("\nDataset is ready.")