In [1]:
import pandas as pd
# Load the image listing; header=None because the file has no header row,
# so its first line is kept as data.
listing_file = "slowfashion_dataset/slowfashion_all_images.txt"
dataframe = pd.read_csv(listing_file, header=None)

In [2]:
# Give the column a name so the following cells can address it as "path".
dataframe.columns = pd.Index(["path"])

In [3]:
# Plain Python list of every line of the listing, in file order.
paths_list = dataframe.loc[:, "path"].tolist()

In [4]:
# Show the rows that are not URLs: these are the section headers, and their
# index shows where each section starts in the listing.
has_http = dataframe["path"].str.contains("http")
dataframe.loc[~has_http]

Unnamed: 0,path
0,FRONT:
536,BACK:
841,LABEL:
1146,FIRST DETAIL:
1431,SECOND DETAIL:
1639,OTHER:


In [5]:
# Group the URLs under the header line that precedes them.
# A line without "http" is a header: it starts a new (empty) group keyed by
# the header text with every ":" removed. Every other line is appended to the
# group opened by the most recent header.
dictionary_elements = {}
current_key = ""
for entry in paths_list:
    if "http" in entry:
        dictionary_elements[current_key].append(entry)
        continue
    current_key = entry.replace(":", "")
    dictionary_elements[current_key] = []

In [6]:
# dictionary_elements  # uncomment to inspect the full header -> URL-list mapping (large output)

In [7]:
import os
import requests
import time
from typing import Dict, List, Tuple
from tqdm import tqdm
from urllib.parse import urlparse

def _pick_target_path(folder_path: str, file_name: str, url: str, owners: dict) -> str:
    """Choose where inside folder_path the content of url should be written.

    owners maps every path written so far to the URL it came from. The plain
    file_name is used when it is free or already belongs to this same URL;
    when a different URL owns it, a numeric suffix is inserted before the
    extension (img.jpg -> img_1.jpg, img_2.jpg, ...).
    """
    stem, extension = os.path.splitext(file_name)
    candidate = os.path.join(folder_path, file_name)
    suffix = 0
    # .get(candidate, url) == url means "unused, or already ours".
    while owners.get(candidate, url) != url:
        suffix += 1
        candidate = os.path.join(folder_path, f"{stem}_{suffix}{extension}")
    return candidate


def download_images_to_folders(
    folder_dict: Dict[str, List[str]],
    parent_folder: str,
    timeout: float = 10,
    delay: float = 0.5,
) -> Tuple[List[Tuple[str, str, str]], List[str]]:
    """
    Downloads images from a dictionary mapping folder names to lists of image URLs.

    Each image is saved as <parent_folder>/<folder name>/<last segment of the URL path>.
    A failure on one URL is recorded and the run continues with the next URL.

    Within one call, two different URLs that would map to the same file no
    longer overwrite each other: the later one gets a numeric suffix
    (img.jpg, img_1.jpg, ...). A URL listed twice is re-downloaded to the same
    file. Files left on disk by earlier calls are still overwritten, so
    re-running the download refreshes files in place.

    Args:
        folder_dict: Dict where keys are folder names, and values are lists of image URLs.
        parent_folder: The base path where the folders should be created/downloaded to.
        timeout: Timeout in seconds passed to each HTTP request.
        delay: Seconds to pause after every HTTP request, successful or not.
            Values <= 0 disable the pause.

    Returns:
        A tuple:
            - List of (folder_name, image_url, error_message) for failed downloads.
            - List of full paths to successfully downloaded images.

    Raises:
        OSError: If a target folder cannot be created (per-URL errors are not
            raised; they are returned in the error list).
    """
    error_list = []
    downloaded_paths = []
    owners = {}  # path written during this call -> URL it was downloaded from

    # One session for the whole run so connections to the same host are reused.
    with requests.Session() as session:
        for folder_name, url_list in folder_dict.items():
            print(f"\n Downloading images into folder: {folder_name}")
            folder_path = os.path.join(parent_folder, folder_name)
            os.makedirs(folder_path, exist_ok=True)

            for url in tqdm(url_list, desc=f"Processing {folder_name}", unit="img"):
                try:
                    file_name = os.path.basename(urlparse(url).path)
                    if not file_name:
                        # Fail before touching the network: without a name the
                        # target would be the folder itself.
                        raise ValueError("URL path does not end in a file name")

                    try:
                        response = session.get(url, timeout=timeout)
                        response.raise_for_status()
                    finally:
                        # Throttle after every request that was attempted, so a
                        # run of failing URLs does not hit the server back to back.
                        if delay > 0:
                            time.sleep(delay)

                    file_path = _pick_target_path(folder_path, file_name, url, owners)
                    with open(file_path, 'wb') as f:
                        f.write(response.content)

                    owners[file_path] = url
                    downloaded_paths.append(file_path)

                except Exception as e:
                    # Best effort by design: remember the failure and move on.
                    error_list.append((folder_name, url, str(e)))

    return error_list, downloaded_paths




In [9]:
# Download every listed image into ../slowfashion/<section name>/.
# `errors` collects the failed URLs, `downloaded` the paths of the saved files.
(errors, downloaded) = download_images_to_folders(
    folder_dict=dictionary_elements,
    parent_folder="../slowfashion/",
)


 Downloading images into folder: FRONT


Processing FRONT: 100%|██████████████████████████████████████████████████████████████| 535/535 [06:01<00:00,  1.48img/s]



 Downloading images into folder: BACK


Processing BACK: 100%|███████████████████████████████████████████████████████████████| 304/304 [03:26<00:00,  1.47img/s]



 Downloading images into folder: LABEL


Processing LABEL: 100%|██████████████████████████████████████████████████████████████| 304/304 [03:31<00:00,  1.44img/s]



 Downloading images into folder: FIRST DETAIL


Processing FIRST DETAIL: 100%|███████████████████████████████████████████████████████| 284/284 [03:17<00:00,  1.44img/s]



 Downloading images into folder: SECOND DETAIL


Processing SECOND DETAIL: 100%|██████████████████████████████████████████████████████| 207/207 [02:24<00:00,  1.43img/s]



 Downloading images into folder: OTHER


Processing OTHER: 100%|████████████████████████████████████████████████████████████| 1374/1374 [15:51<00:00,  1.44img/s]


In [13]:
# One-column frame holding the path of every successfully downloaded image.
# The column is named in the constructor rather than by assigning `.columns`
# afterwards: with an empty `downloaded` list the frame has no columns, and
# assigning a one-element column list raises a length-mismatch ValueError.
results_dataframe = pd.DataFrame(downloaded, columns=["path"])

In [15]:
# Label each image with the name of the folder it was saved in (FRONT, BACK, ...).
# The parent directory name is taken with os.path instead of a fixed
# "/"-split index, so the label stays correct when the parent folder has a
# different depth or the paths were joined with the OS-specific separator.
results_dataframe["type"] = results_dataframe["path"].apply(
    lambda item: os.path.basename(os.path.dirname(item))
)

In [17]:
# Write the (type, path) index of the downloaded images, without the row index.
results_dataframe.loc[:, ["type", "path"]].to_csv(
    "../slowfashion/information.csv",
    index=False,
)