In [3]:
import logging
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO

import pandas as pd
import requests
from PIL import Image
from tqdm import tqdm

# Path to the input CSV file
file_path = 'hash_pid_with_imageUrls.csv'
# file_path = 'sample2.csv'  # alternative input, currently disabled
# Load the CSV into a DataFrame
data = pd.read_csv(file_path)

# Download a single image and save it to disk
def download_and_save_image(image_url, pid, image_number):
    """Fetch ``image_url`` and save it as ``<pid>/<pid>_<image_number>.jpg``.

    The download is best-effort: a non-200 response or any error raised
    while fetching, decoding, or writing the image is logged as a warning
    and the function returns normally, so one bad URL never aborts a batch.

    Args:
        image_url: URL of the image to fetch.
        pid: Identifier used as both the output directory name and the
            file-name prefix.
        image_number: Index appended to the file name to distinguish
            images that share the same ``pid``.

    Returns:
        None.
    """
    logger = logging.getLogger(__name__)
    try:
        response = requests.get(image_url, timeout=10)
        if response.status_code != 200:
            logger.warning(
                "Skipping %s (pid=%s, image=%s): HTTP status %s",
                image_url, pid, image_number, response.status_code,
            )
            return

        # JPEG cannot store alpha or palette modes, so normalise to RGB first.
        image = Image.open(BytesIO(response.content)).convert('RGB')

        directory = f'{pid}'
        # exist_ok=True instead of a check-then-create: several worker threads
        # may process images of the same pid at once, and the losing thread
        # would otherwise raise FileExistsError and drop its image.
        os.makedirs(directory, exist_ok=True)

        file_name = os.path.join(directory, f'{pid}_{image_number}.jpg')
        image.save(file_name, 'JPEG')
    except Exception as exc:
        # Deliberately broad: this is the worker boundary of a bulk download,
        # so every failure is reported and skipped rather than propagated.
        logger.warning(
            "Failed to download %s (pid=%s, image=%s): %s",
            image_url, pid, image_number, exc,
        )

# Download all images concurrently with a thread pool
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = []
    for _, row in data.iterrows():
        pid = row['hash_pid']
        # Each row carries up to 12 image URLs in columns imageUrl1..imageUrl12.
        for i in range(1, 13):
            image_url = row[f'imageUrl{i}']
            if pd.notna(image_url):
                futures.append(
                    executor.submit(download_and_save_image, image_url, pid, i)
                )

    # Show progress with tqdm. as_completed yields each future as soon as it
    # finishes, so the bar advances in completion order and no second
    # executor is needed just to wait on the results.
    for future in tqdm(as_completed(futures), total=len(futures), desc="Downloading images"):
        future.result()


: 