In [30]:
import cv2
import numpy as np
import requests
from PIL import Image
from io import BytesIO
import os
import pandas as pd

def load_image_from_url(url):
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert('RGB')
        return np.array(image)
    except requests.RequestException as e:
        print(f"Error loading image from URL: {e}")
        return None

def read_urls_from_csv(csv_file):
    try:
        df = pd.read_csv(csv_file)
        urls_by_folder = {}
        for index, row in df.iterrows():
            folder_name = f'{index}_{row[0]}'  # Combine row index and first column value
            urls_by_folder[folder_name] = []
            for url_string in row[1:].dropna():  # Skip the first column (folder name)
                split_urls = [url.strip() for url in url_string.split(',')]
                urls_by_folder[folder_name].extend(split_urls)
        
        return urls_by_folder
    except Exception as e:
        print(f"Error reading CSV file: {e}")
        return {}

def detect_edges(image):
    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    blurred_image = cv2.GaussianBlur(gray_image, (5, 5), 0)
    edges = cv2.Canny(blurred_image, 50, 150)
    return edges

def find_boundaries(edges):
    row_boundaries = []
    previous_row_sum = np.sum(edges[0, :])
    for i in range(1, edges.shape[0]):
        current_row_sum = np.sum(edges[i, :])

        if previous_row_sum == 0 and current_row_sum > 0:
            row_boundaries.append(i)
        
        previous_row_sum = current_row_sum

    row_boundaries.append(edges.shape[0])
    return row_boundaries

def process_image(url, output_folder, image_index):
    image = load_image_from_url(url)
    if image is None:
        print(f"Skipping URL due to image load failure: {url}")
        return

    edges = detect_edges(image)
    row_boundaries = find_boundaries(edges)

    if len(row_boundaries) <= 1:
        # 경계선이 발견되지 않은 경우 원본 이미지를 저장
        output_path = os.path.join(output_folder, f'image_{image_index}.png')
        cv2.imwrite(output_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
        print(f'Saved original image to {output_path}')
    else:
        # 경계선이 있는 경우, 경계선 사이의 이미지 조각을 저장
        for k in range(1, len(row_boundaries)):
            if row_boundaries[k] - row_boundaries[k-1] > 500:
                cropped_image = image[row_boundaries[k-1]:row_boundaries[k], :]
                output_path = os.path.join(output_folder, f'image_{image_index}_row_crop_{k}.png')
                cv2.imwrite(output_path, cv2.cvtColor(cropped_image, cv2.COLOR_RGB2BGR))
                print(f'Saved cropped image to {output_path}')

def main(csv_file):
    if not os.path.isfile(csv_file):
        print(f"CSV file does not exist: {csv_file}")
        return

    urls_by_folder = read_urls_from_csv(csv_file)
    if not urls_by_folder:
        print("No URLs to process. Exiting.")
        return
    
    common_output_folder = os.path.join(os.path.expanduser("~"), 'cropped_images')
    os.makedirs(common_output_folder, exist_ok=True)

    for folder_name, urls in urls_by_folder.items():
        output_folder = os.path.join(common_output_folder, folder_name)
        os.makedirs(output_folder, exist_ok=True)

        for image_index, url in enumerate(urls, start=1):
            process_image(url, output_folder, image_index)

# CSV 파일 경로를 입력하세요 (절대 경로 사용)
csv_file_path = '/Users/nyeong/Downloads/상세이미지 크롤링.csv'

# 메인 함수 실행
main(csv_file_path)


  folder_name = f'{index}_{row[0]}'  # Combine row index and first column value


Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_1_row_crop_1.png
Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_4_row_crop_1.png
Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_4_row_crop_3.png
Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_4_row_crop_8.png
Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_4_row_crop_12.png
Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_4_row_crop_14.png
Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_6_row_crop_2.png
Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_6_row_crop_3.png
Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_6_row_crop_5.png
Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_6_row_crop_8.png
Saved cropped image to /Users/nyeong/cropped_images/0_스웨트셔츠 [블랙]/image_6_row_crop_11.png
Saved cropped image to /Users