In [1]:
from simple_image_download import simple_image_download as simp
import os
import shutil
import time

# 저장할 카테고리
categories = [
    "꽃다발 축하", "꽃다발 졸업", "꽃다발 취업", "꽃다발 생일",
    "꽃다발 프로포즈", "꽃다발 행사", "꽃다발 감사", "꽃다발 위로",
    "꽃다발 장례", "꽃다발 일상"
]

# 기본 다운로드 폴더
base_dir = "/Users/iminjae/Desktop/꽃 카테고리 이미지"
os.makedirs(base_dir, exist_ok=True)
downloader = simp.simple_image_download
for category in categories:
    folder_name = category.replace(" ", "_")  # 공백 → 언더바
    save_path = os.path.join(base_dir, folder_name)
    os.makedirs(save_path, exist_ok=True)

    print(f"[INFO] '{category}' 검색 및 다운로드 중...")
    downloader().download(category, 100)
    # simple_images 폴더에서 해당 카테고리 폴더 이동
    downloaded_path = os.path.join("simple_images", category)
    if os.path.exists(downloaded_path):
        for file in os.listdir(downloaded_path):
            shutil.move(os.path.join(downloaded_path, file), os.path.join(save_path, file))
        shutil.rmtree(downloaded_path)

    # 요청 간격 (IP 차단 방지)
    time.sleep(3)

print("[DONE] 모든 카테고리 다운로드 완료")


[INFO] '꽃다발 축하' 검색 및 다운로드 중...
HTTPSConnectionPool(host='www.fineflower.co.kr', port=443): Max retries exceeded with url: /board/upload/dica/18317666641422249264.jpg (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1147)')))
[INFO] '꽃다발 졸업' 검색 및 다운로드 중...
HTTPSConnectionPool(host='img0.yna.co.kr', port=443): Max retries exceeded with url: /etc/inner/KR/2025/02/07/AKR20250207080600505_05_i_P2.jpg (Caused by SSLError(SSLError(1, '[SSL: UNSAFE_LEGACY_RENEGOTIATION_DISABLED] unsafe legacy renegotiation disabled (_ssl.c:1147)')))
[INFO] '꽃다발 취업' 검색 및 다운로드 중...
HTTPSConnectionPool(host='img0.yna.co.kr', port=443): Max retries exceeded with url: /etc/inner/KR/2025/02/07/AKR20250207080600505_03_i_P4.jpg (Caused by SSLError(SSLError(1, '[SSL: UNSAFE_LEGACY_RENEGOTIATION_DISABLED] unsafe legacy renegotiation disabled (_ssl.c:1147)')))
HTTPSConnectionPool(host='img4.yna.co.kr', port=443): Ma

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from sklearn.metrics import classification_report
import numpy as np

# ===== 1. 디바이스 설정 =====
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"[INFO] Using device: {device}")

# ===== 2. 하이퍼파라미터 =====
data_dir = "/Users/iminjae/Desktop/꽃 카테고리 이미지"  # 폴더 구조: class별 하위폴더
batch_size = 16
num_epochs = 10

# ===== 3. 데이터 전처리 =====
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# ===== 4. 데이터셋 로드 =====
full_dataset = datasets.ImageFolder(data_dir, transform=transform_train)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])
val_dataset.dataset.transform = transform_val

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# ===== 5. 모델 정의 (ResNet18 전이학습) =====
num_classes = len(full_dataset.classes)
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# ===== 6. 손실함수 & 옵티마이저 =====
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4)

# ===== 7. 학습 루프 =====
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        # MPS는 일부 연산이 비동기이므로 동기화 필요할 때 flush
        if device.type == "mps":
            torch.mps.synchronize()

        optimizer.step()
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)

    # ===== 검증 =====
    model.eval()
    correct, total = 0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    val_acc = correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {epoch_loss:.4f} Val Acc: {val_acc:.4f}")

# ===== 8. 클래스별 평가 =====
print(classification_report(all_labels, all_preds, target_names=full_dataset.classes))

# ===== 9. 모델 저장 =====
torch.save(model.state_dict(), "flower_cnn_mps.pth")
print("✅ 모델 저장 완료")


[INFO] Using device: mps
Epoch [1/10] Loss: 2.1024 Val Acc: 0.4375
Epoch [2/10] Loss: 0.8787 Val Acc: 0.4821
Epoch [3/10] Loss: 0.4845 Val Acc: 0.4911
Epoch [4/10] Loss: 0.3177 Val Acc: 0.4732
Epoch [5/10] Loss: 0.3013 Val Acc: 0.4464
Epoch [6/10] Loss: 0.2684 Val Acc: 0.4464
Epoch [7/10] Loss: 0.2333 Val Acc: 0.4643
Epoch [8/10] Loss: 0.2234 Val Acc: 0.4643
Epoch [9/10] Loss: 0.2342 Val Acc: 0.4821
Epoch [10/10] Loss: 0.2328 Val Acc: 0.4643
              precision    recall  f1-score   support

      꽃다발_감사       0.73      0.53      0.62        15
      꽃다발_생일       0.22      0.25      0.24         8
      꽃다발_위로       0.50      0.30      0.38        10
      꽃다발_일상       1.00      0.43      0.60        14
      꽃다발_장례       0.43      0.60      0.50         5
      꽃다발_졸업       0.44      0.44      0.44         9
      꽃다발_축하       0.30      0.23      0.26        13
      꽃다발_취업       0.41      0.80      0.55        15
    꽃다발_프로포즈       0.47      0.69      0.56        13
      꽃다발_행사 

In [4]:
from PIL import Image
import os

data_dir = "/Users/iminjae/Desktop/꽃 카테고리 이미지"

def clean_images(base_dir):
    removed_count = 0
    for root, _, files in os.walk(base_dir):
        for file in files:
            if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
                os.remove(os.path.join(root, file))
                removed_count += 1
                continue
            file_path = os.path.join(root, file)
            try:
                with Image.open(file_path) as img:
                    img.verify()  # 파일이 정상인지 검사
            except Exception as e:
                os.remove(file_path)
                removed_count += 1
    print(f"✅ 손상/비이미지 파일 {removed_count}개 삭제 완료")

clean_images(data_dir)

✅ 손상/비이미지 파일 444개 삭제 완료


In [6]:
import torch
from torchvision import transforms, models
from PIL import Image
import os

# ===== 1. 설정 =====
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model_path = "/Users/iminjae/Desktop/개화/flower_cnn_mps.pth"  # 학습된 모델 경로
data_dir = "/Users/iminjae/Desktop/꽃 카테고리 이미지"

# ===== 2. 클래스 이름 불러오기 (ImageFolder 방식 기준) =====
from torchvision import datasets
dataset = datasets.ImageFolder(data_dir)
class_names = dataset.classes  # ['꽃다발_감사', '꽃다발_graduation', ...] 이런 식
print("[INFO] 클래스 목록:", class_names)

# ===== 3. 모델 구조 동일하게 로드 =====
num_classes = len(class_names)
model = models.resnet18(pretrained=False)
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)
model.eval()

# ===== 4. 이미지 전처리 =====
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

def predict_image(img_path):
    img = Image.open(img_path).convert("RGB")
    input_tensor = transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(input_tensor)
        _, pred = torch.max(outputs, 1)
        predicted_class = class_names[pred.item()]

    return predicted_class

# ===== 5. 테스트 =====
test_img = "/Users/iminjae/Downloads/lasbelloflower/3611293476041776377_48782193714_4.jpg"  # 예측할 이미지 경로
result = predict_image(test_img)
print(f"💐 이 꽃다발은 '{result}' 분위기입니다.")


[INFO] 클래스 목록: ['꽃다발_감사', '꽃다발_생일', '꽃다발_위로', '꽃다발_일상', '꽃다발_장례', '꽃다발_졸업', '꽃다발_축하', '꽃다발_취업', '꽃다발_프로포즈', '꽃다발_행사']




💐 이 꽃다발은 '꽃다발_프로포즈' 분위기입니다.


# 2번 시도

In [4]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [openpyxl]
[1A[2KSuccessfully installed et-xmlfile-2.0.0 openpyxl-3.1.5


In [6]:
import pandas as pd

# CSV 파일 경로
excel_path = '/Users/iminjae/Desktop/개화/경매정산통계.xlsx'  # 예: 화훼유통정보.csv

# CSV 불러오기
df = pd.read_excel(excel_path)

# '품종'이 비어 있는 경우는 '품목' 자체로만 구분
df['품종'] = df['품종'].fillna('')

# "장미 -계" 처럼 '-계' 붙은 품목은 품종 없이 처리 (불필요한 접미어 제거)
df['품목'] = df['품목'].str.replace('-계', '', regex=False).str.strip()

# '품목명'과 '품종명'을 합쳐 최종 검색어 생성
df['검색어'] = df.apply(lambda row: f"{row['품목']} {row['품종']}".strip(), axis=1)

# 총수량 기준으로 내림차순 정렬
df_sorted = df.sort_values(by='총수량', ascending=False)

# 상위 10개 추출
top10 = df_sorted.head(10)

# 검색어만 출력
print("✅ 구글 이미지 검색용 top10 꽃 품종:")
for i, kw in enumerate(top10['검색어'].tolist(), 1):
    print(f"{i:2d}. {kw}")


✅ 구글 이미지 검색용 top10 꽃 품종:
 1. 장미
 2. 국화
 3. 거베라
 4. 리시안사스
 5. 수국
 6. 공작초
 7. 해바라기
 8. 백합
 9. 거베라 미니(혼합)
10. 옥시페탈륨


In [7]:
from simple_image_download import simple_image_download as simp
import os
import time

# 🏷️ Top 10 꽃 품종명 리스트 (앞서 추출한 결과 예시)
flower_keywords = [
    "장미", "국화", "거베라", "리시안사스", "수국",
    "공작초", "해바라기", "백합", "옥시페탈륨", "노무라"
]

# 🗂️ 저장 경로 설정
base_dir = "/Users/iminjae/Desktop/개화/꽃품종분류"
os.makedirs(base_dir, exist_ok=True)

# 🔍 크롤링 시작
downloader = simp.simple_image_download

for kw in flower_keywords:
    # 예: "장미 꽃" 식으로 검색
    search_term = f"{kw} 꽃"
    folder_name = kw.replace(" ", "_")
    save_path = os.path.join(base_dir, folder_name)
    os.makedirs(save_path, exist_ok=True)

    print(f"[INFO] '{search_term}' 검색 및 다운로드 중...")
    try:
        downloader().download(search_term, 100)
        # 저장된 이미지 이동 (simple_images/<검색어> → base_dir/<카테고리>)
        downloaded_path = os.path.join("simple_images", search_term)
        if os.path.exists(downloaded_path):
            for file in os.listdir(downloaded_path):
                ext = file.lower().split('.')[-1]
                if ext in ['jpg', 'jpeg', 'png']:
                    src = os.path.join(downloaded_path, file)
                    dst = os.path.join(save_path, file)
                    os.rename(src, dst)
            os.rmdir(downloaded_path)
    except Exception as e:
        print(f"[ERROR] {kw} 크롤링 실패: {e}")

    # 요청 간격 (Google 차단 방지)
    time.sleep(3)

print("\n✅ 모든 품종 크롤링 완료!")


[INFO] '장미 꽃' 검색 및 다운로드 중...
[INFO] '국화 꽃' 검색 및 다운로드 중...
HTTPSConnectionPool(host='img7.yna.co.kr', port=443): Max retries exceeded with url: /photo/yna/YH/2025/06/25/PYH2025062505610005100_P4.jpg (Caused by SSLError(SSLError(1, '[SSL: UNSAFE_LEGACY_RENEGOTIATION_DISABLED] unsafe legacy renegotiation disabled (_ssl.c:1147)')))
[INFO] '거베라 꽃' 검색 및 다운로드 중...
HTTPSConnectionPool(host='www.treeinfo.net', port=443): Max retries exceeded with url: /data/file/ti_gallery/thumb-978141395_4b5665f8_DSC_0169_400x300.jpg (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1147)')))
[INFO] '리시안사스 꽃' 검색 및 다운로드 중...
HTTPSConnectionPool(host='www.treeinfo.net', port=443): Max retries exceeded with url: /data/file/ti_gallery_free/2109149503_jiLSJ2Ub_1302196980011def1d34686da9ab7a746a8bf961.jpg (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed:

In [8]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121  # CUDA 12.1 기준
!pip install transformers accelerate einops bitsandbytes


Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting accelerate
  Downloading accelerate-1.9.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.42.0-py3-none-any.whl.metadata (9.9 kB)
Downloading accelerate-1.9.0-py3-none-any.whl (367 kB)
Downloading bitsandbytes-0.42.0-py3-none-any.whl (105.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m21.7 MB/s[0m  [33m0:00:04[0mm0:00:01[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes, accelerate
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [accelerate]s]
[1A[2KSuccessfully installed accelerate-1.9.0 bitsandbytes-0.42.0


In [12]:
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
 
mistral_models_path = "MISTRAL_MODELS_PATH"
 
tokenizer = MistralTokenizer.v1()
 
completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")])
 
tokens = tokenizer.encode_chat_completion(completion_request).tokens


In [28]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# 1. 모델 로드
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)

# 2. 채팅 프롬프트 생성
prompt = tokenizer.apply_chat_template([
    {"role": "system", "content": "당신은 감정 기반 꽃다발 색상 추천 전문가입니다."},
    {"role": "user", "content": "친구가 이직에 성공했어요. 어떤 꽃다발을 주면 좋을까요?"}
], tokenize=False, add_generation_prompt=True)

# 3. 파이프라인으로 출력
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
output = pipe(prompt, max_new_tokens=128, temperature=0.7)

print(output[0]["generated_text"].replace(prompt, "").strip())


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2.
401 Client Error. (Request ID: Root=1-68941256-3a851b6d578e7a3629f1f8a7;0a0f662e-0d60-4d8f-9e0f-9b96d7405aea)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.2 is restricted. You must have access to it and be authenticated to access it. Please log in.

In [26]:
!pip install 'transformers[torch]'


