<a href="https://colab.research.google.com/github/kamg2218/Data-Analysis-with-Open-Source/blob/main/%EC%98%A4%ED%94%88%EC%86%8C%EC%8A%A4_%EB%8D%B0%EC%9D%B4%ED%84%B0_%EB%B6%84%EC%84%9D_14%EA%B0%95.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 14강 비정형 데이터 분석 : 패션 사진 데이터 활용

### 목표

- 비정형 데이터를 인공지능 모델로 분석하여 실무에서 활용 가능한 보고서 형태로 가공

- 패션 트렌드라는 구체적인 주제를 통해, 비정형 데이터 분석의 실질적인 활용 방안을 경험하고자 함


### 분석 프로세스 개요

1. 데이터 수집
  - requests를 이용한 RSS 데이터 수집
  - lxml을 이용한 XML 파싱
  - 이미지 데이터 추출
2. VLM을 이용한 이미지 분석
  - 프롬프트를 이용한 이미지 필터링
  - 프롬프트를 이용한 스타일 분석
3. LLM을 이용한 키워드 분석 및 보고서 작성
  - 텍스트 전처리
  - 색상 및 스타일 키워드 추출
  - 워드 클라우드 분석
  - 보고서 작성

# 주의 : 런타임 GPU 로 설정 필요

In [None]:
# 4bit VLM 처리를 위한 bitsandbytes 설치
# LLM 처리를 위한 VLLM 설치 (오래걸리는 작업(>5분)이므로 미리 실행!)
!pip install bitsandbytes==0.45.3 vllm==0.7.3 transformers==4.48.2
# 필요 시 세션 재시작

In [None]:
# 한글 처리를 위한 matplotlib 설정 (1)

!sudo apt-get install -y fonts-nanum
!sudo fc-cache -fv
!rm ~/.cache/matplotlib -rf

- 런타임 -> 세션 다시 시작

In [1]:
# 한글 처리를 위한 matplotlib 설정 (2)

import matplotlib.pyplot as plt
plt.rc('font', family='NanumBarunGothic')

# 1. 데이터 수집 및 전처리

## 14-1 RSS 피드에서 이미지 URL 추출

In [2]:
import requests
from lxml import etree
from lxml.html import fromstring
import pandas as pd

def extract_unique_images(rss_url):
    ## 주어진 RSS 피드 URL에서 고유한 이미지 URL들을 추출하는 함수 정의
    try:
        ## requests 라이브러리를 사용하여 RSS 피드 URL로부터 내용을 가져옴
        response = requests.get(rss_url)
        ## 가져온 XML 응답 내용을 lxml의 etree.fromstring으로 파싱하여 XML 트리 root를 생성
        root = etree.fromstring(response.content)

        image_urls = set()

        ## XML 트리에서 모든 'item' 태그를 XPath를 사용하여 순회
        for item in root.xpath('//item'):
            description = item.find('description')
            if description is not None and description.text:
                ## description의 텍스트 내용을 lxml.html.fromstring으로 파싱하여 HTML 트리를 생성
                html_tree = fromstring(description.text)
                ## HTML 트리에서 첫 번째 <img> 태그의 'src' 속성 값을 XPath를 사용하여 추출
                img_url = html_tree.xpath('string(//img/@src)')
                if img_url:
                    image_urls.add(img_url)

        return list(image_urls)

    except Exception as e:
        ## 오류 발생 시 오류 메시지를 출력하고 빈 리스트를 반환
        print(f"Error occurred: {e}")
        return []

rss_url = "https://glltn.com/feed/"
## extract_unique_images 함수를 호출하여 고유한 이미지 URL들을 추출
unique_images = extract_unique_images(rss_url)

## 추출된 이미지 URL 리스트를 사용하여 'image'라는 열을 가진 pandas DataFrame을 생성
df = pd.DataFrame(unique_images, columns=["image"])

## 14-2 수집 데이터 확인

In [3]:
from IPython.display import display, HTML

def path_to_image_html(path):
    ## 이미지 경로를 HTML img 태그로 변환하는 함수
    return f'<img src="{path}" width="300" />'

## DataFrame의 스타일을 설정하여 이미지 너비를 300px로 지정
df.style.set_table_styles([{'selector': 'img', 'props': 'width: 300px;'}])

## DataFrame을 HTML로 변환하여 출력. 이미지 열은 path_to_image_html 함수로 포맷팅
display(HTML(df.to_html(escape=False, formatters=dict(**{'image': path_to_image_html}))))

Unnamed: 0,image
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,


## 2. VLM을 이용한 이미지 분석

## 14-3 VLM 모델 로드

In [4]:
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

## 'openbmb/MiniCPM-V-2_6-int4' 모델을 사전 훈련된 가중치와 함께 로드
## trust_remote_code=True는 허브에서 사용자 정의 코드를 실행할 수 있도록 허용
model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True)
## 로드된 모델에 해당하는 토크나이저를 로드
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True)
## 모델을 평가 모드로 설정 (드롭아웃 등 훈련 시에만 필요한 기능 비활성화)
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

configuration_minicpm.py: 0.00B [00:00, ?B/s]

modeling_navit_siglip.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/openbmb/MiniCPM-V-2_6-int4:
- modeling_navit_siglip.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/openbmb/MiniCPM-V-2_6-int4:
- configuration_minicpm.py
- modeling_navit_siglip.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_minicpmv.py: 0.00B [00:00, ?B/s]

resampler.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/openbmb/MiniCPM-V-2_6-int4:
- resampler.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/openbmb/MiniCPM-V-2_6-int4:
- modeling_minicpmv.py
- resampler.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.45G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenization_minicpmv_fast.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/openbmb/MiniCPM-V-2_6-int4:
- tokenization_minicpmv_fast.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

MiniCPMV(
  (llm): Qwen2ForCausalLM(
    (model): Qwen2Model(
      (embed_tokens): Embedding(151666, 3584)
      (layers): ModuleList(
        (0-27): 28 x Qwen2DecoderLayer(
          (self_attn): Qwen2Attention(
            (q_proj): Linear4bit(in_features=3584, out_features=3584, bias=True)
            (k_proj): Linear4bit(in_features=3584, out_features=512, bias=True)
            (v_proj): Linear4bit(in_features=3584, out_features=512, bias=True)
            (o_proj): Linear4bit(in_features=3584, out_features=3584, bias=False)
          )
          (mlp): Qwen2MLP(
            (gate_proj): Linear4bit(in_features=3584, out_features=18944, bias=False)
            (up_proj): Linear4bit(in_features=3584, out_features=18944, bias=False)
            (down_proj): Linear4bit(in_features=18944, out_features=3584, bias=False)
            (act_fn): SiLU()
          )
          (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
          (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=

![](https://farm3.staticflickr.com/2677/4434956914_6e95a22940_z.jpg)

## 14-4 이미지 질문 응답 예시

In [5]:
from transformers import set_seed

## 재현성을 위해 시드(seed)를 42로 설정
set_seed(42)
## 예시 이미지 URL 정의
image_url = 'https://farm3.staticflickr.com/2677/4434956914_6e95a22940_z.jpg'
## requests로 이미지 다운로드 후 PIL Image 객체로 열고 RGB 형식으로 변환
image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
## 이미지에 대한 질문 정의
question = 'how many cats in the photo?'
## 모델 입력 형식에 맞춰 메시지 구성 (이미지와 질문 포함)
msgs = [{'role': 'user', 'content': [image, question]}]
## 모델의 chat 함수를 호출하여 이미지와 질문에 대한 응답 생성
result = model.chat(image=None, msgs=msgs, tokenizer=tokenizer)
## 모델의 응답 출력
print(result)

preprocessor_config.json:   0%|          | 0.00/714 [00:00<?, ?B/s]

processing_minicpmv.py: 0.00B [00:00, ?B/s]

image_processing_minicpmv.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/openbmb/MiniCPM-V-2_6-int4:
- image_processing_minicpmv.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/openbmb/MiniCPM-V-2_6-int4:
- processing_minicpmv.py
- image_processing_minicpmv.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


1


In [7]:
set_seed(86)
## 이미지에 대한 질문을 업데이트. 책 표지의 고양이도 포함하도록 요청
question = 'how many cats in the photo? including the books cover.'
## 모델 입력 형식에 맞춰 메시지 구성 (이전에 로드된 이미지와 업데이트된 질문 포함)
msgs = [{'role': 'user', 'content': [image, question]}]
## 모델의 chat 함수를 호출하여 업데이트된 질문에 대한 응답 생성
result = model.chat(image=None, msgs=msgs, tokenizer=tokenizer)
## 모델의 응답 출력
print(result)

2


In [8]:
set_seed(42)
## 이미지에 대한 질문을 'describe the photo'로 설정하여 이미지 내용을 설명하도록 요청
question = 'describe the photo'
## 모델 입력 형식에 맞춰 메시지 구성 (이전에 로드된 이미지와 설명 요청 질문 포함)
msgs = [{'role': 'user', 'content': [image, question]}]
## 모델의 chat 함수를 호출하여 이미지에 대한 설명을 생성
result = model.chat(image=None, msgs=msgs, tokenizer=tokenizer)
## 모델의 응답 (이미지 설명) 출력
print(result)

The photo shows a book titled "why dogs are better than cats" with an image of a cat on the cover, resting its head on a dog's back. To the right side of the frame is a real-life cat, standing and observing the camera, which has a similar striped pattern as the one depicted on the book cover. The setting appears to be indoors, possibly on a wooden surface or floor.


## 14-5 의류 이미지 여부 판단

In [10]:
def is_picture_of_clothing(image_url):
    ## 이미지 URL이 의류 사진인지 판단하는 함수
    # 의류가 포함된 사진인지 확인하는 질문 작성 (영어로)
    question = 'is this a picture of clothing? Must say yes or no.'
    image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
    msgs = [{'role': 'user', 'content': [image, question]}]
    result = model.chat(image=None, msgs=msgs, tokenizer=tokenizer, temperature=0.1)
    print(result)
    ## 응답에 'yes'가 포함되어 있는지 확인하여 True/False 반환
    return 'yes' in result.lower()

## DataFrame의 'image' 열에 함수를 적용하여 'is_clothing' 열에 결과 저장
df['is_clothing'] = df['image'].apply(is_picture_of_clothing)

No.
yes
No.
yes
yes
yes
yes
yes
yes
yes
yes
no


## 14-6 의류 판단 결과 시각화

In [11]:
display(HTML(df.to_html(escape=False, formatters=dict(**{'image': path_to_image_html}))))

Unnamed: 0,image,is_clothing
0,,False
1,,True
2,,False
3,,True
4,,True
5,,True
6,,True
7,,True
8,,True
9,,True


## 14-7 의류 이미지 필터링

In [None]:
## 'is_clothing' 열의 값이 True인 행들만 필터링하여 DataFrame을 업데이트


## 14-8 의류 스타일 분석

In [12]:
def describe_style(image_url):
    ## 주어진 이미지 URL의 의류 스타일을 분석하는 함수
    question = 'Analyze the style of the colthes. Please let me explain the colors and trend changes'
    image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
    msgs = [{'role': 'user', 'content': [image, question]}]
    ## 모델의 chat 함수를 호출하여 이미지에 대한 스타일 분석 응답 생성
    result = model.chat(image=None, msgs=msgs, tokenizer=tokenizer)
    return result

## 필터링된 DataFrame의 'image' 열에 describe_style 함수를 적용
## 결과는 'style'이라는 새로운 열에 저장
df['style'] = df['image'].apply(describe_style)

In [13]:
display(HTML(df.to_html(escape=False, formatters=dict(**{'image': path_to_image_html}))))

Unnamed: 0,image,is_clothing,style
0,,False,"The clothing style depicted in the photograph appears to be practical and suited for cooler weather, suggesting a setting during late autumn or winter. The individuals are wearing coats that seem to be made of heavy fabric, likely designed for warmth, which is common in colder climates or seasons. The dark colors of their attire could indicate a preference for modesty or functionality over fashion trends.\n\nIn terms of color, the palette consists mainly of muted tones such as black and shades of grey, which are often chosen for their versatility and ability to blend with various environments without drawing too much attention. This choice can also signify a utilitarian approach to dressing, prioritizing comfort and protection from the elements over aesthetic considerations.\n\nRegarding trend changes, it's challenging to pinpoint specific trends due to the limited view and context provided by the image. However, the absence of bright colors or trendy patterns suggests a conservative or classic sense of style, which might have been more prevalent in past decades before streetwear and casual fashion became dominant in urban settings. To provide a detailed analysis of current fashion trends, one would need more contemporary examples to compare against the historical or regional styles shown here."
1,,True,"The style of the boots in the image suggests a classic and functional design, often associated with outdoor activities such as hiking or mountaineering. The use of suede for the upper part indicates a preference for durable materials that offer both comfort and protection against rough terrain. The color brown is versatile and commonly used in footwear due to its ability to match various outfits and its natural appearance which blends well with outdoor environments.\n\nIn terms of trend changes, these types of boots have seen a resurgence in popularity, particularly among those who appreciate vintage or retro styles. This could be attributed to a broader trend towards practical, timeless pieces that are also stylish enough to be worn casually outside of their intended purpose. The presence of a zipper on the side adds a modern twist to an otherwise traditional design, offering convenience and ease of wear without compromising the aesthetic.\n\nOverall, the boots represent a blend of functionality and fashion, appealing to consumers looking for durable yet fashionable footwear options that can transition from outdoor adventures to urban settings."
2,,False,"The style of clothing in the image is indicative of a modern, minimalist aesthetic. The garments feature clean lines and simple cuts, with an emphasis on functionality over ornate detailing. This approach to design can be associated with contemporary fashion trends that prioritize comfort, versatility, and a timeless quality.\n\nIn terms of color, the outfits predominantly use neutral tones such as black, white, and shades of grey. These colors are versatile and often chosen for their ability to create a sophisticated and understated look. They also allow for easy layering and mixing and matching with other pieces in a wardrobe.\n\nThe inclusion of subtle patterns, like stripes or checks, adds visual interest without overwhelming the overall simplicity of the designs. Such patterns are popular in modern fashion for their ability to introduce texture and depth while maintaining a sleek appearance.\n\nIn summary, the clothing style depicted in the image aligns with current fashion trends that favor minimalism, versatility, and a refined, understated elegance achieved through the strategic use of neutral colors and subtle patterns."
3,,True,"The clothing style depicted in the image leans towards a casual, perhaps outdoor or streetwear aesthetic. The brown color of the jacket and pants suggests a preference for earthy tones, which are often associated with natural materials and can be seen as timeless in fashion. The textured pattern on the jacket adds a layer of depth to the outfit, indicating a trend that favors more tactile and visually interesting fabrics over plain surfaces.\n\nThe relaxed fit of the jacket and pants points to a comfort-oriented approach, which is a common characteristic of modern casual wear. This could indicate a shift away from highly structured and tailored garments towards a more laid-back, functional look that prioritizes ease of movement and personal expression.\n\nIn terms of trend changes, this style reflects a broader fashion movement where practicality meets aesthetics, particularly evident in recent years. It's indicative of a consumer base valuing versatility and sustainability in their wardrobe choices. The absence of flashy logos or bright colors further supports the idea of a minimalist and possibly eco-conscious fashion sensibility."
4,,True,"The sneakers depicted in the image are styled with a classic and somewhat vintage aesthetic. The use of beige or light tan for the upper part is reminiscent of military-inspired footwear, which has seen revivals over various decades. This color choice often conveys a sense of neutrality and versatility, making it a popular option for casual wear.\n\nThe contrasting red on the toe cap adds a pop of color that draws attention to this feature, potentially indicating a nod to retro design elements where such color contrasts were prominent. Red is often associated with energy and passion, which could be an intentional stylistic choice to give the shoes a dynamic look.\n\nIn terms of trend changes, these sneakers reflect a blend of contemporary and past styles. While the overall design leans towards timeless, the specific color choices suggest a modern twist on classic designs. Such trends often occur when there's a resurgence of interest in historical fashion elements, modified to fit current tastes.\n\nThe low-top design indicates a preference for comfort and ease of movement, which aligns with the sneaker culture's emphasis on functionality as well as style. The lace-up closure allows for a customizable fit, another aspect valued by consumers looking for both comfort and personalization in their footwear.\n\nOverall, the sneakers' style suggests a deliberate mix of nostalgia and modernity, appealing to those who appreciate the enduring appeal of classic designs while also seeking out fresh, updated versions."
5,,True,"The style of the clothing in the image leans towards a minimalist and modern aesthetic, which is characterized by simplicity, clean lines, and neutral colors. The dark grey suit jacket with its unstructured fit and lack of embellishment suggests a contemporary take on formal wear, possibly influenced by streetwear or avant-garde fashion trends that favor comfort and relaxed silhouettes over traditional tailoring.\n\nThe choice of a light grey t-shirt underneath adds to the understated look, providing a contrast without competing for attention. This layering approach is indicative of a trend where casual pieces are paired with more formal garments to create a balanced, versatile outfit. \n\nIn terms of color, the use of shades of grey demonstrates a preference for subdued, neutral tones that can be easily mixed and matched. Grey is often chosen for its versatility and ability to convey a sense of sophistication while remaining unobtrusive. It's a popular color in men's fashion due to its timeless appeal and the way it can complement various skin tones and other colors.\n\nOverall, the attire reflects a current fashion trend that values subtlety, comfort, and the blending of different styles, such as combining elements of streetwear with classic suiting."
6,,True,"The clothing styles in the image suggest a blend of practicality and fashion-forward design, indicative of contemporary urban attire. The individual on the left is wearing a brown jacket with fur-lined hood, which suggests functionality for colder weather while also incorporating a stylish element with its textured material. This type of jacket often appeals to those who seek both warmth and a rugged aesthetic.\n\nThe person on the right is dressed in a black coat with a prominent white fur collar, which exudes luxury and a bold fashion statement. The contrast between the dark coat and the bright fur adds visual interest and could indicate a trend towards mixing textures and colors as a way to create standout pieces. The braid hairstyle complements this look by adding an edgy, non-conformist touch that aligns with current trends favoring unique personal expressions.\n\nOverall, these outfits reflect a modern style where comfort meets chic, and there's an evident appreciation for distinctive details such as fur trims and contrasting color schemes. These elements are characteristic of fashion choices seen in city environments, particularly during transitional seasons like fall or spring when layering becomes necessary but style remains paramount."
7,,True,"The style of the clothes worn by the individual in the image reflects a casual, contemporary fashion trend. The use of muted colors such as the dark blue hoodie and light blue shirt suggests an inclination towards understated elegance rather than bold statements. This color palette is often associated with a minimalist aesthetic, which values simplicity and functionality over ornate details.\n\nThe combination of a hoodie with a collared shirt underneath indicates a layering technique that is popular in modern streetwear. Hoodies are versatile pieces that offer comfort while also being stylish when paired with dressier items like shirts or blazers. The unzipped state of the hoodie adds to the relaxed feel of the outfit, suggesting a preference for ease of movement and a laid-back attitude.\n\nIn terms of trends, this look could be seen as part of a broader shift away from rigid gender-specific fashion norms, where men's clothing has become more fluid and inclusive. The presence of both traditionally masculine (shirt) and feminine (hoodie) elements combined into one ensemble points to a trend towards cross-gender expression in fashion.\n\nOverall, the attire showcases a blend of comfort and style, characteristic of contemporary urban fashion that emphasizes personal expression through clothing choices."
8,,True,"The style of the clothing in the image reflects a practical and utilitarian approach to winter fashion, likely chosen for warmth rather than trendiness. The predominant use of puffer jackets suggests an emphasis on insulation against cold weather, which is common in urban environments during fall or winter seasons.\n\nStarting from the left, the individual wears a black puffer jacket paired with a plaid skirt, which combines classic elements like the puffer jacket with more feminine attire. This mix indicates a personal style that balances comfort with a nod to traditional women's wear.\n\nNext, the purple puffer jacket stands out due to its vibrant color, making it a focal point in the group. Purple can signify creativity and individuality, suggesting that the wearer may have a bold personality or enjoys standing out within their social circles.\n\nThe third person's olive green puffer jacket is more subdued compared to the others but still functional. Green is often associated with nature and tranquility, which might reflect the wearer's connection to these qualities or simply a preference for earthy tones.\n\nLastly, the navy blue puffer jacket, worn by the man on the right, complements his baseball cap, creating a cohesive look. Navy blue is a versatile color that pairs well with many other colors, indicating a preference for simplicity and functionality in his wardrobe choices.\n\nOverall, the group's clothing styles suggest they are dressed for cold weather, prioritizing warmth and comfort over current fashion trends. Their outfits could be indicative of working-class individuals who need durable and practical clothing suitable for outdoor activities in colder climates."
9,,True,"The style of the clothing in the image reflects a minimalist and modern aesthetic, which is characterized by simplicity, clean lines, and a monochromatic color palette. The black blazer with its wide lapels and relaxed fit aligns with contemporary fashion trends that favor comfort and understated elegance over bold statements or excessive detailing.\n\nBlack as a color choice is timeless and versatile, often associated with sophistication and a sense of authority. It's also a staple in many wardrobes due to its ability to pair well with various other colors and styles. The white garment underneath adds contrast without being overly distracting, maintaining the overall sleek look.\n\nThe trend here seems to be towards unisex or gender-neutral clothing, where traditional male and female fashion boundaries are blurred. This shift is part of a broader movement in fashion towards inclusivity and fluidity in dress, allowing individuals to express themselves freely without being confined to rigid gender norms.\n\nIn summary, the clothes in the image represent a modern, minimalist approach to fashion, emphasizing comfort, versatility, and an unisex appeal."


# 3. LLM을 이용한 키워드 분석 및 보고서 작성

## 14-9 언어 모델(LLM) 로드

In [None]:
from vllm import LLM, SamplingParams

## vLLM 라이브러리를 사용하여 'LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct' 모델을 로드
## gpu_memory_utilization은 GPU 메모리 사용 비율을 0.5로 설정
## max_model_len은 모델이 처리할 수 있는 최대 토큰 길이를 10000으로 설정
llm = LLM(model='LGAI-EXAONE/EXAONE-3.5-2.48-instruct', gpu_memory_utilization=0.5, max_model_len=10000)

## 14-10 색상 정보 추출

In [None]:
from vllm import SamplingParams ## SamplingParams 임포트가 필요

def extract_color(style):
  ## 주어진 스타일 설명 텍스트에서 색상을 한글로 추출하는 함수
  prompt = [
      {
          "role": "system",
          "content": "You are EXAONE model from LG AI Research, a helpful assistant."
      },
      {
          "role": "user",
          "content": f"다음의 글에서 색상을 한글로 추출해주세요. 색상 외에 다른 정보는 적지 말아주세요." # vlm이 작성한 글에서 색상 정보 추출, 한글로 번역하면서
      }
  ]
  ## 샘플링 파라미터 설정 (온도, top_p, 최대 토큰 수)
  sampling_params = SamplingParams(temperature=0.2, top_p=0.95, max_tokens=1024)
  ## LLM 모델을 사용하여 프롬프트에 대한 응답 생성
  result = llm.chat(prompt, sampling_params)[0].outputs[0].text
  print(result)
  return result

## DataFrame의 'style' 열에 extract_color 함수를 적용
## 결과는 'color'라는 새로운 열에 저장
df['color'] = df['style'].apply(extract_color)

## 14-11 스타일 키워드 추출

In [None]:
from vllm import SamplingParams ## SamplingParams 임포트가 필요

def extract_style(style):
  ## 주어진 스타일 설명 텍스트에서 스타일 키워드를 한글로 추출하는 함수
  prompt = [
      {
          "role": "system",
          "content": "You are EXAONE model from LG AI Research, a helpful assistant."
      },
      {
          "role": "user",
          "content": f"다음의 글에서 스타일 키워드를 한글로 추출해주세요. 스타일 키워드 외에 다른 정보는 적지 마세요." # vlm이 작성한 글에서 스타일 키워드 추출, 한글로 번역하면서
      }
  ]
  ## 샘플링 파라미터 설정 (온도, top_p, 최대 토큰 수)
  sampling_params = SamplingParams(temperature=0.2, top_p=0.95, max_tokens=1024)
  ## LLM 모델을 사용하여 프롬프트에 대한 응답 생성
  result = llm.chat(prompt, sampling_params)[0].outputs[0].text
  print(result)
  return result

## DataFrame의 'style' 열에 extract_style 함수를 적용 (함수 이름은 이전과 동일하지만 기능 변경)
## 결과는 'keyword'라는 새로운 열에 저장
df['keyword'] = df['style'].apply(extract_style)

In [None]:
display(HTML(df.to_html(escape=False, formatters=dict(**{'image': path_to_image_html}))))

## 14-12 텍스트 데이터 정제

In [None]:
import re

def clean_text(text):
    ## 텍스트에서 특수 문자 및 HTML 태그를 제거하고 소문자로 변환하는 함수
    if isinstance(text, str):
       ## 영문, 숫자, 한글, 공백을 제외한 모든 문자 제거
       text = re.sub(r'[^a-zA-Z0-9가-힣\s]', '', text)
       ## HTML 태그 제거
       text = re.sub(r'<[^>]*>', '', text)
       ## 텍스트를 소문자로 변환
       text = text.lower()
       return text
    else:
        return ""

## 'color' 열의 텍스트 데이터 정제
df['color'] = df['color'].apply(clean_text)
## 'keyword' 열의 텍스트 데이터 정제
df['keyword'] = df['keyword'].apply(clean_text)

## 14-13 워드 클라우드 생성 및 시각화

In [None]:
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt

def get_word_count(df):
    ## DataFrame의 'color'와 'keyword' 열에서 단어 빈도를 계산하는 함수
    if not df.empty:
        ## 'color' 열의 모든 단어를 리스트로 합침
        all_nouns = df['color'].apply(str.split).sum()
        ## 'keyword' 열의 모든 단어를 추가
        all_nouns += df['keyword'].apply(str.split).sum()
        ## '색상' 단어를 제외한 모든 단어를 필터링
        all_nouns = [word for word in all_nouns if word not in ['색상']]
        ## 단어 빈도를 Counter 객체로 반환
        return Counter(all_nouns)
    return Counter() ## DataFrame이 비어있으면 빈 Counter 반환

def create_wordcloud(word_count):
    ## 단어 빈도수를 기반으로 워드 클라우드를 생성하고 시각화하는 함수
    if not word_count: ## 단어 빈도가 없으면 워드클라우드 생성하지 않음
        print("No words to generate word cloud.")
        return

    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        colormap='viridis',
        font_path='/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf' ## 한글 폰트 경로 지정
        ).generate_from_frequencies(word_count)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off") ## 축 표시 제거
    plt.show() ## 워드 클라우드 출력

## DataFrame에서 단어 빈도 계산
word_count = get_word_count(df)
## 계산된 단어 빈도로 워드 클라우드 생성 및 시각화
create_wordcloud(word_count)

## 14-14 트렌드 분석 보고서 생성 프롬프트 구성 및 실행

## 14-15 분석 보고서 시각화

In [None]:
from vllm import SamplingParams ## SamplingParams 임포트가 필요

## 시스템 메시지로 시작하는 프롬프트 리스트 초기화
prompt = [
    {
        "role": "system",
        "content": "You are EXAONE model from LG AI Research, a helpful assistant."
    }
]
## DataFrame의 각 행을 순회하며 '스타일 노트'와 '이미지 URL'을 사용자 메시지로 추가
for row in df.itertuples():
  prompt.append({"role": "user", "content": f"스타일 노트 : {row.style}\ㅜ이미지 url: {row.image}"})
## 마지막으로, 종합적인 트렌드 분석 보고서 작성을 요청하는 사용자 메시지 추가
## 보고서 제목, 내용의 전문성, 마크다운 형식, 예시 이미지 포함을 지시
prompt.append({"role": "user", "content": "주어진 스타일 노트를 토대로 종합적인 트렌드 방향 분석 보고서를 작성해주세요."})

## 샘플링 파라미터 설정 (온도, top_p, 최대 토큰 수)
sampling_params = SamplingParams(temperature=0.2, top_p=0.95, max_tokens=4096)
## LLM 모델을 사용하여 구성된 프롬프트에 대한 응답 생성
result = llm.chat(prompt, sampling_params)[0].outputs[0].text

In [None]:
from IPython.display import display, Markdown

## LLM으로부터 생성된 결과(Markdown 형식의 보고서)를 Jupyter 환경에 표시
display(Markdown(result))