<a href="https://colab.research.google.com/github/mors119/Data-Analysis-with-Open-Source/blob/main/%EC%98%A4%ED%94%88%EC%86%8C%EC%8A%A4_%EB%8D%B0%EC%9D%B4%ED%84%B0_%EB%B6%84%EC%84%9D_14%EA%B0%95.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 14강 비정형 데이터 분석 : 패션 사진 데이터 활용

### 목표

- 비정형 데이터를 인공지능 모델로 분석하여 실무에서 활용 가능한 보고서 형태로 가공

- 패션 트렌드라는 구체적인 주제를 통해, 비정형 데이터 분석의 실질적인 활용 방안을 경험하고자 함


### 분석 프로세스 개요

1. 데이터 수집
  - requests를 이용한 RSS 데이터 수집
  - lxml을 이용한 XML 파싱
  - 이미지 데이터 추출
2. VLM을 이용한 이미지 분석
  - 프롬프트를 이용한 이미지 필터링
  - 프롬프트를 이용한 스타일 분석
3. LLM을 이용한 키워드 분석 및 보고서 작성
  - 텍스트 전처리
  - 색상 및 스타일 키워드 추출
  - 워드 클라우드 분석
  - 보고서 작성

# 주의 : 런타임 GPU 로 설정 필요

In [None]:
# 4bit VLM 처리를 위한 bitsandbytes 설치
# LLM 처리를 위한 VLLM 설치 (오래걸리는 작업(>5분)이므로 미리 실행!)
!pip install bitsandbytes==0.45.3 vllm==0.7.3 transformers==4.48.2
# 필요 시 세션 재시작

In [None]:
# 한글 처리를 위한 matplotlib 설정 (1)

!sudo apt-get install -y fonts-nanum
!sudo fc-cache –fv
!rm ~/.cache/matplotlib -rf

- 런타임 -> 세션 다시 시작

In [1]:
# 한글 처리를 위한 matplotlib 설정 (2)

import matplotlib.pyplot as plt
plt.rc('font', family='NanumBarunGothic')

# 1. 데이터 수집 및 전처리

## 14-1 RSS 피드에서 이미지 URL 추출

In [7]:
import requests
from lxml import etree
from lxml.html import fromstring
import pandas as pd

def extract_unique_images(rss_url):
    ## 주어진 RSS 피드 URL에서 고유한 이미지 URL들을 추출하는 함수 정의
    try:
        ## requests 라이브러리를 사용하여 RSS 피드 URL로부터 내용을 가져옴
        response = requests.get(rss_url)
        ## 가져온 XML 응답 내용을 lxml의 etree.fromstring으로 파싱하여 XML 트리 root를 생성
        root = etree.fromstring(response.content)
        image_urls = set()

        ## XML 트리에서 모든 'item' 태그를 XPath를 사용하여 순회
        for item in root.xpath('//item'):
            description = item.find('description')
            if description is not None and description.text:
                ## description의 텍스트 내용을 lxml.html.fromstring으로 파싱하여 HTML 트리를 생성
                html_tree = fromstring(description.text)
                ## HTML 트리에서 첫 번째 <img> 태그의 'src' 속성 값을 XPath를 사용하여 추출
                img_url = html_tree.xpath('string(//img/@src)')
                if img_url:
                    image_urls.add(img_url)

        return list(image_urls)

    except Exception as e:
        ## 오류 발생 시 오류 메시지를 출력하고 빈 리스트를 반환
        print(f"Error occurred: {e}")
        return []

rss_url = "https://glltn.com/feed/"
## extract_unique_images 함수를 호출하여 고유한 이미지 URL들을 추출
unique_images = extract_unique_images(rss_url)

## 추출된 이미지 URL 리스트를 사용하여 'image'라는 열을 가진 pandas DataFrame을 생성
df = pd.DataFrame(unique_images, columns=["image"])

In [8]:
df

Unnamed: 0,image
0,https://glltn.com/wp-content/blogs.dir/1/files...
1,https://glltn.com/wp-content/blogs.dir/1/files...
2,https://glltn.com/wp-content/blogs.dir/1/files...
3,https://glltn.com/wp-content/blogs.dir/1/files...
4,https://glltn.com/wp-content/blogs.dir/1/files...
5,https://glltn.com/wp-content/blogs.dir/1/files...
6,https://glltn.com/wp-content/blogs.dir/1/files...
7,https://glltn.com/wp-content/blogs.dir/1/files...
8,https://glltn.com/wp-content/blogs.dir/1/files...
9,https://glltn.com/wp-content/blogs.dir/1/files...


## 14-2 수집 데이터 확인

In [9]:
from IPython.display import display, HTML

def path_to_image_html(path):
    ## 이미지 경로를 HTML img 태그로 변환하는 함수
    return f'<img src="{path}" width="300" />'

## DataFrame의 스타일을 설정하여 이미지 너비를 300px로 지정
df.style.set_table_styles([{'selector': 'img', 'props': 'width: 300px;'}])

## DataFrame을 HTML로 변환하여 출력. 이미지 열은 path_to_image_html 함수로 포맷팅
display(HTML(df.to_html(escape=False, formatters=dict(**{'image': path_to_image_html}))))

Unnamed: 0,image
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,


## 2. VLM을 이용한 이미지 분석

## 14-3 VLM 모델 로드

In [None]:
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

## 'openbmb/MiniCPM-V-2_6-int4' 모델을 사전 훈련된 가중치와 함께 로드
## trust_remote_code=True는 허브에서 사용자 정의 코드를 실행할 수 있도록 허용
model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True)
## 로드된 모델에 해당하는 토크나이저를 로드
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True)
## 모델을 평가 모드로 설정 (드롭아웃 등 훈련 시에만 필요한 기능 비활성화)
model.eval()

![](https://farm3.staticflickr.com/2677/4434956914_6e95a22940_z.jpg)

## 14-4 이미지 질문 응답 예시

In [None]:
from transformers import set_seed

## 재현성을 위해 시드(seed)를 42로 설정
set_seed(42)
## 예시 이미지 URL 정의
image_url = 'https://farm3.staticflickr.com/2677/4434956914_6e95a22940_z.jpg'
## requests로 이미지 다운로드 후 PIL Image 객체로 열고 RGB 형식으로 변환
image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
## 이미지에 대한 질문 정의
question = 'how many cats in the photo?'
## 모델 입력 형식에 맞춰 메시지 구성 (이미지와 질문 포함)
msgs = [{'role': 'user', 'content': [image, question]}]
## 모델의 chat 함수를 호출하여 이미지와 질문에 대한 응답 생성
result = model.chat(image=None, msgs=msgs, tokenizer=tokenizer)
## 모델의 응답 출력
print(result)

In [14]:
set_seed(86)
## 이미지에 대한 질문을 업데이트. 책 표지의 고양이도 포함하도록 요청
question = 'how many cats in the photo? including the books cover.'
## 모델 입력 형식에 맞춰 메시지 구성 (이전에 로드된 이미지와 업데이트된 질문 포함)
msgs = [{'role': 'user', 'content': [image, question]}]
## 모델의 chat 함수를 호출하여 업데이트된 질문에 대한 응답 생성
result = model.chat(image=None, msgs=msgs, tokenizer=tokenizer)
## 모델의 응답 출력
print(result)

2


In [13]:
set_seed(42)
## 이미지에 대한 질문을 'describe the photo'로 설정하여 이미지 내용을 설명하도록 요청
question = 'describe the photo'
## 모델 입력 형식에 맞춰 메시지 구성 (이전에 로드된 이미지와 설명 요청 질문 포함)
msgs = [{'role': 'user', 'content': [image, question]}]
## 모델의 chat 함수를 호출하여 이미지에 대한 설명을 생성
result = model.chat(image=None, msgs=msgs, tokenizer=tokenizer)
## 모델의 응답 (이미지 설명) 출력
print(result)

The photo shows a book with the title "why dogs are better than cats" and an image of a cat sitting on top of a dog's head. The book is placed on a flat surface, and next to it stands a real cat that appears to be looking at the book cover with some curiosity or disapproval.


## 14-5 의류 이미지 여부 판단

In [15]:
def is_picture_of_clothing(image_url):
    ## 이미지 URL이 의류 사진인지 판단하는 함수
    # 의류가 포함된 사진인지 확인하는 질문 작성 (영어로)
    question = 'Is this a picture of clothing? MUST say yes or no.'
    image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
    msgs = [{'role': 'user', 'content': [image, question]}]
    result = model.chat(image=None, msgs=msgs, tokenizer=tokenizer, temperature=0.1)
    print(result)
    ## 응답에 'yes'가 포함되어 있는지 확인하여 True/False 반환
    return 'yes' in result.lower()

## DataFrame의 'image' 열에 함수를 적용하여 'is_clothing' 열에 결과 저장
df['is_clothing'] = df['image'].apply(is_picture_of_clothing)

Yes, this image is of clothing. It appears to be a fashion photograph showcasing the individual's outfit, which includes a black blazer and tie-dye trousers. The focus on the attire suggests that it may be used for promotional or retail purposes, highlighting the style and design elements of the garments.
Yes.
Yes, this image appears to be a picture of clothing. The focus is on the attire worn by the individual, specifically highlighting the shirt and jeans as key elements of the outfit. The style and fit of the clothing are presented in a manner that suggests it could be used for fashion or retail purposes, where the aim is to showcase the garments themselves rather than the person wearing them.
Yes, this image is of clothing. It appears to be a fashion shoot showcasing winter outerwear, specifically parkas or coats designed for cold weather. The focus on the garments and the models' poses suggest that the purpose of the photograph is to display these items of clothing in a way that h

## 14-6 의류 판단 결과 시각화

In [None]:
display(HTML(df.to_html(escape=False, formatters=dict(**{'image': path_to_image_html}))))

## 14-7 의류 이미지 필터링

In [None]:
## 'is_clothing' 열의 값이 True인 행들만 필터링하여 DataFrame을 업데이트
df = df[df['is_clothing']]
display(HTML(df.to_html(escape=False, formatters=dict(**{'image': path_to_image_html}))))


## 14-8 의류 스타일 분석

In [20]:
def describe_style(image_url):
    ## 주어진 이미지 URL의 의류 스타일을 분석하는 함수
    question = 'Analyze the style of the clothes. Please let me explain the colors and trend changes.'
    image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
    msgs = [{'role': 'user', 'content': [image, question]}]
    ## 모델의 chat 함수를 호출하여 이미지에 대한 스타일 분석 응답 생성
    result = model.chat(image=None, msgs=msgs, tokenizer=tokenizer)
    return result

## 필터링된 DataFrame의 'image' 열에 describe_style 함수를 적용
## 결과는 'style'이라는 새로운 열에 저장
df['style'] = df['image'].apply(describe_style)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['style'] = df['image'].apply(describe_style)


In [21]:
display(HTML(df.to_html(escape=False, formatters=dict(**{'image': path_to_image_html}))))

Unnamed: 0,image,is_clothing,style
0,,True,"The style of the clothes in the image suggests a blend of classic and contemporary fashion trends. The black blazer with its mandarin collar is reminiscent of traditional menswear, often associated with formality and sophistication. However, the modern twist comes from the casual fit of the blazer and the presence of large buttons that add an edgy detail to the otherwise conservative piece.\n\nThe white turtleneck shirt underneath adds a layer of depth to the outfit, providing both warmth and a contrast against the dark blazer. It's a timeless choice that can be seen in various fashion eras, indicating a nod to classic styles.\n\nThe tie-dye pants introduce a bold, artistic element to the ensemble. Tie-dye has been a recurring trend in fashion, often symbolizing creativity and non-conformity. This particular pair, with its deep blue hues, stands out as a statement piece, suggesting a fusion of streetwear influences with more formal elements like the blazer.\n\nOverall, the combination of these items reflects a transitional look that could appeal to those who appreciate a mix of old and new aesthetics, bridging the gap between traditional men's wear and modern street-style trends."
1,,True,"The sweater worn by the man in the image exhibits a style that is often associated with traditional or folk-inspired knitwear. The use of earthy tones such as beige, dark brown, and shades of green suggests a preference for natural colors which are commonly seen in casual or outdoor clothing. The pattern on the sweater includes geometric shapes and motifs that could be indicative of cultural heritage or regional design trends.\n\nIn terms of trend changes, this type of sweater can be considered somewhat timeless due to its classic color palette and simple yet intricate patterns. However, it may not follow the more recent fashion trends that favor minimalism, monochromatic palettes, or highly graphic prints. Instead, this garment seems to lean towards comfort and functionality, possibly appealing to those who prefer a more understated and possibly vintage-inspired look."
2,,True,"The style of the clothes in the image reflects a casual and slightly vintage-inspired aesthetic. The off-white, long-sleeve button-up shirt is reminiscent of classic workwear or possibly a vintage piece that has been modernized for contemporary wear. Such shirts are often associated with comfort and versatility, making them suitable for various occasions from casual outings to more relaxed business settings.\n\nThe color choice of pale yellow adds a soft, summery feel to the outfit, suggesting it might be part of a spring or summer collection. This hue is not only trendy but also evokes a sense of freshness and simplicity. Paired with light blue denim shorts, which have a faded wash giving them a worn-in look, the ensemble strikes a balance between laid-back and stylish.\n\nIn terms of trend changes, this combination of a loose-fitting, unbuttoned shirt with denim shorts is a nod to streetwear influences, where comfort and ease of movement are key. The relaxed fit and neutral colors suggest an effortless style, popular in recent fashion trends that favor minimalism and timeless pieces over fast-fashion items. Overall, the attire presented is indicative of a current fashion preference for comfortable yet fashionable clothing that can transition easily between different seasons."
3,,True,"The style of the clothes in the image suggests a contemporary, utilitarian fashion trend that emphasizes comfort and functionality. The olive green coats are practical for cold weather, with their high hoods and large pockets, which indicate a design focus on warmth and storage. This color choice is often associated with military or outdoor gear, suggesting a nod to rugged, versatile clothing suitable for various environments.\n\nThe grey hoodies underneath add a layer of casualness and ease to the overall look, blending seamlessly with the muted tones of the outerwear. Grey is a neutral color that complements a wide range of hues, making it a popular choice for layering pieces. The combination of these garments reflects a modern approach to winter fashion where comfort and style intersect, catering to those who value both aesthetics and utility in their wardrobe choices."
4,,True,"The style of the clothes worn by the individual in the photograph can be described as minimalist and utilitarian. The dark colors, such as black or navy, are often associated with a classic and timeless look that is versatile for various occasions. This color choice also tends to give off a sophisticated and understated vibe.\n\nMinimalist fashion typically avoids excessive ornamentation and focuses on clean lines, simple shapes, and functional design. The rolled-up sleeves suggest a casual yet thoughtful approach to dressing, indicating an attention to detail without being overly formal. The belted waist adds a practical element to the outfit, providing both aesthetic interest and functionality.\n\nIn terms of trend changes, minimalist styles have been popular for several years due to their simplicity and elegance. They offer a modern take on traditional clothing items, making them suitable for contemporary wardrobes. The combination of a blazer-like top with more relaxed pants suggests a blend of formality and comfort, which is a common trend in current fashion circles where versatility and ease of movement are highly valued."
5,,True,"The style of the clothes in the image suggests a contemporary fashion sense with an emphasis on simplicity and comfort. The dark green shirt has a classic design, likely made from a sturdy fabric suitable for casual wear or possibly workwear, given its robust appearance. The burgundy collar peeking under the shirt adds a subtle pop of color, which is a common trend in layering to create visual interest without overwhelming the overall look.\n\nThe patterned pants are particularly noteworthy as they introduce texture and pattern into an otherwise solid-colored outfit. This choice indicates a blend of traditional and modern styles, where patterns have been making a resurgence in men's fashion, often seen in both casual and smart-casual settings.\n\nOverall, the ensemble reflects a transitional style that could be versatile enough for various occasions, blending functionality with a touch of personal flair through color coordination and pattern selection."
6,,True,"The shoes in the image exhibit a style that is practical and utilitarian, often associated with outdoor activities such as hiking or trekking. The color palette of earthy tones—specifically shades of brown—is indicative of a design that aims to blend with natural environments, which is a common trend in outdoor gear. This choice of color not only serves a functional purpose but also aligns with current fashion trends that favor natural, muted colors over bright, bold hues.\n\nIn terms of trend changes, there has been a noticeable shift towards more sustainable and eco-friendly practices in the fashion industry, which includes the use of materials that are both durable and environmentally conscious. The suede-like material suggests an effort to provide comfort while potentially using less water-intensive production processes compared to genuine leather. Additionally, the sturdy construction and reinforced areas like the toe box and heel counter indicate a focus on durability and longevity, appealing to consumers who prioritize quality and sustainability in their purchases.\n\nOverall, these shoes reflect a contemporary trend where functionality meets environmental consciousness, catering to a market segment that values both performance and ethical considerations in their clothing choices."
8,,True,"The style of the clothes worn by the individual in the image leans towards a minimalist and monochromatic aesthetic. The black color is often associated with simplicity, elegance, and versatility, making it a popular choice for casual wear as well as fashion statements. The Henley shirt is a classic piece that has seen a resurgence in popularity due to its timeless design and comfort. It combines elements of both streetwear and smart-casual styles, which aligns with contemporary fashion trends that favor ease of movement and understated elegance.\n\nThe long sleeves and relaxed fit suggest an emphasis on comfort and possibly an influence from urban or hipster fashion, where oversized garments are favored for their laid-back look. This trend has been prevalent in recent years, especially among younger demographics who prioritize comfort without sacrificing style.\n\nOverall, the outfit reflects a modern, effortless chic that is both stylish and functional, catering to those who prefer a no-nonsense approach to dressing while still maintaining a sense of personal expression through their clothing choices."
10,,True,"The clothing style in the image leans towards a casual, possibly streetwear aesthetic. The knit sweater is a classic piece that has seen numerous fashion trends over the years but remains timeless due to its versatility and comfort. The choice of a dark grey color for the sweater suggests a preference for neutral tones, which are often favored for their ability to pair well with various other colors and styles.\n\nThe beanie's ribbed texture and the contrasting yellow stripes add a subtle touch of pattern and color without being overly bold or distracting. This kind of accessory can serve both a functional purpose—providing warmth—and a stylistic one, contributing to an overall look that is both practical and fashionable.\n\nIn terms of trend changes, the combination of a high-necked sweater and a beanie could suggest an influence from winter fashion trends where layering and headwear are essential for warmth but also serve as statement pieces. The simplicity of the outfit indicates a preference for understated elegance, which has been a recurring theme in fashion, especially during cooler seasons when more fabric is worn.\n\nOverall, the clothes reflect a modern, perhaps urban style that balances functionality with contemporary fashion sensibilities."
11,,True,"The style of the clothes in the image leans towards a casual, utilitarian aesthetic with an emphasis on practicality and comfort. The olive green jacket is reminiscent of military or outdoor gear, which has seen a resurgence in popularity due to its functionality and versatility. This color choice is often associated with earthy tones that are trending for their ability to blend with natural environments and urban settings alike.\n\nUnderneath the jacket, there's a layering technique that adds depth and texture to the outfit. Layering is a common trend in fashion as it allows for adaptability to changing weather conditions while also providing a more dynamic visual interest. The light blue striped shirt underneath introduces a subtle pattern without being overly flashy, adhering to a minimalist yet stylish approach.\n\nThe dark pants complement the overall look by balancing out the lighter colors above. They provide a solid base that grounds the ensemble, preventing it from appearing too busy or overwhelming.\n\nOverall, the clothing style suggests a preference for timeless pieces that can be mixed and matched to create different looks, reflecting a contemporary take on classic styles."


# 3. LLM을 이용한 키워드 분석 및 보고서 작성

## 14-9 언어 모델(LLM) 로드

In [None]:
from vllm import LLM, SamplingParams

## vLLM 라이브러리를 사용하여 'LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct' 모델을 로드
## gpu_memory_utilization은 GPU 메모리 사용 비율을 0.5로 설정
## max_model_len은 모델이 처리할 수 있는 최대 토큰 길이를 10000으로 설정
llm = LLM(model='LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct', gpu_memory_utilization=0.5, max_model_len=10000)

## 14-10 색상 정보 추출

In [None]:
from vllm import SamplingParams ## SamplingParams 임포트가 필요

def extract_color(style):
  ## 주어진 스타일 설명 텍스트에서 색상을 한글로 추출하는 함수
  prompt = [
      {
          "role": "system",
          "content": "You are EXAONE model from LG AI Research, a helpful assistant."
      },
      {
          "role": "user",
          "content": f"다음의 글에서 색상을 한글로 추출해 주세요. 색상 외의 다른 정보는 다른 정보는 적지 말아주세요.\n{style}" # vlm이 작성한 글에서 색상 정보 추출, 한글로 번역하면서
      }
  ]
  ## 샘플링 파라미터 설정 (온도, top_p, 최대 토큰 수)
  sampling_params = SamplingParams(temperature=0.2, top_p=0.95, max_tokens=1024)
  ## LLM 모델을 사용하여 프롬프트에 대한 응답 생성
  result = llm.chat(prompt, sampling_params)[0].outputs[0].text
  print(result)
  return result

## DataFrame의 'style' 열에 extract_color 함수를 적용
## 결과는 'color'라는 새로운 열에 저장
df['color'] = df['style'].apply(extract_color)

## 14-11 스타일 키워드 추출

In [None]:
from vllm import SamplingParams ## SamplingParams 임포트가 필요

def extract_style(style):
  ## 주어진 스타일 설명 텍스트에서 스타일 키워드를 한글로 추출하는 함수
  prompt = [
      {
          "role": "system",
          "content": "You are EXAONE model from LG AI Research, a helpful assistant."
      },
      {
          "role": "user",
          "content": f"다음의 글에서 스타일 캐워드를 한글로 추출해주세요. 스타일 키워드 외에 다른 정보는 적지 말아주세요." # vlm이 작성한 글에서 스타일 키워드 추출, 한글로 번역하면서
      }
  ]
  ## 샘플링 파라미터 설정 (온도, top_p, 최대 토큰 수)
  sampling_params = SamplingParams(temperature=0.2, top_p=0.95, max_tokens=1024)
  ## LLM 모델을 사용하여 프롬프트에 대한 응답 생성
  result = llm.chat(prompt, sampling_params)[0].outputs[0].text
  print(result)
  return result

## DataFrame의 'style' 열에 extract_color 함수를 적용 (함수 이름은 이전과 동일하지만 기능 변경)
## 결과는 'keyword'라는 새로운 열에 저장
df['keyword'] = df['style'].apply(extract_style)

In [None]:
display(HTML(df.to_html(escape=False, formatters=dict(**{'image': path_to_image_html}))))

## 14-12 텍스트 데이터 정제

In [None]:
import re

def clean_text(text):
    ## 텍스트에서 특수 문자 및 HTML 태그를 제거하고 소문자로 변환하는 함수
    if isinstance(text, str):
       ## 영문, 숫자, 한글, 공백을 제외한 모든 문자 제거
       text = re.sub(r'[^a-zA-Z0-9가-힣\s]', '', text)
       ## HTML 태그 제거
       text = re.sub(r'<[^>]*>', '', text)
       ## 텍스트를 소문자로 변환
       text = text.lower()
       return text
    else:
        return ""

## 'color' 열의 텍스트 데이터 정제
df['color'] = df['color'].apply(clean_text)
## 'keyword' 열의 텍스트 데이터 정제
df['keyword'] = df['keyword'].apply(clean_text)

## 14-13 워드 클라우드 생성 및 시각화

In [None]:
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt

def get_word_count(df):
    ## DataFrame의 'color'와 'keyword' 열에서 단어 빈도를 계산하는 함수
    if not df.empty:
        ## 'color' 열의 모든 단어를 리스트로 합침
        all_nouns = df['color'].apply(str.split).sum()
        ## 'keyword' 열의 모든 단어를 추가
        all_nouns += df['keyword'].apply(str.split).sum()
        ## '색상' 단어를 제외한 모든 단어를 필터링
        all_nouns = [word for word in all_nouns if word not in ['색상']]
        ## 단어 빈도를 Counter 객체로 반환
        return Counter(all_nouns)
    return Counter() ## DataFrame이 비어있으면 빈 Counter 반환

def create_wordcloud(word_count):
    ## 단어 빈도수를 기반으로 워드 클라우드를 생성하고 시각화하는 함수
    if not word_count: ## 단어 빈도가 없으면 워드클라우드 생성하지 않음
        print("No words to generate word cloud.")
        return

    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        colormap='viridis',
        font_path='/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf' ## 한글 폰트 경로 지정
        ).generate_from_frequencies(word_count)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off") ## 축 표시 제거
    plt.show() ## 워드 클라우드 출력

## DataFrame에서 단어 빈도 계산
word_count = get_word_count(df)
## 계산된 단어 빈도로 워드 클라우드 생성 및 시각화
create_wordcloud(word_count)

## 14-14 트렌드 분석 보고서 생성 프롬프트 구성 및 실행

## 14-15 분석 보고서 시각화

In [None]:
from vllm import SamplingParams ## SamplingParams 임포트가 필요

## 시스템 메시지로 시작하는 프롬프트 리스트 초기화
prompt = [
    {
        "role": "system",
        "content": "You are EXAONE model from LG AI Research, a helpful assistant."
    }
]
## DataFrame의 각 행을 순회하며 '스타일 노트'와 '이미지 URL'을 사용자 메시지로 추가
for row in df.itertuples():
  prompt.append({"role": "user", "content": f"스타일 노트: {row.style}\n이미지 url:{row.image}"})
## 마지막으로, 종합적인 트렌드 분석 보고서 작성을 요청하는 사용자 메시지 추가
## 보고서 제목, 내용의 전문성, 마크다운 형식, 예시 이미지 포함을 지시
prompt.append({"role": "user", "content": "주어진 스타일 노트를 토대로 종합적인 트랜드 방향의 분석 보고서를 작성해주세요. 보고서의 제목은 해외 룩북 스타일 분석입니다. 내용은 전문적이면서 명확하게 작성해주세요. 문서 형식은 markdown 형식으로 만들어주세요."})

## 샘플링 파라미터 설정 (온도, top_p, 최대 토큰 수)
sampling_params = SamplingParams(temperature=0.2, top_p=0.95, max_tokens=4096)
## LLM 모델을 사용하여 구성된 프롬프트에 대한 응답 생성
result = llm.chat(prompt, sampling_params)[0].outputs[0].text

In [None]:
from IPython.display import display, Markdown

## LLM으로부터 생성된 결과(Markdown 형식의 보고서)를 Jupyter 환경에 표시
display(Markdown(result))