In [1]:
import pandas as pd
import numpy as np
import requests
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency
import datetime
from xml.etree import ElementTree as ET
from urllib.parse import urlencode

In [2]:
def load_and_preprocess_data(file_path1, file_path2):
    """
    데이터 로드 및 전처리.
    """
    # 데이터 로드
    data1 = pd.read_csv(file_path1)
    data2 = pd.read_csv(file_path2)
    data = pd.concat([data1, data2], ignore_index=True)

    # 컬럼 이름 변경
    column_mapping = {
        'Item Purchased': '품목',
        'Category': '카테고리',
        'Color': '컬러',
        'Season': '날씨',
        'Purchase Amount (USD)': '가격'
    }
    data.rename(columns=column_mapping, inplace=True)

    # 결측값 처리
    data.fillna({
        '컬러': 'Unknown',
        '날씨': 'Unknown',
        '가격': data['가격'].median()
    }, inplace=True)

    # 온도와 강수량 추가 기본값
    data['온도'] = 20
    data['강수량'] = 0

    return categorize_items(data)


def categorize_items(data):
    """
    데이터에서 상의와 하의를 분류하는 함수.
    """
    # 품목 컬럼의 고유값 확인
    print("품목 컬럼 고유값:\n", data['품목'].unique())

    def categorize(item):
        # 실제 데이터를 기반으로 조건 추가
        if isinstance(item, str):
            item = item.lower()
            if any(keyword in item for keyword in ['blouse', 'sweater', 'shirt']):
                return 'Top'
            elif any(keyword in item for keyword in ['pants', 'jeans', 'skirt', 'shorts']):
                return 'Bottom'
        return None

    data['Category_Type'] = data['품목'].apply(categorize)
    print("Category_Type 분포:\n", data['Category_Type'].value_counts())
    return data


# 데이터 로드 및 분류 적용
filtered_data = load_and_preprocess_data(file_path1, file_path2)

# 분류된 데이터 크기 확인
top_data = filtered_data[filtered_data['Category_Type'] == 'Top']
bottom_data = filtered_data[filtered_data['Category_Type'] == 'Bottom']

print(f"Top data size: {len(top_data)}")
print(f"Bottom data size: {len(bottom_data)}")


NameError: name 'file_path1' is not defined

In [3]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

def train_individual_models(data):
    # 상의와 하의 데이터 분리
    top_data = data[data['Category_Type'] == 'Top']
    bottom_data = data[data['Category_Type'] == 'Bottom']
    
    if top_data.empty or bottom_data.empty:
        raise ValueError("상의 또는 하의 데이터가 비어 있습니다. 데이터셋을 확인하세요.")
    
    # 컬러 열을 숫자형으로 변환
    le_color = LabelEncoder()
    top_data['컬러'] = le_color.fit_transform(top_data['컬러'])
    bottom_data['컬러'] = le_color.transform(bottom_data['컬러'])

    # LabelEncoder 초기화 및 학습
    le_top_item = LabelEncoder()
    le_bottom_item = LabelEncoder()

    top_data['품목'] = le_top_item.fit_transform(top_data['품목'])
    bottom_data['품목'] = le_bottom_item.fit_transform(bottom_data['품목'])

    # 입력 변수와 타겟 변수 설정
    X_top = top_data[['컬러', '온도', '강수량']]
    y_top = top_data['품목']
    X_bottom = bottom_data[['컬러', '온도', '강수량']]
    y_bottom = bottom_data['품목']

    # 클래스 불균형 해결
    smote = SMOTE(random_state=42)
    X_top_resampled, y_top_resampled = smote.fit_resample(X_top, y_top)
    X_bottom_resampled, y_bottom_resampled = smote.fit_resample(X_bottom, y_bottom)

    # Train-Test Split
    X_train_top, X_test_top, y_train_top, y_test_top = train_test_split(X_top_resampled, y_top_resampled, test_size=0.2, random_state=42)
    X_train_bottom, X_test_bottom, y_train_bottom, y_test_bottom = train_test_split(X_bottom_resampled, y_bottom_resampled, test_size=0.2, random_state=42)

    # 모델 학습
    top_model = RandomForestClassifier(random_state=42)
    top_model.fit(X_train_top, y_train_top)

    bottom_model = RandomForestClassifier(random_state=42)
    bottom_model.fit(X_train_bottom, y_train_bottom)

    # 모델 평가
    print("상의 추천 모델 평가:")
    print(classification_report(y_test_top, top_model.predict(X_test_top)))
    print("하의 추천 모델 평가:")
    print(classification_report(y_test_bottom, bottom_model.predict(X_test_bottom)))

    return top_model, bottom_model, le_top_item, le_bottom_item


In [4]:
import requests
from urllib.parse import urlencode
import xml.etree.ElementTree as ET
import datetime

def get_weather(service_key, nx=55, ny=127):
    """
    초단기 실황 API를 호출하여 XML 데이터를 반환합니다.

    Parameters:
        service_key (str): 서비스 키 (디코딩된 값).
        nx (int): 예보 지점 X 좌표.
        ny (int): 예보 지점 Y 좌표.

    Returns:
        dict: 날씨 데이터 (파싱된 형식).
    """
    BASE_URL = "http://apis.data.go.kr/1360000/VilageFcstInfoService_2.0/getUltraSrtNcst"

    # 현재 날짜와 시간
    now = datetime.datetime.now()
    base_date = now.strftime("%Y%m%d")
    base_hour = now.hour
    if now.minute < 40:
        base_hour -= 1
    base_time = f"{base_hour:02}00"

    # 요청 파라미터
    params = {
        "serviceKey": service_key,  # 디코딩된 서비스 키를 그대로 사용
        "pageNo": 1,
        "numOfRows": 1000,
        "dataType": "XML",  # XML 형식으로 요청
        "base_date": base_date,
        "base_time": base_time,
        "nx": nx,
        "ny": ny,
    }

    # 요청 URL 생성
    query_string = urlencode(params, doseq=True)
    url = f"{BASE_URL}?{query_string}"

    try:
        # API 요청
        response = requests.get(url)
        print("요청 URL:", url)
        print("응답 상태 코드:", response.status_code)

        if response.status_code == 200:
            # XML 파싱
            try:
                root = ET.fromstring(response.content)
                body = root.find("body")
                if body is None:
                    print("응답에 <body> 태그가 없습니다.")
                    return None

                items = body.find("items")
                if items is None:
                    print("응답에 <items> 태그가 없습니다.")
                    return None

                data = {}
                for item in items.findall("item"):
                    category = item.find("category").text
                    obsr_value = item.find("obsrValue").text
                    data[category] = obsr_value

                return data
            except Exception as e:
                print("XML 파싱 오류:", e)
                print("응답 내용 (일부):", response.text[:500])  # 응답 내용 일부 출력
                return None
        else:
            print("API 요청 실패:", response.text)
            return None

    except requests.exceptions.RequestException as e:
        print("API 요청 중 오류 발생:", e)
        return None


In [5]:
def recommend_outfit(weather_data, top_model, bottom_model, le_top_item, le_bottom_item):
    # 날씨 데이터를 입력 형식으로 변환
    input_data = {
        '컬러': 0,  # 기본값
        '온도': float(weather_data.get('T1H', 0)),
        '강수량': float(weather_data.get('RN1', 0)),
    }
    input_df = pd.DataFrame([input_data])

    # 상의 추천
    top_prediction = top_model.predict(input_df)
    top_recommendation = le_top_item.inverse_transform(top_prediction)

    # 하의 추천
    bottom_prediction = bottom_model.predict(input_df)
    bottom_recommendation = le_bottom_item.inverse_transform(bottom_prediction)

    return {
        "상의": top_recommendation[0],
        "하의": bottom_recommendation[0]
    }


In [6]:
file_path1 = "./data/shopping_trends.csv"
file_path2 = "./data/shopping_trends_updated.csv"
SERVICE_KEY_DECODED = "b1tZLYY9n9j+abXg7IAiQZoDQUe1zy2yel/NuPrIB59sdXUPVEdp5z5Wj6wTFGU/NotP3nwvunTDNVhdJRAJ3w=="

# 데이터 로드 및 전처리
filtered_data = load_and_preprocess_data(file_path1, file_path2)

# 모델 학습
top_model, bottom_model, le_top_item, le_bottom_item = train_individual_models(filtered_data)


품목 컬럼 고유값:
 ['Blouse' 'Sweater' 'Jeans' 'Sandals' 'Sneakers' 'Shirt' 'Shorts' 'Coat'
 'Handbag' 'Shoes' 'Dress' 'Skirt' 'Sunglasses' 'Pants' 'Jacket' 'Hoodie'
 'Jewelry' 'T-shirt' 'Scarf' 'Hat' 'Socks' 'Backpack' 'Belt' 'Boots'
 'Gloves']
Category_Type 분포:
 Category_Type
Top       1302
Bottom    1220
Name: count, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_data['컬러'] = le_color.fit_transform(top_data['컬러'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bottom_data['컬러'] = le_color.transform(bottom_data['컬러'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_data['품목'] = le_top_item.fit_transform(top_data['품목'])
A value is trying to be set on a

상의 추천 모델 평가:
              precision    recall  f1-score   support

           0       0.29      0.33      0.31        76
           1       0.00      0.00      0.00        76
           2       0.18      0.27      0.21        56
           3       0.25      0.39      0.31        66

    accuracy                           0.24       274
   macro avg       0.18      0.25      0.21       274
weighted avg       0.18      0.24      0.20       274

하의 추천 모델 평가:
              precision    recall  f1-score   support

           0       0.34      0.30      0.32        73
           1       0.26      0.18      0.21        61
           2       0.33      0.49      0.40        73
           3       0.32      0.28      0.30        67

    accuracy                           0.32       274
   macro avg       0.31      0.31      0.31       274
weighted avg       0.32      0.32      0.31       274



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
weather_data = get_weather(SERVICE_KEY_DECODED)
if weather_data:
    recommendations = recommend_outfit(weather_data, top_model, bottom_model, le_top_item, le_bottom_item)
    print("추천된 의상:")
    print(f"상의: {recommendations['상의']}")
    print(f"하의: {recommendations['하의']}")
else:
    print("날씨 정보를 가져오는 데 실패했습니다.")

요청 URL: http://apis.data.go.kr/1360000/VilageFcstInfoService_2.0/getUltraSrtNcst?serviceKey=b1tZLYY9n9j%2BabXg7IAiQZoDQUe1zy2yel%2FNuPrIB59sdXUPVEdp5z5Wj6wTFGU%2FNotP3nwvunTDNVhdJRAJ3w%3D%3D&pageNo=1&numOfRows=1000&dataType=XML&base_date=20241209&base_time=0500&nx=55&ny=127
응답 상태 코드: 200
추천된 의상:
상의: Sweater
하의: Shorts
