In [1]:
import pandas as pd
import plotly.express as px

# Read the categorized product table from the working directory into `df`.
df = pd.read_csv(
    'categorized_data.csv',
)

# Print a short status line, then render the first five rows as a rich table.
print("데이터 로드")
display(df.head(5))

데이터 로드


Unnamed: 0,brand,name,price,event,img_url,category
0,7Eleven,HK)새싹보리500ml,2500,1+1,https://www.7-eleven.co.kr/upload/product/8809...,음료
1,7Eleven,LG)샤프란아우라1L(스윗만다린),12900,1+1,https://www.7-eleven.co.kr/upload/product/8801...,생활/위생용품
2,7Eleven,LG)샤프란아우라1L(매그놀리아),12900,1+1,https://www.7-eleven.co.kr/upload/product/8801...,생활/위생용품
3,7Eleven,아모레)미장센퍼펙트샴푸680ml_H,18000,1+1,https://www.7-eleven.co.kr/upload/product/8809...,생활/위생용품
4,7Eleven,아모레)미장센퍼펙트린스680ml_H,18000,1+1,https://www.7-eleven.co.kr/upload/product/8809...,생활/위생용품


카테고리별 가격 분포

In [5]:
# Box plot of price per category, one colour per category; every individual
# row is also drawn as a point (points="all").
fig_box = px.box(
    df,
    x='category',
    y='price',
    color='category',
    points="all",
    title='💰 카테고리별 가격대 분포 (Y축 범위 제한)',
)

# Restrict the initially visible y-axis to 0-30,000; rows priced above that
# are still in the figure but fall outside this view.
fig_box.update_layout(yaxis=dict(range=[0, 30000]))

fig_box.show()

개당 가격과 할인율 계산

In [6]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Reload the dataset from disk. The 'utf-8-sig' codec also accepts a file that
# begins with a UTF-8 byte-order mark.
df = pd.read_csv(
    'categorized_data.csv',
    encoding='utf-8-sig',
)

def calculate_metrics(row):
    """Derive the per-item price and discount rate for one product row.

    Args:
        row: Any object indexable by the keys 'price' and 'event'
            (for example a DataFrame row passed by ``df.apply(..., axis=1)``).

    Returns:
        pd.Series: Two values in order, ``[unit_price, discount_rate]``.
            * event text contains '1+1' -> half the price, 50
            * event text contains '2+1' -> two thirds of the price, 33
            * anything else             -> the price unchanged, 0
    """
    listed_price = row['price']
    # str() so that a non-string event (e.g. NaN) simply fails both checks.
    promo = str(row['event'])

    # '1+1' is tested first; the order of the checks is significant only if a
    # label ever contained both substrings.
    if '1+1' in promo:
        return pd.Series([listed_price / 2, 50])

    if '2+1' in promo:
        # NOTE(review): 33 is the truncated form of 33.33...%; confirm whether
        # that precision loss matters for downstream averages.
        return pd.Series([(listed_price * 2) / 3, 33])

    return pd.Series([listed_price, 0])

# Run the helper on every row. The result is a two-column frame whose
# positional columns are 0 (unit price) and 1 (discount rate).
per_row = df.apply(calculate_metrics, axis=1)

# Store the unit price as an integer (the cast drops any fractional part) and
# keep the discount rate as returned.
df['unit_price'] = per_row[0].astype(int)
df['discount_rate'] = per_row[1]

print("가성비 지표 계산 완료")
df.head()

가성비 지표 계산 완료


Unnamed: 0,brand,name,price,event,img_url,category,unit_price,discount_rate
0,7Eleven,HK)새싹보리500ml,2500,1+1,https://www.7-eleven.co.kr/upload/product/8809...,음료,1250,50.0
1,7Eleven,LG)샤프란아우라1L(스윗만다린),12900,1+1,https://www.7-eleven.co.kr/upload/product/8801...,생활/위생용품,6450,50.0
2,7Eleven,LG)샤프란아우라1L(매그놀리아),12900,1+1,https://www.7-eleven.co.kr/upload/product/8801...,생활/위생용품,6450,50.0
3,7Eleven,아모레)미장센퍼펙트샴푸680ml_H,18000,1+1,https://www.7-eleven.co.kr/upload/product/8809...,생활/위생용품,9000,50.0
4,7Eleven,아모레)미장센퍼펙트린스680ml_H,18000,1+1,https://www.7-eleven.co.kr/upload/product/8809...,생활/위생용품,9000,50.0


In [8]:
# Top 10 brands by number of product rows.
# Naming the index and the value column explicitly yields the two columns
# 'brand' and 'count' regardless of how value_counts() labels its result.
brand_top10 = (
    df['brand']
    .value_counts()
    .head(10)
    .rename_axis('brand')
    .reset_index(name='count')
)

# Bar chart of the counts; bar colour follows the count on a red scale and the
# value is printed on each bar.
fig_brand = px.bar(
    brand_top10,
    x='brand',
    y='count',
    color='count',
    color_continuous_scale='Reds',
    text_auto=True,
    title='브랜드별 행사 상품 점유율 TOP 10',
)
fig_brand.show()

In [9]:
# Mean discount rate per category, highest first, flattened back to a
# two-column frame ('category', 'discount_rate') for plotting.
cat_discount = (
    df.groupby('category')['discount_rate']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

# Bar chart of the averages; values are printed on the bars with one decimal.
fig_discount = px.bar(
    cat_discount,
    x='category',
    y='discount_rate',
    color='discount_rate',
    text_auto='.1f',
    labels={'discount_rate': '평균 할인율(%)'},
    title='카테고리별 평균 할인율 (%)',
)
fig_discount.show()

In [14]:
import plotly.graph_objects as go

# Among rows in the '식사류' category, take the 10 with the lowest unit_price.
is_meal = df['category'] == '식사류'
best_deals = df.loc[is_meal].sort_values(by='unit_price').head(10)

# Table header: bold labels, white text on a royal-blue background.
deals_header = dict(
    values=['<b>브랜드</b>', '<b>상품명</b>', '<b>행사</b>', '<b>원래 가격</b>', '<b>개당 가격</b>'],
    fill_color='royalblue',
    align='center',
    font=dict(color='white', size=12)
)

# Table body: one list entry per displayed column, in header order. Both price
# columns are rendered as text with thousands separators and a '원' suffix.
deals_cells = dict(
    values=[
        best_deals['brand'],
        best_deals['name'],
        best_deals['event'],
        best_deals['price'].map('{:,}원'.format),
        best_deals['unit_price'].map('{:,}원'.format)
    ],
    fill_color='lavender',
    align='left',
    font=dict(size=11)
)

fig_deals = go.Figure(data=[go.Table(
    columnwidth=[80, 300, 80, 100, 100],
    header=deals_header,
    cells=deals_cells
)])

fig_deals.update_layout(title=dict(text="식사류 가성비 TOP 10 (개당 가격 기준)"))
fig_deals.show()