In [9]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Read the Superstore export and discard the '记录数' column in one pass,
# then preview the first rows.
df = pd.read_csv('superstore.csv').drop(columns='记录数')
df.head()

Unnamed: 0,Category,City,Country,Customer.ID,Customer.Name,Discount,Market,Order.Date,Order.ID,Order.Priority,...,Sales,Segment,Ship.Date,Ship.Mode,Shipping.Cost,State,Sub.Category,Year,Market2,weeknum
0,Office Supplies,Los Angeles,United States,LS-172304,Lycoris Saunders,0.0,US,2011-01-07 00:00:00.000,CA-2011-130813,High,...,19,Consumer,2011-01-09 00:00:00.000,Second Class,4.37,California,Paper,2011,North America,2
1,Office Supplies,Los Angeles,United States,MV-174854,Mark Van Huff,0.0,US,2011-01-21 00:00:00.000,CA-2011-148614,Medium,...,19,Consumer,2011-01-26 00:00:00.000,Standard Class,0.94,California,Paper,2011,North America,4
2,Office Supplies,Los Angeles,United States,CS-121304,Chad Sievert,0.0,US,2011-08-05 00:00:00.000,CA-2011-118962,Medium,...,21,Consumer,2011-08-09 00:00:00.000,Standard Class,1.81,California,Paper,2011,North America,32
3,Office Supplies,Los Angeles,United States,CS-121304,Chad Sievert,0.0,US,2011-08-05 00:00:00.000,CA-2011-118962,Medium,...,111,Consumer,2011-08-09 00:00:00.000,Standard Class,4.59,California,Paper,2011,North America,32
4,Office Supplies,Los Angeles,United States,AP-109154,Arthur Prichep,0.0,US,2011-09-29 00:00:00.000,CA-2011-146969,High,...,6,Consumer,2011-10-03 00:00:00.000,Standard Class,1.32,California,Paper,2011,North America,40


### 대륙별 카테고리 판매량

In [4]:
# Convert the order date strings to datetimes (stored back on the shared frame).
df['Order.Date'] = pd.to_datetime(df['Order.Date'])

# Number of rows per (Market, Category), largest first, as a tidy frame.
category_sales = (
    df.groupby(['Market', 'Category'])
    .size()
    .sort_values(ascending=False)
    .to_frame('category sales')
    .reset_index()
)

# Grouped bars: one cluster per market, one bar per category.
fig = px.bar(
    category_sales,
    x='Market',
    y='category sales',
    color='Category',
    barmode='group',
    text_auto=True,
)
fig.show()

### 대륙별 서브 카테고리 판매량

In [6]:
# Number of rows per (Market, Sub.Category), largest first.
subcategory_sales = (
    df.groupby(['Market', 'Sub.Category'])
    .size()
    .sort_values(ascending=False)
    .to_frame('subcategory sales')
    .reset_index()
)
markets = subcategory_sales["Market"].unique()

# One bar trace per market; the dropdown below toggles traces by position,
# so trace order must follow the order of `markets`.
fig = go.Figure()
for market in markets:
    df_market = subcategory_sales.loc[subcategory_sales["Market"] == market]
    fig.add_trace(go.Bar(
        x=df_market["Sub.Category"],
        y=df_market["subcategory sales"],
        name=market,
        text=df_market['subcategory sales'],
        textposition='outside',
    ))

n_traces = len(fig.data)

# First entry shows every market at once ...
buttons = [{
    "label": "전체",
    "method": "update",
    "args": [{"visible": [True] * n_traces}, {"title": "전체 대륙 서브 카테고리 판매량"}],
}]
# ... and each following entry isolates a single market's trace.
buttons += [
    {
        "label": market,
        "method": "update",
        "args": [
            {"visible": [pos == i for pos in range(n_traces)]},
            {"title": f"{market} 서브 카테고리 판매량"},
        ],
    }
    for i, market in enumerate(markets)
]

market_menu = {
    "buttons": buttons,
    "direction": "down",
    "showactive": True,
}
fig.update_layout(updatemenus=[market_menu], barmode="group")

fig.show()

### 대륙별 카테고리 판매 점유율

In [7]:
# Number of rows per (Market, Category); each pie shows a category's share of
# these counts (not a monetary amount).
category_sales_cnt = (
    df.groupby(['Market', 'Category'])
    .size()
    .sort_values(ascending=False)
    .to_frame('sales cnt')
    .reset_index()
)

# Take the market list from this cell's own data instead of relying on a
# variable left behind by an earlier cell.
pie_markets = category_sales_cnt["Market"].unique()

# Counts summed over all markets back the "전체" (all) view. Showing every
# market pie at once would stack them on the same spot, leaving only one readable.
overall_cnt = category_sales_cnt.groupby('Category', as_index=False)['sales cnt'].sum()

fig = go.Figure()

# Trace 0: all markets combined, shown initially.
fig.add_trace(
    go.Pie(
        labels=overall_cnt['Category'],
        values=overall_cnt['sales cnt'],
        name="전체",
    )
)

# Traces 1..n: one pie per market, hidden until picked from the dropdown.
for market in pie_markets:
    df_market = category_sales_cnt[category_sales_cnt["Market"] == market]
    fig.add_trace(
        go.Pie(
            labels=df_market['Category'],
            values=df_market['sales cnt'],
            name=market,
            visible=False,
        )
    )

n_traces = len(fig.data)

buttons = [
    {
        "label": "전체",
        "method": "update",
        "args": [
            {"visible": [pos == 0 for pos in range(n_traces)]},
            {"title": "전체 대륙 카테고리 판매 점유율"},
        ],
    }
]

# Market i sits at trace index i + 1 because trace 0 is the combined pie.
for trace_idx, market in enumerate(pie_markets, start=1):
    buttons.append(
        {
            "label": market,
            "method": "update",
            "args": [
                {"visible": [pos == trace_idx for pos in range(n_traces)]},
                {"title": f"{market} 카테고리 판매 점유율"},
            ],
        }
    )

fig.update_layout(
    # Initial title matches the initially active "전체" entry.
    title="전체 대륙 카테고리 판매 점유율",
    updatemenus=[
        {
            "buttons": buttons,
            "direction": "down",
            "showactive": True,
        }
    ])

fig.show()

### 지역별, 연도별 평균 배송비

In [13]:
# Mean shipping cost for every (Year, Market) pair.
shipping = (
    df.groupby(['Year', 'Market'])['Shipping.Cost']
    .mean()
    .reset_index(name='ship cost')
)
# Year is turned into text before plotting so it is used as a category label.
shipping['Year'] = shipping['Year'].astype(str)

fig = px.bar(
    shipping,
    x='Market',
    y='ship cost',
    color='Year',
    barmode='group',
    text_auto='.1f',
    title='지역별 연도별 평균 배송비',
)
fig.show()

In [14]:
# Mean shipping cost per market, most expensive market first.
shipping_cost = (
    df.groupby('Market')['Shipping.Cost']
    .mean()
    .sort_values(ascending=False)
    .reset_index(name='ship cost mean')
)

fig = px.bar(
    shipping_cost,
    x='Market',
    y='ship cost mean',
    color='Market',
    text_auto='.1f',
    title='지역별 배송비 평균',
)
fig.show()

### 호주, 중국 카테고리별 매출 평균, 이익 평균

In [19]:
# Rows for Australia and China only.
aus_china = df[df.Country.isin(['Australia', 'China'])]

# Mean Sales and mean Profit per (Year, Country, Category), ordered by year so
# each line is drawn left to right.
aus_china_sp = (
    aus_china.groupby(['Year', 'Country', 'Category'])[['Sales', 'Profit']]
    .mean()
    .reset_index()
    .sort_values('Year')
)

countries = aus_china_sp['Country'].unique()
categories = aus_china_sp['Category'].unique()


def _metric_traces(metric):
    """Build one line trace per (country, category) for the given metric column."""
    traces = []
    for country in countries:
        for category in categories:
            subset = aus_china_sp[
                (aus_china_sp['Country'] == country) & (aus_china_sp['Category'] == category)
            ]
            traces.append(go.Scatter(
                x=subset['Year'],
                y=subset[metric],
                mode='lines+markers',
                name=f"{country} - {category} ({metric})",
            ))
    return traces


sales_traces = _metric_traces('Sales')
profit_traces = _metric_traces('Profit')

# Sales traces come first, then Profit traces; the masks below rely on that order.
fig = go.Figure()
fig.add_traces(sales_traces + profit_traces)

visible_all = [True] * (len(sales_traces) + len(profit_traces))
visible_sales = [True] * len(sales_traces) + [False] * len(profit_traces)
visible_profit = [False] * len(sales_traces) + [True] * len(profit_traces)

combined_title = "호주, 중국 카테고리별 판매 금액, 이익"

# The figure starts with every trace visible, so the first (initially active)
# entry must describe that combined view; it also lets the reader return to it
# after picking a single metric.
dropdown_buttons = [
    {"label": "Sales & Profit", "method": "update",
     "args": [{"visible": visible_all}, {"title": combined_title}]},
    {"label": "Sales", "method": "update",
     "args": [{"visible": visible_sales}, {"title": "호주, 중국 카테고리별 판매 금액 평균"}]},
    {"label": "Profit", "method": "update",
     "args": [{"visible": visible_profit}, {"title": "호주, 중국 카테고리별 이익 평균"}]},
]

fig.update_layout(
    updatemenus=[{
        "buttons": dropdown_buttons,
        "direction": "down",
        "showactive": True,
        "x": 0.1,
        "xanchor": "left",
        "y": 1.15,
        "yanchor": "top",
    }],
    title=combined_title,
    # One tick per year keeps fractional years off the axis.
    xaxis=dict(title="Year", tickmode="linear", dtick=1),
    yaxis_title="Amount"
)

fig.show()

### 호주, 중국 카테고리별 주문 건수

In [21]:
# Number of rows per (Year, Country, Category) for the Australia/China subset.
size_df = (
    aus_china.groupby(['Year', 'Country', 'Category'])
    .size()
    .reset_index(name='주문건수')
)

categories = size_df['Category'].unique()
countries = size_df['Country'].unique()
first_category = categories[0]

# Traces are added category-major (all countries of category 0, then category 1, ...).
# Only the first category's lines start out visible.
fig = go.Figure()
for category in categories:
    for country in countries:
        row_mask = (size_df['Category'] == category) & (size_df['Country'] == country)
        category_data = size_df[row_mask]
        fig.add_trace(go.Scatter(
            x=category_data['Year'],
            y=category_data['주문건수'],
            mode='lines+markers',
            name=f"{country} - {category}",
            visible=(category == first_category),
        ))


def _category_mask(selected):
    """Visibility flags, in trace order, that show only the selected category."""
    return [cat == selected for cat in categories for _ in countries]


dropdown_buttons = []
for category in categories:
    dropdown_buttons.append({
        "label": category,
        "method": "update",
        "args": [
            {"visible": _category_mask(category)},
            {"title": f"{category} 카테고리별 국가별 주문건수"},
        ],
    })

category_menu = {
    "buttons": dropdown_buttons,
    "direction": "down",
    "showactive": True,
    "x": 0.1,
    "xanchor": "left",
    "y": 1.15,
    "yanchor": "top",
}
fig.update_layout(
    updatemenus=[category_menu],
    title="카테고리별 국가별 주문건수",
    xaxis_title="Year",
    yaxis_title="주문건수",
    template="plotly_dark"
)

fig.show()


In [22]:
def _purchase_rates(frame, key, low_cut, high_cut):
    """Count and rate of low- and high-value rows per group.

    Parameters
    ----------
    frame : DataFrame with a ``Sales`` column and a ``key`` column.
    key : name of the column to group by.
    low_cut : rows with ``Sales <= low_cut`` count as low-value.
    high_cut : rows with ``Sales >= high_cut`` count as high-value.

    Returns
    -------
    DataFrame with columns ``[key, low_amount, low_rate, high_amount, high_rate]``,
    one row per group present in ``frame``. Rates are the counts divided by the
    group's total row count.
    """
    totals = frame.groupby(key).size()
    low_counts = frame[frame.Sales <= low_cut].groupby(key).size()
    high_counts = frame[frame.Sales >= high_cut].groupby(key).size()

    # Align on the group label rather than on position: a group with no low or
    # high rows gets a 0 instead of shifting or breaking the other groups.
    rates = pd.DataFrame(index=totals.index)
    rates['low_amount'] = low_counts.reindex(totals.index, fill_value=0)
    rates['low_rate'] = rates['low_amount'] / totals
    rates['high_amount'] = high_counts.reindex(totals.index, fill_value=0)
    rates['high_rate'] = rates['high_amount'] / totals
    return rates.reset_index()


# Original note: about 13,000 rows have Sales <= 31.
# NOTE(review): the market table uses the literal 31 as its low threshold while
# the APAC country table uses the 0.25 quantile of Sales; both are kept as they
# were — confirm whether they are meant to be the same value.
LOW_SALES_CUTOFF = 31
sales_q25 = df.Sales.quantile(0.25)
sales_q75 = df.Sales.quantile(0.75)

# Per market: share of rows at or below the low cutoff / at or above the 0.75 quantile.
purchase = _purchase_rates(df, 'Market', LOW_SALES_CUTOFF, sales_q75)

# NOTE(review): Mongolia is left out of the per-country breakdown, as before;
# the reason is not visible here — confirm it is still wanted.
apac = df[(df.Market=='APAC') & (df.Country!='Mongolia')]

apac_purchase = _purchase_rates(apac, 'Country', sales_q25, sales_q75)

# Append the APAC-wide row from the market table instead of pasting numbers by
# hand, so it stays in sync with the data. This row is based on all APAC rows
# (the Mongolia filter above is not applied to it).
apac_total = purchase.loc[purchase['Market'] == 'APAC'].iloc[0]
apac_purchase.loc[len(apac_purchase)] = [
    'APAC_total',
    int(apac_total['low_amount']),
    float(apac_total['low_rate']),
    int(apac_total['high_amount']),
    float(apac_total['high_rate']),
]

apac_purchase = apac_purchase.sort_values('high_rate', ascending=False).reset_index(drop=True)

### APAC 나라별 비싼 제품 구매 비율

In [23]:
# Side-by-side bars per country: share of low-value rows (blue) next to the
# share of high-value rows (red). Labels show the rate rounded to 3 decimals.
bar_specs = (
    ("low_rate", "Low Rate", "blue"),
    ("high_rate", "High Rate", "red"),
)

fig = go.Figure()
for rate_col, legend_name, bar_color in bar_specs:
    fig.add_trace(go.Bar(
        x=apac_purchase["Country"],
        y=apac_purchase[rate_col],
        name=legend_name,
        marker_color=bar_color,
        text=apac_purchase[rate_col].round(3),
        textposition='outside',
    ))

fig.update_layout(
    title="APAC 나라별 비싼 제품 구매 비율",
    xaxis_title="Country",
    yaxis_title="Rate",
    barmode="group",
    xaxis_tickangle=-45,
    template='plotly_dark',
)

fig.show()

### 중국 가구 카테고리 연도별 매출 중앙값, 주문 건수의 관계

In [24]:
# Make sure the order date is a datetime and derive the calendar year from it
# (both are written back onto the shared frame).
df['Order.Date'] = pd.to_datetime(df['Order.Date'])
df['year'] = df['Order.Date'].dt.year

# China rows, then the Furniture subset grouped by year.
ch = df[df.Country == 'China']
furniture_by_year = ch[ch.Category == 'Furniture'].groupby('year')

# Median Sales per year, plus the number of rows per year. Both come from the
# same grouping, so the positional assignment of the counts lines up.
ch_furniture = furniture_by_year.Sales.median().to_frame('sales median').reset_index()
ch_furniture['amount'] = furniture_by_year.size().values

years = ch_furniture["year"]

fig = go.Figure()

# Left axis: median sales amount (solid blue).
median_line = go.Scatter(
    x=years,
    y=ch_furniture["sales median"],
    text=ch_furniture['sales median'],
    textposition='top center',
    mode="lines+markers+text",
    name="판매 금액 중앙값",
    line=dict(color="blue", width=2),
)
fig.add_trace(median_line)

# Right axis (y2): row count per year (dashed red).
count_line = go.Scatter(
    x=years,
    y=ch_furniture["amount"],
    mode="lines+markers+text",
    text=ch_furniture['amount'],
    textposition='bottom center',
    name="주문건수",
    line=dict(color="red", width=2, dash="dash"),
    yaxis="y2",
)
fig.add_trace(count_line)

fig.update_layout(
    title="중국 가구 카테고리 연도별 판매 금액 중앙값, 주문건수",
    xaxis_title="Year",
    yaxis=dict(title="판매 금액 중앙값"),
    # Second y axis drawn over the first, on the right-hand side.
    yaxis2=dict(title="주문건수", overlaying="y", side="right"),
    # One tick per year.
    xaxis=dict(tickmode="linear", dtick=1),
    legend=dict(x=0.9, y=1.2),
    template="plotly_dark"
)

fig.show()