In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the dataset and discard the '记录数' ("record count") column in one step.
df = pd.read_csv('superstore.csv').drop(columns=['记录数'])

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,Category,City,Country,Customer.ID,Customer.Name,Discount,Market,Order.Date,Order.ID,Order.Priority,...,Sales,Segment,Ship.Date,Ship.Mode,Shipping.Cost,State,Sub.Category,Year,Market2,weeknum
0,Office Supplies,Los Angeles,United States,LS-172304,Lycoris Saunders,0.0,US,2011-01-07 00:00:00.000,CA-2011-130813,High,...,19,Consumer,2011-01-09 00:00:00.000,Second Class,4.37,California,Paper,2011,North America,2
1,Office Supplies,Los Angeles,United States,MV-174854,Mark Van Huff,0.0,US,2011-01-21 00:00:00.000,CA-2011-148614,Medium,...,19,Consumer,2011-01-26 00:00:00.000,Standard Class,0.94,California,Paper,2011,North America,4
2,Office Supplies,Los Angeles,United States,CS-121304,Chad Sievert,0.0,US,2011-08-05 00:00:00.000,CA-2011-118962,Medium,...,21,Consumer,2011-08-09 00:00:00.000,Standard Class,1.81,California,Paper,2011,North America,32
3,Office Supplies,Los Angeles,United States,CS-121304,Chad Sievert,0.0,US,2011-08-05 00:00:00.000,CA-2011-118962,Medium,...,111,Consumer,2011-08-09 00:00:00.000,Standard Class,4.59,California,Paper,2011,North America,32
4,Office Supplies,Los Angeles,United States,AP-109154,Arthur Prichep,0.0,US,2011-09-29 00:00:00.000,CA-2011-146969,High,...,6,Consumer,2011-10-03 00:00:00.000,Standard Class,1.32,California,Paper,2011,North America,40


In [None]:
# Total sales per year (index: Year, single column: Sales).
date_sale = df.groupby('Year').agg({'Sales': 'sum'})

def Display_Sales_year():
    """Show a line chart of total sales per year with value labels.

    Reads the module-level ``date_sale`` frame and renders the figure with
    ``fig4.show()``; nothing is returned.
    """
    # Blue line carrying the comma-formatted sales total as text at each point.
    fig4 = px.line(date_sale, x=date_sale.index, y=date_sale.Sales,
                   color_discrete_sequence=["#0071CE"],
                   text=date_sale['Sales'].apply(lambda x: f'{x:,}'))

    # Extra marker-only trace so each year gets a yellow, black-edged dot.
    scatter_trace = go.Scatter(
        x=date_sale.index,
        y=date_sale.Sales,
        mode='markers',
        marker=dict(size=10, color='#FFC220', line=dict(width=2, color="black")),
        name='Sales',
        showlegend=False
    )

    fig4.add_trace(scatter_trace)

    # Applied to every trace in the figure (line + markers).
    fig4.update_traces(
        line=dict(width=5),
        marker=dict(size=10, color="#FFC220", line=dict(width=2, color="black")),
        textposition='top center',
        texttemplate='%{text}'
    )

    # Keep the 2,000,000 floor used so far, but let it drop when a year falls
    # below it so the line is never clipped by the axis range.
    lowest = date_sale.Sales.min()
    y_min = min(2000000, lowest - abs(lowest) * 0.1)
    y_max = date_sale.Sales.max() * 1.1  # 10% headroom for the text labels
    fig4.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='lightgray',
                      griddash='dot', tickfont=dict(color='gray'),
                      range=[y_min, y_max])

    # Year is a plain number, not a date, so use an integer tick format
    # ("%Y" is a date directive and is not valid for a numeric axis).
    fig4.update_xaxes(
        tickmode='linear',
        tick0=date_sale.index.min(),
        dtick=1,  # one tick per year
        tickformat="d",
        tickfont=dict(color='gray')
    )

    fig4.update_layout(
        title_text = "<b>연간 매출</b>",
        title_font_size = 25,
        title_font_family = "Liberation Serif",
        title_font_color="#5b5b5b",
        xaxis_title = '',
        yaxis_title = '',
        plot_bgcolor= 'white',
    )

    fig4.show()

Display_Sales_year()


In [None]:
# Number of distinct orders per year (columns: Year, Order.ID).
orders_per_year = df.groupby('Year')['Order.ID'].nunique().reset_index()

def Display_order_count_each_year():
    """Show a line chart of the distinct order count per year.

    Reads the module-level ``orders_per_year`` frame and renders the figure
    with ``fig4.show()``; nothing is returned.
    """
    fig4 = px.line(orders_per_year, x='Year', y='Order.ID', color_discrete_sequence=["#0071CE"])

    # Marker-only overlay so every year is highlighted with a yellow dot.
    scatter_trace = go.Scatter(
        x=orders_per_year['Year'],
        y=orders_per_year['Order.ID'],
        mode='markers',
        marker=dict(size=10, color="#FFC220"),
        showlegend=False
    )

    fig4.add_trace(scatter_trace)
    fig4.update_traces(marker_line_width=3)  # outline width of the markers

    # Year is a plain number, not a date, so use an integer tick format
    # ("%Y" is a date directive and is not valid for a numeric axis).
    fig4.update_xaxes(
        tickmode='linear',
        dtick=1,  # one tick per year
        tickformat="d",
        tickfont=dict(color='gray')
    )

    fig4.update_layout(
        title_text = "<b>연간 주문 건수</b>",
        title_font_size = 25,
        title_font_family = "Liberation Serif",
        title_font_color="#5b5b5b",
        xaxis_title = '',
        yaxis_title = 'Total Orders',
        plot_bgcolor='white',
    )

    fig4.update_yaxes(
        showgrid=True, gridwidth=0.5, gridcolor='lightgray',
        griddash='dot', tickfont=dict(color='gray')
    )

    fig4.show()

Display_order_count_each_year()

In [5]:
# Total quantity purchased per year (columns: Year, Quantity).
quantity_per_year = df.groupby('Year')['Quantity'].sum().reset_index()

def Display_quantity_each_year():
    """Show a line chart of the total purchased quantity per year.

    Reads the module-level ``quantity_per_year`` frame and renders the figure
    with ``fig4.show()``; nothing is returned.
    """
    fig4 = px.line(quantity_per_year, x='Year', y='Quantity', color_discrete_sequence=["#0071CE"])

    # Marker-only overlay so every year is highlighted with a yellow dot.
    scatter_trace = go.Scatter(
        x=quantity_per_year['Year'],
        y=quantity_per_year['Quantity'],
        mode='markers',
        marker=dict(size=10, color="#FFC220"),
        name='Markers',
        showlegend=False
    )

    fig4.add_trace(scatter_trace)
    fig4.update_traces(marker_line_width=3)  # outline width of the markers

    # Year is a plain number, not a date, so use an integer tick format
    # ("%Y" is a date directive and is not valid for a numeric axis).
    fig4.update_xaxes(
        tickmode='linear',
        dtick=1,  # one tick per year
        tickformat="d",
        tickfont=dict(color='gray')
    )

    fig4.update_layout(
        title_text = "<b>연간 총 구매 수량</b>",
        title_font_size = 25,
        title_font_family = "Liberation Serif",
        title_font_color="#5b5b5b",
        xaxis_title = '',
        yaxis_title = 'Total Quantity',
        plot_bgcolor='white',
    )

    fig4.update_yaxes(
        showgrid=True, gridwidth=0.5, gridcolor='lightgray',
        griddash='dot', tickfont=dict(color='gray')
    )

    fig4.show()

Display_quantity_each_year()

In [6]:
# Yearly aggregates: total sales, distinct order count and mean discount.
annual_sales = df.groupby('Year', as_index=False)['Sales'].sum()
annual_orders = df.groupby('Year').agg({'Order.ID': 'nunique'}).reset_index()
annual_discount = df.groupby('Year', as_index=False)['Discount'].mean()

# Compute the average order value (AOV) per year: sales / distinct orders.
annual_avg_order = annual_sales.merge(annual_orders, on='Year')
annual_avg_order['AOV'] = annual_avg_order['Sales'] / annual_avg_order['Order.ID']

# One row per year holding both the AOV and the mean discount.
annual_discount_vs_aov = annual_avg_order.merge(annual_discount, on='Year')

# Scatter of mean discount against AOV, labelled by year, with an OLS trendline.
fig = px.scatter(
    annual_discount_vs_aov,
    x='Discount',
    y='AOV',
    text=annual_discount_vs_aov['Year'],
    trendline="ols",
    title="<b>연도별 평균 할인율과 AOV의 관계</b>",
    labels={"Discount": "Average Discount", "AOV": "Average Order Value (AOV)"},
)

fig.update_traces(
    marker={"size": 10, "color": "#FFC220", "line": {"width": 2, "color": "black"}},
    textposition="top center",
    texttemplate="%{text}",
)

# Gray tick labels on both axes; light horizontal grid lines.
fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor="lightgray", tickfont={"color": "gray"})
fig.update_xaxes(tickfont={"color": "gray"})

fig.show()

In [7]:
# Total sales per market (columns: Market, Sales).
# NOTE(review): a later cell defines a function that is also named
# `market_sales`, which rebinds this name once that cell has run.
market_sales = df.groupby('Market', as_index=False)['Sales'].sum()

# One bar per market, coloured by market and labelled with the dollar total.
fig = px.bar(
    market_sales,
    x='Market',
    y='Sales',
    color='Market',
    text=market_sales['Sales'].map("${:,.0f}".format),
    labels={'Sales': 'Total Sales ($)', 'Market': 'Market'},
    title="<b>Total Sales by Market</b>",
)

fig.update_layout(
    plot_bgcolor='white',
    bargap=0.5,
    title_font_size=20,
    title_font_family="Liberation Serif",
    title_font_color="#5b5b5b",
)

# Gray tick labels; dotted light-gray horizontal grid.
fig.update_xaxes(tickfont={'color': 'gray'})
fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='lightgray', griddash='dot', tickfont={'color': 'gray'})

fig.show()

In [8]:
# Year x Market table of summed sales.
market_growth = df.pivot_table(index='Year', columns='Market', values='Sales', aggfunc='sum')

def market_sales():
    """Show grouped bars of yearly total sales, one bar per market.

    Reads the module-level ``market_growth`` pivot table and renders the
    figure; nothing is returned.

    NOTE(review): this function reuses the name `market_sales` that the
    previous cell binds to a DataFrame.
    """
    yearly_by_market = market_growth
    chart = px.bar(
        yearly_by_market,
        x=yearly_by_market.index,
        y=yearly_by_market.columns,
        barmode='group',
    )

    # Axis cosmetics: gray tick labels, dotted light-gray horizontal grid.
    chart.update_xaxes(tickfont={'color': 'gray'})
    chart.update_yaxes(
        showgrid=True,
        gridwidth=0.5,
        gridcolor='lightgray',
        griddash='dot',
        tickfont={'color': 'gray'},
    )

    # Title, axis titles and background.
    chart.update_layout(
        title_text="<b>Yearly Total Sales Growth Across Regional Markets</b>",
        title_font_size=25,
        title_font_family="Liberation Serif",
        title_font_color="#5b5b5b",
        xaxis_title="Year",
        yaxis_title="Total Sales ($)",
        plot_bgcolor='white',
        bargap=0.5,
    )

    chart.show()

market_sales()

In [10]:
# Sales per (Year, Country) restricted to the APAC market.
apac_sales = (
    df.loc[df['Market'] == 'APAC']
    .groupby(['Year', 'Country'], as_index=False)['Sales']
    .sum()
)

# Each country's share (in percent) of that year's APAC sales.
apac_sales['Market Share'] = (
    apac_sales.groupby('Year')['Sales']
    .transform(lambda yearly: (yearly / yearly.sum()) * 100)
)

# The five countries with the highest sales in 2011 define the comparison set.
sales_2011 = apac_sales['Year'] == 2011
top_5_countries = apac_sales.loc[sales_2011].nlargest(5, 'Sales')['Country'].tolist()

apac_top5_sales = apac_sales.loc[apac_sales['Country'].isin(top_5_countries)]

# One line per country showing its market share over the years.
fig = px.line(
    apac_top5_sales,
    x='Year',
    y='Market Share',
    color='Country',
    markers=True,
    labels={'Market Share': 'Market Share (%)', 'Year': 'Year', 'Country': 'Country'},
    title="<b>APAC Market Share Trends (2011-2014) - Top 5 Countries</b>",
)

fig.update_layout(
    plot_bgcolor='white',
    title_font_size=20,
    title_font_family="Liberation Serif",
    title_font_color="#5b5b5b",
)

# One x tick per year; light-gray horizontal grid.
fig.update_xaxes(tickmode="linear", dtick=1, tickfont={'color': 'gray'})
fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='lightgray', tickfont={'color': 'gray'})

fig.show()

In [11]:
# Rows whose Country is Australia.
australia_df = df.loc[df['Country'] == 'Australia']

# Per year: total sales and distinct order count, then AOV = sales / orders.
australia_aov = (
    australia_df.groupby('Year')
    .agg({'Sales': 'sum', 'Order.ID': 'nunique'})
    .reset_index()
)
australia_aov['AOV'] = australia_aov['Sales'] / australia_aov['Order.ID']

# Line with markers, each point labelled with the dollar-formatted AOV.
fig = px.line(
    australia_aov,
    x='Year',
    y='AOV',
    markers=True,
    text=australia_aov['AOV'].map("${:,.2f}".format),
    labels={'AOV': 'Average Order Value ($)', 'Year': 'Year'},
    title="<b>Yearly AOV Trend in Australia</b>",
)

fig.update_traces(line={'width': 3}, textposition='top right')
fig.update_layout(
    plot_bgcolor='white',
    title_font_size=20,
    title_font_family="Liberation Serif",
    title_font_color="#5b5b5b",
)

# One x tick per year; light-gray horizontal grid.
fig.update_xaxes(tickmode="linear", dtick=1, tickfont={'color': 'gray'})
fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='lightgray', tickfont={'color': 'gray'})

fig.show()

In [12]:
# Distinct orders per year for China within the APAC market.
china_orders_per_year = (
    df.loc[(df['Market'] == 'APAC') & (df['Country'] == 'China')]
    .groupby('Year')['Order.ID']
    .nunique()
    .reset_index()
    .assign(Country='China')
)

# Same aggregation for Australia.
australia_orders_per_year = (
    df.loc[(df['Market'] == 'APAC') & (df['Country'] == 'Australia')]
    .groupby('Year')['Order.ID']
    .nunique()
    .reset_index()
    .assign(Country='Australia')
)

# Stack both countries (Australia first) into one long frame for plotting.
orders_comparison = pd.concat([australia_orders_per_year, china_orders_per_year])

# One line per country, each point labelled with its order count.
fig = px.line(
    orders_comparison,
    x='Year',
    y='Order.ID',
    color='Country',
    markers=True,
    text=orders_comparison['Order.ID'],
    labels={'Order.ID': 'Total Orders', 'Year': 'Year', 'Country': 'Country'},
    title="<b>Yearly Order Count Comparison: Australia vs China (APAC Market)</b>",
)

fig.update_traces(line={'width': 3}, textposition='top right')
fig.update_layout(
    plot_bgcolor='white',
    title_font_size=20,
    title_font_family="Liberation Serif",
    title_font_color="#5b5b5b",
)

# One x tick per year; light-gray horizontal grid.
fig.update_xaxes(tickmode="linear", dtick=1, tickfont={'color': 'gray'})
fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='lightgray', tickfont={'color': 'gray'})

fig.show()

In [13]:
# Rows whose Country is China; `australia_df` comes from an earlier cell.
china_df = df.loc[df['Country'] == 'China']

# Australia: yearly sales and distinct orders, then AOV = sales / orders.
australia_aov = (
    australia_df.groupby('Year')
    .agg({'Sales': 'sum', 'Order.ID': 'nunique'})
    .reset_index()
)
australia_aov['AOV'] = australia_aov['Sales'] / australia_aov['Order.ID']
australia_aov['Country'] = 'Australia'

# China: identical aggregation.
china_aov = (
    china_df.groupby('Year')
    .agg({'Sales': 'sum', 'Order.ID': 'nunique'})
    .reset_index()
)
china_aov['AOV'] = china_aov['Sales'] / china_aov['Order.ID']
china_aov['Country'] = 'China'

# Stack both countries (Australia first) into one long frame for plotting.
aov_comparison = pd.concat([australia_aov, china_aov])

# One line per country, each point labelled with the dollar-formatted AOV.
fig = px.line(
    aov_comparison,
    x='Year',
    y='AOV',
    color='Country',
    markers=True,
    text=aov_comparison['AOV'].map("${:,.2f}".format),
    labels={'AOV': 'Average Order Value ($)', 'Year': 'Year', 'Country': 'Country'},
    title="<b>Yearly AOV Comparison: Australia vs China</b>",
)

fig.update_traces(line={'width': 3}, textposition='top right')
fig.update_layout(
    plot_bgcolor='white',
    title_font_size=20,
    title_font_family="Liberation Serif",
    title_font_color="#5b5b5b",
)

# One x tick per year; light-gray horizontal grid.
fig.update_xaxes(tickmode="linear", dtick=1, tickfont={'color': 'gray'})
fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='lightgray', tickfont={'color': 'gray'})

fig.show()

In [14]:
# Restrict the analysis to the two countries being compared.
countries = ["Australia", "China"]
df_filtered = df[df["Country"].isin(countries)]

# Earliest year in which each customer appears within the filtered rows
# (i.e. across both countries combined, not per country).
first_purchase = (
    df_filtered.groupby("Customer.ID")["Year"]
    .min()
    .rename("First_Purchase_Year")
    .reset_index()
)

df_filtered = df_filtered.merge(first_purchase, on="Customer.ID", how="left")

# Vectorised labelling: a row is "New Customer" when its year equals the
# customer's first purchase year, otherwise "Returning Customer". This replaces
# a row-wise DataFrame.apply, which is slow and fails on an empty frame.
df_filtered["Customer_Type"] = (
    (df_filtered["Year"] == df_filtered["First_Purchase_Year"])
    .map({True: "New Customer", False: "Returning Customer"})
)

# Distinct customers per (Year, Country, Customer_Type).
customer_trend = (
    df_filtered.groupby(["Year", "Country", "Customer_Type"])["Customer.ID"]
    .nunique()
    .reset_index(name="Customer_Count")
)

# Stacked bars: new vs returning customers per year, one facet per country.
fig = px.bar(
    customer_trend, x="Year", y="Customer_Count", color="Customer_Type", barmode="stack",
    facet_col="Country", facet_col_spacing=0.08,
    title="<b>Yearly New vs Returning Customers: Australia vs China</b>",
    labels={"Customer_Count": "Customer Count", "Year": "Year", "Customer_Type": "Customer Type"}
)

fig.update_layout(
    title_font_size=20,
    title_font_family="Liberation Serif",
    title_font_color="#5b5b5b",
    plot_bgcolor="white"
)

fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor="lightgray", tickfont=dict(color="gray"))
fig.update_xaxes(tickmode="linear", dtick=1, tickfont=dict(color="gray"))

fig.show()

# Same data as lines, to make the year-over-year trend easier to read.
fig2 = px.line(
    customer_trend, x="Year", y="Customer_Count", color="Customer_Type", markers=True,
    facet_col="Country", facet_col_spacing=0.08,
    title="<b>Trend of New vs Returning Customers: Australia vs China</b>",
    labels={"Customer_Count": "Customer Count", "Year": "Year", "Customer_Type": "Customer Type"}
)

fig2.update_traces(textposition="top right", line=dict(width=3))
fig2.update_layout(
    title_font_size=20,
    title_font_family="Liberation Serif",
    title_font_color="#5b5b5b",
    plot_bgcolor="white"
)

fig2.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor="lightgray", tickfont=dict(color="gray"))
fig2.update_xaxes(tickmode="linear", dtick=1, tickfont=dict(color="gray"))

fig2.show()

In [15]:
# Rows whose Country is Australia.
australia_df = df[df['Country'] == 'Australia']

# Per year: mean discount, total profit and total sales; the profit margin is
# profit / sales, i.e. a fraction rather than a percentage.
discount_profit_trend = australia_df.groupby('Year').agg({'Discount': 'mean', 'Profit': 'sum', 'Sales': 'sum'}).reset_index()
discount_profit_trend['Profit Margin'] = discount_profit_trend['Profit'] / discount_profit_trend['Sales']

# One point per year (labelled with the year) plus an OLS trendline.
fig = px.scatter(
    discount_profit_trend, x='Discount', y='Profit Margin', text=discount_profit_trend['Year'],
    title="<b>Correlation between Discount Rate and Profit Margin in Australia</b>",
    labels={'Discount': 'Average Discount Rate (%)', 'Profit Margin': 'Profit Margin (%)'},
    trendline='ols',
    trendline_color_override="#636EFA",
    color_discrete_sequence=["#636EFA"]
)

fig.update_traces(marker=dict(size=10), textposition='top right')
fig.update_layout(plot_bgcolor='white')

# Both axis titles say "(%)" while the plotted values are fractions, so render
# the ticks as percentages to make the labels and the numbers agree.
# NOTE(review): assumes Discount is stored as a fraction (the data preview
# shows 0.0) — confirm against the dataset.
fig.update_xaxes(tickformat=".1%")
fig.update_yaxes(tickformat=".1%")

fig.show()