In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Load the dataset from the CSV file that sits next to the notebook.
csv_path = "shopping_trends.CSV"
data = pd.read_csv(csv_path)
df = pd.DataFrame(data)

# A. How do shopping preferences differ among men of different ages?
def age_group(age):
    """Return the ten-year bucket an age falls into, formatted as "<start>-<end>".

    The bucket starts at the age floored to a multiple of ten and ends nine
    above that start, e.g. 37 -> "30-39".
    """
    decade_start = age // 10 * 10
    decade_end = decade_start + 9
    return f"{decade_start}-{decade_end}"
    
# Keep only the male shoppers. The explicit .copy() makes male_data an
# independent frame, so adding the 'groups' column below does not raise
# pandas' SettingWithCopyWarning (a filtered frame may otherwise be treated
# as a slice of df).
male_data = df[df['Gender'] == 'Male'].copy()

# Label every shopper with a ten-year age bucket, then total the purchase
# amount for each (age bucket, category) pair.
male_data['groups'] = male_data["Age"].apply(age_group)
counts = male_data.groupby(["groups", "Category"])['Purchase Amount (USD)'].sum().reset_index()

plt.figure(figsize=(16, 10))
# Grouped bars: one cluster per age bucket, one bar per category.
ax = sns.barplot(x="groups", y="Purchase Amount (USD)", hue="Category", data=counts)

# Write each bar's value just above it; patches without a positive height
# are skipped.
for p in ax.patches:
    height = p.get_height()
    if height > 0:
        ax.text(
            p.get_x() + p.get_width() / 2,  # horizontal centre of the bar
            height + 0.05,                  # slightly above the bar top
            str(height),
            ha='center', va='bottom', fontsize=10
        )
plt.title("purchase amouts of different age and category")
plt.xlabel("age")
plt.ylabel("purchase amoount")
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# B. Which categories and colors are most popular in each season?
data = pd.read_csv("shopping_trends.CSV")
df = pd.DataFrame(data)

# Total purchase amount for every (season, category) pair.
seasons_data = (
    df.groupby(["Season", "Category"])["Purchase Amount (USD)"]
    .sum()
    .reset_index()
)

# Heatmap with one row per season and one column per category.
season_by_category = seasons_data.pivot(
    index='Season',
    columns='Category',
    values='Purchase Amount (USD)',
)
fig = px.imshow(
    season_by_category,
    text_auto=True,
    title='heatmap of purchase by season and category',
)
fig.show()

# Radar chart: seasonal purchase totals, one polygon per category.
category_data = df.groupby(["Category", "Season"])['Purchase Amount (USD)'].sum().reset_index()

# Categories keep their order of first appearance (this drives legend order).
# Seasons are put in one fixed, sorted order that is used for BOTH the values
# and the axis labels, so every spoke is labelled with the season it shows.
# Missing seasons are dropped; groupby ignores them anyway.
categories = df['Category'].unique()
seasons = df['Season'].dropna().sort_values().unique()

# Category x season table of totals. Reindexing guarantees every category has
# a value for every season; combinations with no purchases become 0.
season_totals = (
    category_data
    .pivot(index='Category', columns='Season', values='Purchase Amount (USD)')
    .reindex(index=categories, columns=seasons)
    .fillna(0)
)
radar_data = [season_totals.loc[category].to_numpy() for category in categories]

# One spoke per season; the first angle is repeated to close each polygon.
angles = np.linspace(0, 2 * np.pi, len(seasons), endpoint=False).tolist()
angles += angles[:1]

fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))

for category, totals in zip(categories, radar_data):
    values = totals.tolist()
    values += values[:1]  # close the polygon
    ax.plot(angles, values, linewidth=2, label=category)
    ax.fill(angles, values, alpha=0.25)

ax.set_xticks(angles[:-1])
ax.set_xticklabels(seasons)

ax.set_rlabel_position(30)
# "Low"/"High" ticks and the radial limit are taken over all categories, not
# only the first one, so they describe the whole chart.
all_totals = np.concatenate(radar_data)
plt.yticks([all_totals.min(), all_totals.max()], ["Low", "High"], color="grey", size=7)
plt.ylim(0, all_totals.max() * 1.1)

plt.title("Seasonal Purchase Amount by Category", size=11, y=1.1)
plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))

plt.show()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# D. Payment method and customer loyalty
data = pd.read_csv('shopping_trends.CSV')
df = pd.DataFrame(data)

# Total purchase amount per payment method.
payment_data = df.groupby('Payment Method')['Purchase Amount (USD)'].sum().reset_index()
payment_method = payment_data['Payment Method']
payment_amount = payment_data['Purchase Amount (USD)']


fig = go.Figure(data = [go.Pie(
    labels = payment_method,
    values = payment_amount,
    # Let Plotly write each method name next to its own slice. Hand-placed
    # annotations at evenly spaced angles do not follow where the slices are
    # actually drawn, so they would name the wrong slices.
    textinfo = 'label+percent',
    textposition = 'outside',
    marker = dict(colors = px.colors.qualitative.Pastel, line=dict(color = '#000000', width = 2)),
    hoverinfo = 'label+percent+value'
)])

# Update the layout. `title_font` replaces the deprecated `titlefont`
# attribute, which recent Plotly releases no longer accept.
fig.update_layout(
    title = 'distribution of purchase amount by payment method',
    title_font = dict(size=16),
    showlegend = True,
    height = 600,
    width = 800
)

fig.show()

# Total purchase amount for every (subscription status, category) pair.
status = (
    df.groupby(['Subscription Status', 'Category'])['Purchase Amount (USD)']
    .sum()
    .reset_index()
)

categories = sorted(df['Category'].unique())

# Row masks for subscribers / non-subscribers and the column being summed.
is_subscriber = status['Subscription Status'] == 'Yes'
is_non_subscriber = status['Subscription Status'] == 'No'
amounts = status['Purchase Amount (USD)']

# Per-category totals for each status (0 when a combination is absent), and
# the subscriber-minus-non-subscriber difference.
y1 = [amounts[is_subscriber & (status['Category'] == category)].sum() for category in categories]
y2 = [amounts[is_non_subscriber & (status['Category'] == category)].sum() for category in categories]
y3 = [yes_total - no_total for yes_total, no_total in zip(y1, y2)]

fig, ax2 = plt.subplots(figsize=(12, 6))

# One line per series, drawn in the same order as the legend entries.
for series, series_label in (
    (y1, 'Subscription Status of yes'),
    (y2, 'Subscription Status of no'),
    (y3, 'difference'),
):
    ax2.plot(categories, series, label=series_label, marker='o')
ax2.legend()
ax2.set_xlabel("category")
ax2.set_ylabel("purchase amount")

plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Load the data and total the purchase amount for each (location, category) pair.
data1 = pd.read_csv("shopping_trends.CSV")
df = pd.DataFrame(data1)

amount_col = 'Purchase Amount (USD)'
data = (
    df.groupby(['Location', 'Category'])[amount_col]
    .sum()
    .reset_index()
)

# Bubble chart: categories on x, locations on y; bubble size and color both
# encode the total purchase amount.
fig = px.scatter(
    data,
    x='Category',
    y='Location',
    size=amount_col,
    color=amount_col,
    hover_data=[amount_col],
    title='Bubble Chart of Category and Location',
    labels={amount_col: amount_col},
    size_max=30,  # upper bound on the bubble size
)

# Same layout settings as nested dicts, written with Plotly's underscore
# shorthand for nested properties.
fig.update_layout(
    title_font_size=20,
    xaxis_tickfont_size=15,
    yaxis_tickfont_size=15,
    legend_orientation='h',
    legend_y=1.1,
    legend_x=0.5,
    legend_xanchor='center',
    legend_font_size=20,
    height=1800,
    width=800,
)

fig.show()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# C. Geographic distribution and purchasing power
data1 = pd.read_csv("shopping_trends.CSV")
df = pd.DataFrame(data1)

# Total purchase amount for every (location, category) pair.
data = df.groupby(['Location', 'Category'])['Purchase Amount (USD)'].sum().reset_index()

# Heatmap with one row per location and one column per category. The figure
# is tall (height=2000) because every location gets its own row.
fig = px.imshow(
    data.pivot(columns = 'Category', index = 'Location',values = 'Purchase Amount (USD)'),
    text_auto = True, title = 'heatmap of category and location', width = 800, height = 2000
)

# Enlarge the tick labels on both axes.
fig.update_layout(
    xaxis=dict(
        tickfont=dict(size=15)
    ),
    yaxis=dict(
        tickfont=dict(size=15)
    )
)
fig.show()

In [None]:
import pandas as pd
import plotly.graph_objects as go

# E. Relationship between color and category
data = pd.read_csv('shopping_trends.CSV')

# Total purchase amount for every (color, category) pair; each row becomes
# one link in the Sankey diagram.
flow_data = data.groupby(['Color', 'Category'])['Purchase Amount (USD)'].sum().reset_index()

colors = sorted(flow_data['Color'].unique())
categories = sorted(flow_data['Category'].unique())

# Node list: all colors first, then all categories.
nodes = colors + categories

# Node index lookups; category indices start right after the color indices.
color_indices = dict(zip(colors, range(len(colors))))
category_indices = dict(zip(categories, range(len(colors), len(colors) + len(categories))))

# Link endpoints and weights, one entry per row of flow_data.
sources = flow_data['Color'].map(color_indices).tolist()
targets = flow_data['Category'].map(category_indices).tolist()
values = flow_data['Purchase Amount (USD)'].tolist()

node_spec = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=nodes,
)
link_spec = dict(source=sources, target=targets, value=values)

fig = go.Figure(go.Sankey(node=node_spec, link=link_spec))

fig.update_layout(title_text="Flow of Colors to Categories", font_size=10, width=1100, height=800)
fig.show()