In [1]:
import os
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

In [2]:
# 数据库地址：数据库放在上一级目录下
db_path = os.path.join(os.path.dirname(os.getcwd()), "data.db")
engine_path = "sqlite:///" + db_path
# 创建数据库引擎
engine = create_engine(engine_path)

In [3]:
sql = """
select 
*
from
shopRefuse
"""

df = pd.read_sql(sql, engine)

In [4]:
df.sample(5)

Unnamed: 0,index,shopid,create_time,total_num,td_num
1391,1391,25,2022-02-19 00:00:00.000000,115,13
42,42,1,2022-02-14 00:00:00.000000,144,24
1025,1025,19,2022-01-20 00:00:00.000000,216,22
3286,3286,59,2022-02-10 00:00:00.000000,173,11
1324,1324,24,2022-02-08 00:00:00.000000,197,17


In [8]:
df2 = df.copy()

### 添加周维度

In [9]:
from datetime import datetime,timedelta

In [10]:
def get_monday_to_sunday(today, weekly=0):
    """
    :function: 获取指定日期的周一和周日的日期
    :param today: '2021-11-16'; 当前日期：today = datetime.now().strftime('%Y-%m-%d')
    :param weekly: 获取指定日期的上几周或者下几周，weekly=0当前周，weekly=-1上一周，weekly=1下一周
    :return: 返回指定日期的周一和周日日期
    :return_type: tuple
    """
    last = weekly * 7
    today = datetime.strptime(str(today), "%Y-%m-%d")
    monday = datetime.strftime(today - timedelta(today.weekday() - last), "%Y-%m-%d")
    monday_ = datetime.strptime(monday, "%Y-%m-%d")
    sunday = datetime.strftime(monday_ + timedelta(monday_.weekday() + 6), "%Y-%m-%d")
    return "{0}|{1}".format(monday, sunday)

In [11]:
get_monday_to_sunday("2022-01-01")

'2021-12-27|2022-01-02'

In [12]:
df2["week_range"] = df2["create_time"].map(lambda x:get_monday_to_sunday(str(x)[:10]))

In [13]:
df2["week_range"].value_counts()

2022-01-24|2022-01-30    560
2022-02-14|2022-02-20    560
2022-01-31|2022-02-06    560
2022-02-21|2022-02-27    560
2022-02-07|2022-02-13    560
2022-01-10|2022-01-16    560
2022-01-03|2022-01-09    560
2022-01-17|2022-01-23    560
Name: week_range, dtype: int64

In [14]:
df2.sample(5)

Unnamed: 0,index,shopid,create_time,total_num,td_num,week_range
4349,4349,78,2022-02-09 00:00:00.000000,251,28,2022-02-07|2022-02-13
1120,1120,21,2022-01-03 00:00:00.000000,193,27,2022-01-03|2022-01-09
392,392,8,2022-01-03 00:00:00.000000,173,26,2022-01-03|2022-01-09
921,921,17,2022-01-28 00:00:00.000000,106,27,2022-01-24|2022-01-30
1008,1008,19,2022-01-03 00:00:00.000000,184,15,2022-01-03|2022-01-09


### 计算每周的退单率

In [15]:
td_rate_df = df2.groupby(by=["shopid","week_range"],as_index=False).agg({"total_num":"sum","td_num":"sum"})

In [16]:
td_rate_df["td_rate"] = td_rate_df["td_num"]/td_rate_df["total_num"]

In [17]:
td_rate_df.sample(1)

Unnamed: 0,shopid,week_range,total_num,td_num,td_rate
256,33,2022-01-03|2022-01-09,1757,129,0.073421


### 计算每周总体的退单率

In [18]:
week_td_rate_df = df2.groupby(by="week_range",as_index=False).agg({"total_num":"sum","td_num":"sum"})

In [19]:
week_td_rate_df["week_td_rate"] = week_td_rate_df["td_num"]/week_td_rate_df["total_num"]

In [20]:
week_td_rate_df

Unnamed: 0,week_range,total_num,td_num,week_td_rate
0,2022-01-03|2022-01-09,112189,10931,0.097434
1,2022-01-10|2022-01-16,111791,10655,0.095312
2,2022-01-17|2022-01-23,113399,10724,0.094569
3,2022-01-24|2022-01-30,113776,11072,0.097314
4,2022-01-31|2022-02-06,112758,10853,0.09625
5,2022-02-07|2022-02-13,107143,10955,0.102247
6,2022-02-14|2022-02-20,111946,10989,0.098163
7,2022-02-21|2022-02-27,112292,10810,0.096267


### 统计每件商品高于周退单率的次数

In [21]:
merge_df = pd.merge(
    td_rate_df,week_td_rate_df[["week_range","week_td_rate"]],
    on="week_range",
    how="left"
)

In [22]:
merge_df["td_count"] = merge_df[["td_rate","week_td_rate"]].apply(lambda x:0 if x[0]<=x[1] else 1,axis=1)

In [23]:
merge_df[merge_df["shopid"]==1]

Unnamed: 0,shopid,week_range,total_num,td_num,td_rate,week_td_rate,td_count
0,1,2022-01-03|2022-01-09,1317,158,0.11997,0.097434,1
1,1,2022-01-10|2022-01-16,986,164,0.166329,0.095312,1
2,1,2022-01-17|2022-01-23,1104,117,0.105978,0.094569,1
3,1,2022-01-24|2022-01-30,1293,121,0.093581,0.097314,0
4,1,2022-01-31|2022-02-06,1607,129,0.080274,0.09625,0
5,1,2022-02-07|2022-02-13,1515,144,0.09505,0.102247,0
6,1,2022-02-14|2022-02-20,1251,140,0.11191,0.098163,1
7,1,2022-02-21|2022-02-27,1384,148,0.106936,0.096267,1


In [24]:
result_df = merge_df.pivot_table(index="shopid",columns="week_range",values="td_count",margins=True,aggfunc=lambda x:x.sum())

In [25]:
result_df

week_range,2022-01-03|2022-01-09,2022-01-10|2022-01-16,2022-01-17|2022-01-23,2022-01-24|2022-01-30,2022-01-31|2022-02-06,2022-02-07|2022-02-13,2022-02-14|2022-02-20,2022-02-21|2022-02-27,All
shopid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1,1,1,0,0,0,1,1,5
2,1,0,1,0,0,1,1,0,4
3,1,0,0,1,0,1,1,1,5
4,1,1,1,0,0,0,1,1,5
5,0,1,0,1,0,0,0,0,2
...,...,...,...,...,...,...,...,...,...
77,0,1,0,1,1,0,1,0,4
78,0,1,0,0,0,0,0,1,2
79,1,1,0,0,0,1,1,0,4
80,1,0,0,1,1,0,0,0,3


In [26]:
result_df["All"].value_counts()

5      19
4      17
2      14
3      13
6      12
7       4
1       1
331     1
Name: All, dtype: int64

### 高于5次的商品为异常商品

In [27]:
unnormal_shop_df = result_df[result_df["All"]>5]

In [28]:
unnormal_shop_df

week_range,2022-01-03|2022-01-09,2022-01-10|2022-01-16,2022-01-17|2022-01-23,2022-01-24|2022-01-30,2022-01-31|2022-02-06,2022-02-07|2022-02-13,2022-02-14|2022-02-20,2022-02-21|2022-02-27,All
shopid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
9,1,0,1,1,1,0,1,1,6
10,1,1,0,1,0,1,1,1,6
16,1,1,1,0,1,1,1,1,7
19,0,1,0,1,1,1,1,1,6
20,1,1,1,0,1,1,1,1,7
22,1,1,0,0,1,1,1,1,6
24,1,1,1,0,1,0,1,1,6
30,1,1,1,1,1,1,0,0,6
40,1,1,1,1,1,0,0,1,6
41,1,1,0,1,0,1,1,1,6
