In [1]:
import polars as pl
from perspective.widget import PerspectiveWidget

In [2]:
# Load the trade log, order it chronologically, derive each fee as a fraction
# of the traded amount, and number the rows starting from 1.
df = (
    pl.read_parquet("stock_trades.parquet")
    .sort("交易日期", "交易时间", "证券代码")
    .with_columns(
        # The time column is turned into a plain string only after sorting.
        pl.col("交易时间").cast(pl.String),
        # Produces 手续费率, 印花税率, 过户费率 (in that order).
        **{
            f"{fee}率": pl.col(fee) / pl.col("成交金额")
            for fee in ("手续费", "印花税", "过户费")
        },
    )
    .with_row_index(name="序号", offset=1)
)
df

序号,券商,交易日期,交易时间,证券代码,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率
u32,str,date,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1,"""湘财""",2022-07-11,"""09:33:37""","""000900""","""现代投资""","""买入""",4.05,34400.0,139320.0,22.29,0.0,1.39,0.0,-139342.29,0.00016,0.0,0.00001
2,"""湘财""",2022-07-11,"""09:34:24""","""601077""","""渝农商行""","""买入""",3.65,38300.0,139795.0,22.37,0.0,1.38,0.0,-139818.75,0.00016,0.0,0.00001
3,"""湘财""",2022-07-11,"""09:36:30""","""600894""","""广日股份""","""买入""",6.54,21400.0,139956.0,22.39,0.0,1.41,0.0,-139979.8,0.00016,0.0,0.00001
4,"""湘财""",2022-07-11,"""09:37:25""","""601992""","""金隅集团""","""买入""",2.59,54000.0,139860.0,22.38,0.0,1.42,0.0,-139883.8,0.00016,0.0,0.00001
5,"""湘财""",2022-07-11,"""09:38:16""","""002462""","""嘉事堂""","""买入""",13.51,10400.0,140504.0,22.48,0.0,1.41,0.0,-140526.48,0.00016,0.0,0.00001
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
359,"""海通两融""",2023-10-31,"""09:31:53""","""002956""","""西麦食品""","""卖出""",14.13,5000.0,70650.0,6.74,35.35,0.0,0.0,70607.91,0.000095,0.0005,0.0
360,"""海通两融""",2023-10-31,"""09:39:57""","""603214""","""爱婴室""","""买入""",15.84,3100.0,49104.0,5.0,0.0,0.51,0.0,-49109.51,0.000102,0.0,0.00001
361,"""海通两融""",2023-10-31,"""09:40:55""","""300132""","""青松股份""","""买入""",5.21,9600.0,50016.0,5.0,0.0,0.0,0.0,-50021.0,0.0001,0.0,0.0
362,"""海通两融""",2023-10-31,"""09:43:13""","""002492""","""恒基达鑫""","""买入""",5.91,8400.0,49644.0,5.0,0.0,0.0,0.0,-49649.0,0.000101,0.0,0.0


In [3]:
# Open the full trade table in an interactive Perspective viewer.
PerspectiveWidget(df)

PerspectiveWidget(binding_mode='server', columns=['序号', '券商', '交易日期', '交易时间', '证券代码', '证券名称', '买卖标志', '成交价格', …

In [4]:
# For every security: how many trade rows exist and the list of their
# buy/sell flags.
(
    df.group_by(["证券代码", "证券名称"])
    .agg(
        pl.len(),
        pl.col("买卖标志"),
    )
)

证券代码,证券名称,len,买卖标志
str,str,u32,list[str]
"""600839""","""四川长虹""",3,"[""买入"", ""卖出"", ""卖出""]"
"""600022""","""山东钢铁""",2,"[""买入"", ""卖出""]"
"""688660""","""电气风电""",1,"[""买入""]"
"""600336""","""澳柯玛""",2,"[""买入"", ""卖出""]"
"""002106""","""莱宝高科""",2,"[""买入"", ""卖出""]"
…,…,…,…
"""300221""","""银禧科技""",2,"[""买入"", ""卖出""]"
"""600120""","""浙江东方""",2,"[""买入"", ""卖出""]"
"""002753""","""永东股份""",2,"[""买入"", ""卖出""]"
"""000717""","""中南股份""",2,"[""买入"", ""卖出""]"


In [5]:
# Net traded quantity per security: buys count positive, sells negative.
# Rows with any other flag yield null and are ignored by the sum.
(
    df.group_by("证券代码", "证券名称")
    .agg(
        pl.when(pl.col("买卖标志") == "买入")
        .then(pl.col("成交数量"))
        .when(pl.col("买卖标志") == "卖出")
        .then(-pl.col("成交数量"))
        .sum()
        .alias("结余数量")
    )
    .sort("结余数量")
)

证券代码,证券名称,结余数量
str,str,f64
"""603167""","""渤海轮渡""",-13600.0
"""300889""","""爱克股份""",-7000.0
"""600333""","""长春燃气""",-3900.0
"""300155""","""安居宝""",0.0
"""603626""","""科森科技""",0.0
…,…,…
"""600525""","""长园集团""",8500.0
"""300132""","""青松股份""",9600.0
"""300022""","""吉峰科技""",10800.0
"""300215""","""电科院""",20000.0


In [6]:
# d1 = all trades except those of securities whose net traded quantity over the
# whole file is negative (more sold than bought).
# NOTE(review): presumably these are positions opened before the first record
# in the file — confirm.
#
# The balance is grouped by 证券代码 only, because the anti-join below keys on
# 证券代码 only. Grouping by (证券代码, 证券名称) would split a security whose
# name differs between rows into several groups; a single negative sub-balance
# would then drop the whole security even when its true net position is >= 0.
d1 = df.join(
    df.group_by("证券代码")
    .agg(
        结余数量=(
            pl.when(pl.col("买卖标志") == "卖出")
            .then(-pl.col("成交数量"))
            .when(pl.col("买卖标志") == "买入")
            .then(pl.col("成交数量"))
            # Any other flag yields null, which the sum ignores.
            .sum()
        ),
    )
    .filter(pl.col("结余数量") < 0),
    on="证券代码",
    how="anti",
)

In [7]:
# Display d1: the trade table after the anti-join that removes securities with
# a negative net traded quantity.
d1

序号,券商,交易日期,交易时间,证券代码,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率
u32,str,date,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1,"""湘财""",2022-07-11,"""09:33:37""","""000900""","""现代投资""","""买入""",4.05,34400.0,139320.0,22.29,0.0,1.39,0.0,-139342.29,0.00016,0.0,0.00001
2,"""湘财""",2022-07-11,"""09:34:24""","""601077""","""渝农商行""","""买入""",3.65,38300.0,139795.0,22.37,0.0,1.38,0.0,-139818.75,0.00016,0.0,0.00001
3,"""湘财""",2022-07-11,"""09:36:30""","""600894""","""广日股份""","""买入""",6.54,21400.0,139956.0,22.39,0.0,1.41,0.0,-139979.8,0.00016,0.0,0.00001
4,"""湘财""",2022-07-11,"""09:37:25""","""601992""","""金隅集团""","""买入""",2.59,54000.0,139860.0,22.38,0.0,1.42,0.0,-139883.8,0.00016,0.0,0.00001
5,"""湘财""",2022-07-11,"""09:38:16""","""002462""","""嘉事堂""","""买入""",13.51,10400.0,140504.0,22.48,0.0,1.41,0.0,-140526.48,0.00016,0.0,0.00001
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
359,"""海通两融""",2023-10-31,"""09:31:53""","""002956""","""西麦食品""","""卖出""",14.13,5000.0,70650.0,6.74,35.35,0.0,0.0,70607.91,0.000095,0.0005,0.0
360,"""海通两融""",2023-10-31,"""09:39:57""","""603214""","""爱婴室""","""买入""",15.84,3100.0,49104.0,5.0,0.0,0.51,0.0,-49109.51,0.000102,0.0,0.00001
361,"""海通两融""",2023-10-31,"""09:40:55""","""300132""","""青松股份""","""买入""",5.21,9600.0,50016.0,5.0,0.0,0.0,0.0,-50021.0,0.0001,0.0,0.0
362,"""海通两融""",2023-10-31,"""09:43:13""","""002492""","""恒基达鑫""","""买入""",5.91,8400.0,49644.0,5.0,0.0,0.0,0.0,-49649.0,0.000101,0.0,0.0


In [8]:
# Earliest trade date in the log; lower bound for the calendar and quote queries.
start_date = df.get_column("交易日期").min()
start_date

datetime.date(2022, 7, 11)

In [9]:
# Latest trade date in the log; upper bound for the calendar and quote queries.
end_date = df.get_column("交易日期").max()
end_date

datetime.date(2023, 10, 31)

In [10]:
# One row per calendar day between the first and last trade date (inclusive).
k1 = pl.select(
    pl.date_range(start_date, end_date, interval="1d", closed="both").alias("日期")
)
k1

日期
date
2022-07-11
2022-07-12
2022-07-13
2022-07-14
2022-07-15
…
2023-10-27
2023-10-28
2023-10-29
2023-10-30


In [11]:
# Sorted list of distinct security codes appearing in the trade log.
k2 = df.select(pl.col("证券代码").unique().sort())
k2

证券代码
str
"""000096"""
"""000532"""
"""000559"""
"""000599"""
"""000655"""
…
"""688299"""
"""688321"""
"""688360"""
"""688393"""


In [12]:
# Cross join: one row for every (日期, 证券代码) combination of k1 and k2.
k = k1.join(k2, how="cross")

In [13]:
# Per-day position table: attach the trades in d1 to the (日期, 证券代码) grid,
# accumulate the signed quantity per security over time, and keep only rows
# where the running balance is positive.
d2 = (
    k.join(
        d1, left_on=["日期", "证券代码"], right_on=["交易日期", "证券代码"], how="left"
    )
    # The cumulative sum below depends on row order. DataFrame.sort is not
    # stable by default, so sorting on (日期, 证券代码) alone leaves several
    # trades of one security on the same day in arbitrary order. That changes
    # the intermediate balances and therefore which rows pass the `> 0` filter.
    # 交易时间 orders trades within a day; 序号 breaks any remaining ties.
    .sort("日期", "证券代码", "交易时间", "序号")
    .with_columns(
        结余数量=(
            pl.when(pl.col("买卖标志") == "买入")
            .then(pl.col("成交数量"))
            .when(pl.col("买卖标志") == "卖出")
            .then(-pl.col("成交数量"))
            # Grid rows without a matching trade (null flag) contribute 0.
            .otherwise(0)
            .cum_sum()
            .over("证券代码")
        ),
    )
    .filter(pl.col("结余数量") > 0)
)

In [14]:
# Explore the per-day position table in an interactive Perspective viewer.
PerspectiveWidget(d2)

PerspectiveWidget(binding_mode='server', columns=['日期', '证券代码', '序号', '券商', '交易时间', '证券名称', '买卖标志', '成交价格', '成…

In [15]:
import tushare as ts

In [16]:
# Create the Tushare Pro API client. No token is passed here, so it is
# presumably read from a previously saved local token — TODO confirm.
pro = ts.pro_api()

In [20]:
# Trial fetch: daily quotes for a single security over the traded date range,
# converted from the pandas frame returned by Tushare into polars.
hq = pl.from_pandas(
    pro.daily(
        ts_code="002462.SZ",
        start_date=start_date.strftime("%Y%m%d"),
        end_date=end_date.strftime("%Y%m%d"),
    )
)
hq

ts_code,trade_date,open,high,low,close,pre_close,change,pct_chg,vol,amount
str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""002462.SZ""","""20231031""",14.75,14.9,14.59,14.7,14.75,-0.05,-0.339,65859.96,96984.271
"""002462.SZ""","""20231030""",13.88,14.89,13.88,14.75,13.93,0.82,5.8866,123932.16,180119.372
"""002462.SZ""","""20231027""",13.7,13.98,13.51,13.93,13.64,0.29,2.1261,35782.0,49386.168
"""002462.SZ""","""20231026""",13.49,13.68,13.4,13.64,13.62,0.02,0.1468,19215.0,26005.866
"""002462.SZ""","""20231025""",13.65,13.77,13.58,13.62,13.67,-0.05,-0.3658,18484.0,25274.163
…,…,…,…,…,…,…,…,…,…,…
"""002462.SZ""","""20220715""",13.61,13.66,13.12,13.13,13.59,-0.46,-3.3848,32967.65,44114.064
"""002462.SZ""","""20220714""",13.54,13.75,13.5,13.59,13.54,0.05,0.3693,21967.0,29851.164
"""002462.SZ""","""20220713""",13.55,13.63,13.39,13.54,13.61,-0.07,-0.5143,22793.0,30714.624
"""002462.SZ""","""20220712""",13.65,13.69,13.41,13.61,13.65,-0.04,-0.293,29679.0,40146.31


In [21]:
# Inspect the column dtypes of the converted quote frame (trade_date arrives as
# a String, not a Date).
hq.dtypes

[String,
 String,
 Float64,
 Float64,
 Float64,
 Float64,
 Float64,
 Float64,
 Float64,
 Float64,
 Float64]

In [26]:
# Convert the bare 6-digit codes in d1 into Tushare `ts_code` form by appending
# the exchange suffix: codes starting with 0 or 3 get ".SZ", codes starting
# with 6 get ".SH".
ts_codes = (
    d1.select(
        证券代码=(
            pl.when(pl.col("证券代码").str.head(1).is_in(["0", "3"]))
            .then(pl.format("{}.SZ", pl.col("证券代码")))
            .when(pl.col("证券代码").str.head(1) == "6")
            .then(pl.format("{}.SH", pl.col("证券代码")))
            # No `otherwise`: any other leading digit yields null.
        ),
    )
    .to_series()
    # Drop codes that could not be mapped. Otherwise a None lands in the list
    # and is later sent to the quote API as `ts_code=None`, which would not
    # restrict the request to a single security (TODO confirm against the
    # Tushare docs).
    .drop_nulls()
    .unique()
    .sort()
    .to_list()
)

In [27]:
from tqdm.notebook import tqdm

In [32]:
# Download daily quotes for every security in ts_codes over the traded date
# range, one request per security, showing a progress bar. Result: a list of
# polars frames (one per security).
hq = [
    pl.from_pandas(
        pro.daily(
            ts_code=code,
            start_date=f"{start_date:%Y%m%d}",
            end_date=f"{end_date:%Y%m%d}",
        )
    )
    for code in tqdm(ts_codes)
]

  0%|          | 0/149 [00:00<?, ?it/s]

In [33]:
# Sanity check: number of per-security quote frames that were fetched.
len(hq)

149

In [34]:
# Stack the per-security frames into one long quote table.
# NOTE: this rebinds `hq` from a list of frames to a single frame, so the cell
# is meant to be run once right after the download cell.
hq = pl.concat(hq, how="vertical")

In [36]:
# Cache the downloaded daily quotes on disk as a parquet file.
hq.write_parquet("daily.parquet")

In [40]:
# Reload the cached quotes and make the join keys compatible with the trade
# table: keep only the 6-character code (dropping the exchange suffix) and
# parse the YYYYMMDD string into a Date.
hq = pl.read_parquet("daily.parquet").with_columns(
    pl.col("ts_code").str.head(6),
    pl.col("trade_date").str.strptime(pl.Date, "%Y%m%d"),
)

In [42]:
# Attach the same-day quote to every trade and compute `vratio`: the traded
# quantity divided by 100, relative to the day's `vol`.
# NOTE(review): the /100 presumably converts shares to the lot unit used by
# `vol` — confirm against the quote source.
d1.join(
    hq,
    left_on=["交易日期", "证券代码"],
    right_on=["trade_date", "ts_code"],
    how="left",
).with_columns(
    (pl.col("成交数量") / 100 / pl.col("vol")).alias("vratio"),
)

序号,券商,交易日期,交易时间,证券代码,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率,open,high,low,close,pre_close,change,pct_chg,vol,amount,vratio
u32,str,date,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1,"""湘财""",2022-07-11,"""09:33:37""","""000900""","""现代投资""","""买入""",4.05,34400.0,139320.0,22.29,0.0,1.39,0.0,-139342.29,0.00016,0.0,0.00001,4.08,4.13,4.04,4.12,4.06,0.06,1.4778,89030.64,36352.212,0.003864
2,"""湘财""",2022-07-11,"""09:34:24""","""601077""","""渝农商行""","""买入""",3.65,38300.0,139795.0,22.37,0.0,1.38,0.0,-139818.75,0.00016,0.0,0.00001,3.65,3.68,3.64,3.66,3.65,0.01,0.274,370994.07,135739.515,0.001032
3,"""湘财""",2022-07-11,"""09:36:30""","""600894""","""广日股份""","""买入""",6.54,21400.0,139956.0,22.39,0.0,1.41,0.0,-139979.8,0.00016,0.0,0.00001,6.57,6.57,6.49,6.51,6.57,-0.06,-0.9132,20737.5,13537.134,0.010319
4,"""湘财""",2022-07-11,"""09:37:25""","""601992""","""金隅集团""","""买入""",2.59,54000.0,139860.0,22.38,0.0,1.42,0.0,-139883.8,0.00016,0.0,0.00001,2.61,2.62,2.58,2.59,2.61,-0.02,-0.7663,247321.0,64052.01,0.002183
5,"""湘财""",2022-07-11,"""09:38:16""","""002462""","""嘉事堂""","""买入""",13.51,10400.0,140504.0,22.48,0.0,1.41,0.0,-140526.48,0.00016,0.0,0.00001,13.2,13.96,13.07,13.65,13.18,0.47,3.566,62827.0,85869.11,0.001655
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
359,"""海通两融""",2023-10-31,"""09:31:53""","""002956""","""西麦食品""","""卖出""",14.13,5000.0,70650.0,6.74,35.35,0.0,0.0,70607.91,0.000095,0.0005,0.0,14.2,14.27,14.01,14.07,14.21,-0.14,-0.9852,17859.2,25247.849,0.0028
360,"""海通两融""",2023-10-31,"""09:39:57""","""603214""","""爱婴室""","""买入""",15.84,3100.0,49104.0,5.0,0.0,0.51,0.0,-49109.51,0.000102,0.0,0.00001,15.81,15.93,15.65,15.78,15.8,-0.02,-0.1266,18384.12,29025.236,0.001686
361,"""海通两融""",2023-10-31,"""09:40:55""","""300132""","""青松股份""","""买入""",5.21,9600.0,50016.0,5.0,0.0,0.0,0.0,-50021.0,0.0001,0.0,0.0,5.18,5.32,5.17,5.25,5.17,0.08,1.5474,88442.01,46306.236,0.001085
362,"""海通两融""",2023-10-31,"""09:43:13""","""002492""","""恒基达鑫""","""买入""",5.91,8400.0,49644.0,5.0,0.0,0.0,0.0,-49649.0,0.000101,0.0,0.0,5.87,5.94,5.86,5.91,5.87,0.04,0.6814,40253.0,23766.196,0.002087
