In [9]:
import polars as pl
from perspective.widget import PerspectiveWidget

In [12]:
df = pl.read_parquet("stock_trades.parquet")
df = df.with_columns(
    pl.col("交易时间").cast(pl.String),
    手续费率=pl.col("手续费") / pl.col("成交金额"),
    印花税率=pl.col("印花税") / pl.col("成交金额"),
    过户费率=pl.col("过户费") / pl.col("成交金额"),
)
df = df.with_row_index("序号", 1)
df

序号,券商,交易日期,交易时间,证券代码,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率
u32,str,date,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1,"""湘财""",2022-07-18,"""09:38:10""","""002462""","""嘉事堂""","""卖出""",13.2062,10400.0,137344.0,21.98,137.35,1.38,0.0,137184.67,0.00016,0.001,0.00001
2,"""湘财""",2022-07-18,"""09:44:52""","""600408""","""安泰集团""","""买入""",3.19,47000.0,149930.0,23.99,0.0,1.51,0.0,-149955.5,0.00016,0.0,0.00001
3,"""湘财""",2022-07-18,"""09:44:31""","""600648""","""外高桥""","""买入""",12.6066,11900.0,150019.0,24.0,0.0,1.49,0.0,-150044.49,0.00016,0.0,0.00001
4,"""湘财""",2022-07-18,"""09:43:38""","""600269""","""赣粤高速""","""买入""",3.69,40700.0,150183.0,24.03,0.0,1.5,0.0,-150208.53,0.00016,0.0,0.00001
5,"""湘财""",2022-07-18,"""09:42:51""","""600015""","""华夏银行""","""买入""",5.07,30000.0,152100.0,24.34,0.0,1.52,0.0,-152125.86,0.00016,0.0,0.00001
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
359,"""海通两融""",2023-10-18,"""09:46:15""","""300464""","""星徽股份""","""卖出""",5.74,16100.0,92414.0,8.82,46.21,0.0,0.0,92358.97,0.000095,0.0005,0.0
360,"""海通两融""",2023-10-18,"""09:55:41""","""002661""","""克明食品""","""买入""",9.42,8500.0,80072.0,7.64,0.0,0.0,0.0,-80079.64,0.000095,0.0,0.0
361,"""海通两融""",2023-10-09,"""09:48:02""","""002753""","""永东股份""","""买入""",7.02,14200.0,99684.0,9.51,0.0,0.0,0.0,-99693.51,0.000095,0.0,0.0
362,"""海通两融""",2023-10-09,"""09:45:18""","""000698""","""沈阳化工""","""卖出""",4.053,23800.0,96460.0,9.2,48.24,0.0,0.0,96402.56,0.000095,0.0005,0.0


In [14]:
PerspectiveWidget(df)

PerspectiveWidget(binding_mode='server', columns=['序号', '券商', '交易日期', '交易时间', '证券代码', '证券名称', '买卖标志', '成交价格', …

In [22]:
d1 = df.join(
    df.group_by("证券代码", "证券名称")
    .agg(
        结余数量=(
            pl.when(pl.col("买卖标志") == "卖出")
            .then(-pl.col("成交数量"))
            .when(pl.col("买卖标志") == "买入")
            .then(pl.col("成交数量"))
            .sum()
        ),
    )
    .filter(pl.col("结余数量") < 0),
    on="证券代码",
    how="anti",
)

In [23]:
d1

序号,券商,交易日期,交易时间,证券代码,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率
u32,str,date,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1,"""湘财""",2022-07-18,"""09:38:10""","""002462""","""嘉事堂""","""卖出""",13.2062,10400.0,137344.0,21.98,137.35,1.38,0.0,137184.67,0.00016,0.001,0.00001
2,"""湘财""",2022-07-18,"""09:44:52""","""600408""","""安泰集团""","""买入""",3.19,47000.0,149930.0,23.99,0.0,1.51,0.0,-149955.5,0.00016,0.0,0.00001
3,"""湘财""",2022-07-18,"""09:44:31""","""600648""","""外高桥""","""买入""",12.6066,11900.0,150019.0,24.0,0.0,1.49,0.0,-150044.49,0.00016,0.0,0.00001
4,"""湘财""",2022-07-18,"""09:43:38""","""600269""","""赣粤高速""","""买入""",3.69,40700.0,150183.0,24.03,0.0,1.5,0.0,-150208.53,0.00016,0.0,0.00001
5,"""湘财""",2022-07-18,"""09:42:51""","""600015""","""华夏银行""","""买入""",5.07,30000.0,152100.0,24.34,0.0,1.52,0.0,-152125.86,0.00016,0.0,0.00001
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
359,"""海通两融""",2023-10-18,"""09:46:15""","""300464""","""星徽股份""","""卖出""",5.74,16100.0,92414.0,8.82,46.21,0.0,0.0,92358.97,0.000095,0.0005,0.0
360,"""海通两融""",2023-10-18,"""09:55:41""","""002661""","""克明食品""","""买入""",9.42,8500.0,80072.0,7.64,0.0,0.0,0.0,-80079.64,0.000095,0.0,0.0
361,"""海通两融""",2023-10-09,"""09:48:02""","""002753""","""永东股份""","""买入""",7.02,14200.0,99684.0,9.51,0.0,0.0,0.0,-99693.51,0.000095,0.0,0.0
362,"""海通两融""",2023-10-09,"""09:45:18""","""000698""","""沈阳化工""","""卖出""",4.053,23800.0,96460.0,9.2,48.24,0.0,0.0,96402.56,0.000095,0.0005,0.0


In [24]:
start_date = df["交易日期"].min()
start_date

datetime.date(2022, 7, 11)

In [26]:
end_date = df["交易日期"].max()
end_date

datetime.date(2023, 10, 31)

In [28]:
k1 = pl.select(日期=pl.date_range(start_date, end_date))
k1

日期
date
2022-07-11
2022-07-12
2022-07-13
2022-07-14
2022-07-15
…
2023-10-27
2023-10-28
2023-10-29
2023-10-30


In [31]:
k2 = df["证券代码"].unique().sort().to_frame()
k2

证券代码
str
"""000096"""
"""000532"""
"""000559"""
"""000599"""
"""000655"""
…
"""688299"""
"""688321"""
"""688360"""
"""688393"""


In [33]:
k = k1.join(k2, how="cross")

In [41]:
d2 = (
    k.join(
        d1, left_on=["日期", "证券代码"], right_on=["交易日期", "证券代码"], how="left"
    )
    .sort("日期", "证券代码")
    .with_columns(
        结余数量=(
            pl.when(pl.col("买卖标志") == "买入")
            .then(pl.col("成交数量"))
            .when(pl.col("买卖标志") == "卖出")
            .then(-pl.col("成交数量"))
            .otherwise(0)
            .cum_sum()
            .over("证券代码")
        ),
    )
    .filter(pl.col.结余数量 > 0)
)

In [42]:
PerspectiveWidget(d2)

PerspectiveWidget(binding_mode='server', columns=['日期', '证券代码', '序号', '券商', '交易时间', '证券名称', '买卖标志', '成交价格', '成…

In [43]:
import tushare as ts

In [44]:
pro = ts.pro_api()

In [47]:
hq = pro.daily(
    ts_code="002462.SZ",
    start_date=format(start_date, "%Y%m%d"),
    end_date=format(end_date, "%Y%m%d"),
)
hq = pl.from_pandas(hq)
hq

ts_code,trade_date,open,high,low,close,pre_close,change,pct_chg,vol,amount
str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""002462.SZ""","""20231031""",14.75,14.9,14.59,14.7,14.75,-0.05,-0.339,65859.96,96984.271
"""002462.SZ""","""20231030""",13.88,14.89,13.88,14.75,13.93,0.82,5.8866,123932.16,180119.372
"""002462.SZ""","""20231027""",13.7,13.98,13.51,13.93,13.64,0.29,2.1261,35782.0,49386.168
"""002462.SZ""","""20231026""",13.49,13.68,13.4,13.64,13.62,0.02,0.1468,19215.0,26005.866
"""002462.SZ""","""20231025""",13.65,13.77,13.58,13.62,13.67,-0.05,-0.3658,18484.0,25274.163
…,…,…,…,…,…,…,…,…,…,…
"""002462.SZ""","""20220715""",13.61,13.66,13.12,13.13,13.59,-0.46,-3.3848,32967.65,44114.064
"""002462.SZ""","""20220714""",13.54,13.75,13.5,13.59,13.54,0.05,0.3693,21967.0,29851.164
"""002462.SZ""","""20220713""",13.55,13.63,13.39,13.54,13.61,-0.07,-0.5143,22793.0,30714.624
"""002462.SZ""","""20220712""",13.65,13.69,13.41,13.61,13.65,-0.04,-0.293,29679.0,40146.31


In [52]:
ts_codes = (
    d1.select(
        证券代码=(
            pl.when(pl.col("证券代码").str.head(1).is_in(["0", "3"]))
            .then(pl.format("{}.SZ", pl.col("证券代码")))
            .when(pl.col("证券代码").str.head(1) == "6")
            .then(pl.format("{}.SH", pl.col("证券代码")))
        ),
    )
    .to_series()
    .unique()
    .sort()
    .to_list()
)

In [56]:
from tqdm.notebook import tqdm

In [59]:
hq = [
    pl.from_pandas(
        pro.daily(
            ts_code=ts_code,
            start_date=format(start_date, "%Y%m%d"),
            end_date=format(end_date, "%Y%m%d"),
        )
    )
    for ts_code in tqdm(ts_codes)
]

  0%|          | 0/149 [00:00<?, ?it/s]

In [58]:
len(hq)

149

In [61]:
hq = pl.concat(hq)

In [63]:
hq.write_parquet("daily.parquet")

In [69]:
hq = pl.read_parquet("daily.parquet")
hq = hq.with_columns(
    pl.col("ts_code").str.head(6),
    pl.col("trade_date").str.to_date("%Y%m%d"),
)

In [76]:
d1.join(
    hq, left_on=["交易日期", "证券代码"], right_on=["trade_date", "ts_code"], how="left"
).with_columns(
    vratio=pl.col("成交数量") / 100 / pl.col("vol"),
).sort("vratio")

序号,券商,交易日期,交易时间,证券代码,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率,open,high,low,close,pre_close,change,pct_chg,vol,amount,vratio
u32,str,date,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
252,"""湘财""",2023-05-09,"""09:33:16""","""601166""","""兴业银行""","""卖出""",17.75,3000.0,53250.0,8.52,53.25,0.53,0.0,53187.7,0.00016,0.001,0.00001,17.77,17.94,17.5,17.56,17.63,-0.07,-0.3971,1.6276e6,2.8810e6,0.000018
34,"""湘财""",2022-10-25,"""09:30:19""","""300368""","""汇金股份""","""卖出""",6.2,100.0,620.0,0.1,0.62,0.01,0.0,619.28,0.000161,0.001,0.000016,6.18,6.25,5.95,6.06,6.2,-0.14,-2.2581,49219.99,29911.915,0.00002
33,"""湘财""",2022-10-25,"""09:30:39""","""002996""","""顺博合金""","""卖出""",13.64,100.0,1364.0,0.22,1.36,0.01,0.0,1362.42,0.000161,0.000997,0.000007,13.57,13.93,13.26,13.82,13.64,0.18,1.3196,38258.22,52122.119,0.000026
201,"""湘财""",2023-04-25,"""13:35:07""","""601166""","""兴业银行""","""买入""",17.15,3000.0,51450.0,8.23,0.0,0.51,0.0,-51458.74,0.00016,0.0,0.00001,16.96,17.29,16.96,17.27,16.93,0.34,2.0083,995234.22,1.7071e6,0.00003
35,"""湘财""",2022-10-25,"""09:25:00""","""002998""","""优彩资源""","""卖出""",6.73,100.0,673.0,0.11,0.67,0.01,0.0,672.22,0.000163,0.000996,0.000015,6.73,6.73,6.56,6.64,6.72,-0.08,-1.1905,16145.0,10687.614,0.000062
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
54,"""湘财""",2022-10-11,"""09:41:06""","""600231""","""凌钢股份""","""买入""",2.2,62700.0,137940.0,22.07,0.0,1.29,0.0,-137963.36,0.00016,0.0,0.000009,2.21,2.22,2.15,2.2,2.21,-0.01,-0.4525,61371.2,13386.328,0.010217
12,"""湘财""",2022-07-11,"""09:36:30""","""600894""","""广日股份""","""买入""",6.54,21400.0,139956.0,22.39,0.0,1.41,0.0,-139979.8,0.00016,0.0,0.00001,6.57,6.57,6.49,6.51,6.57,-0.06,-0.9132,20737.5,13537.134,0.010319
125,"""湘财""",2023-02-21,"""09:37:16""","""300385""","""雪浪环境""","""买入""",6.5293,11500.0,75087.0,12.01,0.0,0.77,0.0,-75099.01,0.00016,0.0,0.00001,6.53,6.59,6.45,6.52,6.5,0.02,0.3077,11136.0,7250.686,0.010327
198,"""湘财""",2023-03-01,"""09:31:46""","""688069""","""德林海环保""","""卖出""",28.2838,2600.0,73538.0,11.77,73.58,0.77,0.0,73451.88,0.00016,0.001001,0.00001,28.52,28.86,28.27,28.67,28.54,0.13,0.4555,2416.06,6897.976,0.010761


In [77]:
d3 = (
    k.join(
        d1, left_on=["日期", "证券代码"], right_on=["交易日期", "证券代码"], how="left"
    )
    .sort("日期", "证券代码")
    .with_columns(
        结余数量=(
            pl.when(pl.col("买卖标志") == "买入")
            .then(pl.col("成交数量"))
            .when(pl.col("买卖标志") == "卖出")
            .then(-pl.col("成交数量"))
            .otherwise(0)
            .cum_sum()
            .over("证券代码")
        ),
    )
)
d3

日期,证券代码,序号,券商,交易时间,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率,结余数量
date,str,u32,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2022-07-11,"""000096""",,,,,,,,,,,,,,,,,0.0
2022-07-11,"""000532""",,,,,,,,,,,,,,,,,0.0
2022-07-11,"""000559""",,,,,,,,,,,,,,,,,0.0
2022-07-11,"""000599""",,,,,,,,,,,,,,,,,0.0
2022-07-11,"""000655""",,,,,,,,,,,,,,,,,0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2023-10-31,"""688299""",,,,,,,,,,,,,,,,,0.0
2023-10-31,"""688321""",,,,,,,,,,,,,,,,,0.0
2023-10-31,"""688360""",,,,,,,,,,,,,,,,,0.0
2023-10-31,"""688393""",,,,,,,,,,,,,,,,,0.0


In [100]:
d4 = (
    d3.join(
        hq,
        left_on=["日期", "证券代码"],
        right_on=["trade_date", "ts_code"],
        how="left",
    )
    .sort("证券代码", "日期")
    .with_columns(close=pl.col("close").fill_null(strategy="forward").over("证券代码"))
    .with_columns(
        持股市值=pl.col("结余数量") * pl.col("close"),
    )
    .group_by("日期")
    .agg(
        pl.col("持股市值").sum(),
        pl.col("发生金额").sum(),
    )
    .sort("日期")
    .with_columns(
        转账金额=pl.when(pl.int_range(0, pl.len()) == 0).then(100_0000).otherwise(0),
    )
    .with_columns(
        现金余额=(pl.col("转账金额") + pl.col("发生金额")).cum_sum(),
    )
    .with_columns(总资产=pl.col("持股市值") + pl.col("现金余额"))
)
d4

日期,持股市值,发生金额,转账金额,现金余额,总资产
date,f64,f64,i32,f64,f64
2022-07-11,703040.0,-699551.12,1000000,300448.88,1.0035e6
2022-07-12,707714.0,0.0,0,300448.88,1.0082e6
2022-07-13,713855.0,0.0,0,300448.88,1.0143e6
2022-07-14,710079.0,0.0,0,300448.88,1.0105e6
2022-07-15,692377.0,0.0,0,300448.88,992825.88
…,…,…,…,…,…
2023-10-27,571195.0,0.0,0,510845.96,1.0820e6
2023-10-28,571195.0,0.0,0,510845.96,1.0820e6
2023-10-29,571195.0,0.0,0,510845.96,1.0820e6
2023-10-30,686345.0,-94884.93,0,415961.03,1.1023e6


In [101]:
PerspectiveWidget(d4)

PerspectiveWidget(binding_mode='server', columns=['日期', '持股市值', '发生金额', '转账金额', '现金余额', '总资产'], table_name='0.…

In [105]:
ihq = pro.index_daily(
    ts_code="000300.SH",
    start_date=format(start_date, "%Y%m%d"),
    end_date=format(end_date, "%Y%m%d"),
    fields="ts_code,trade_date,pct_chg",
)

In [106]:
pl.from_pandas(ihq).write_parquet("index_daily.parquet")

In [115]:
ihq = pl.read_parquet("index_daily.parquet")
ihq = (
    ihq.with_columns(
        pl.col("trade_date").str.to_date("%Y%m%d"),
        pl.col("pct_chg") / 100 + 1,
    )
    .sort("trade_date")
    .with_columns(
        car=pl.col("pct_chg").cum_prod(),
    )
    .with_columns(沪深300=pl.col("car") * 100_000)
)
ihq

ts_code,trade_date,pct_chg,car,沪深300
str,date,f64,f64,f64
"""000300.SH""",2022-07-11,0.983254,0.983254,98325.4
"""000300.SH""",2022-07-12,0.990585,0.973997,97399.666359
"""000300.SH""",2022-07-13,1.001818,0.975767,97576.738952
"""000300.SH""",2022-07-14,1.000142,0.975906,97590.594849
"""000300.SH""",2022-07-15,0.982983,0.959299,95929.895697
…,…,…,…,…
"""000300.SH""",2023-10-25,1.004969,0.791288,79128.845378
"""000300.SH""",2023-10-26,1.002764,0.793476,79347.557506
"""000300.SH""",2023-10-27,1.013727,0.804368,80436.761428
"""000300.SH""",2023-10-30,1.006003,0.809196,80919.623307


In [116]:
PerspectiveWidget(ihq)

PerspectiveWidget(binding_mode='server', columns=['ts_code', 'trade_date', 'pct_chg', 'car', '沪深300'], table_n…

In [118]:
d5 = d4.join(ihq, left_on="日期", right_on="trade_date")
d5 = d5.unpivot(
    on=["总资产", "沪深300"], index="日期", variable_name="资产类型", value_name="财富"
)

In [119]:
PerspectiveWidget(d5)

PerspectiveWidget(binding_mode='server', columns=['日期', '资产类型', '财富'], table_name='0.5464127848112389', theme=…