In [1]:
import polars as pl
from perspective.widget import PerspectiveWidget

In [2]:
# Load the trade blotter, order it chronologically, and derive fee ratios.
df = (
    pl.read_parquet("stock_trades.parquet")
    .sort(["交易日期", "交易时间", "证券代码"])
    .with_columns(
        # Keep the trade time as text rather than its original dtype.
        pl.col("交易时间").cast(pl.String),
        # Each fee expressed as a fraction of the traded amount.
        (pl.col("手续费") / pl.col("成交金额")).alias("手续费率"),
        (pl.col("印花税") / pl.col("成交金额")).alias("印花税率"),
        (pl.col("过户费") / pl.col("成交金额")).alias("过户费率"),
    )
    # 1-based running trade number that follows the sort order above.
    .with_row_index(name="序号", offset=1)
)
df

序号,券商,交易日期,交易时间,证券代码,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率
u32,str,date,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1,"""湘财""",2022-07-11,"""09:33:37""","""000900""","""现代投资""","""买入""",4.05,34400.0,139320.0,22.29,0.0,1.39,0.0,-139342.29,0.00016,0.0,0.00001
2,"""湘财""",2022-07-11,"""09:34:24""","""601077""","""渝农商行""","""买入""",3.65,38300.0,139795.0,22.37,0.0,1.38,0.0,-139818.75,0.00016,0.0,0.00001
3,"""湘财""",2022-07-11,"""09:36:30""","""600894""","""广日股份""","""买入""",6.54,21400.0,139956.0,22.39,0.0,1.41,0.0,-139979.8,0.00016,0.0,0.00001
4,"""湘财""",2022-07-11,"""09:37:25""","""601992""","""金隅集团""","""买入""",2.59,54000.0,139860.0,22.38,0.0,1.42,0.0,-139883.8,0.00016,0.0,0.00001
5,"""湘财""",2022-07-11,"""09:38:16""","""002462""","""嘉事堂""","""买入""",13.51,10400.0,140504.0,22.48,0.0,1.41,0.0,-140526.48,0.00016,0.0,0.00001
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
359,"""海通两融""",2023-10-31,"""09:31:53""","""002956""","""西麦食品""","""卖出""",14.13,5000.0,70650.0,6.74,35.35,0.0,0.0,70607.91,0.000095,0.0005,0.0
360,"""海通两融""",2023-10-31,"""09:39:57""","""603214""","""爱婴室""","""买入""",15.84,3100.0,49104.0,5.0,0.0,0.51,0.0,-49109.51,0.000102,0.0,0.00001
361,"""海通两融""",2023-10-31,"""09:40:55""","""300132""","""青松股份""","""买入""",5.21,9600.0,50016.0,5.0,0.0,0.0,0.0,-50021.0,0.0001,0.0,0.0
362,"""海通两融""",2023-10-31,"""09:43:13""","""002492""","""恒基达鑫""","""买入""",5.91,8400.0,49644.0,5.0,0.0,0.0,0.0,-49649.0,0.000101,0.0,0.0


In [3]:
# Open the full trade table in an interactive Perspective viewer.
PerspectiveWidget(df)

PerspectiveWidget(binding_mode='server', columns=['序号', '券商', '交易日期', '交易时间', '证券代码', '证券名称', '买卖标志', '成交价格', …

In [4]:
# Keep only securities whose net traded quantity over the whole file is
# non-negative. A negative net means more was sold than bought inside the file
# (presumably a position opened before the first recorded trade — confirm).
d1 = df.join(
    # Net quantity per security: buys count positive, sells negative. Rows with
    # any other flag yield null and are ignored by sum().
    # Group on the code alone: the anti-join below keys on the code only, and
    # grouping on the name as well would split one security into several
    # partial balances whenever its name differs between rows.
    df.group_by("证券代码")
    .agg(
        结余数量=(
            pl.when(pl.col("买卖标志") == "卖出")
            .then(-pl.col("成交数量"))
            .when(pl.col("买卖标志") == "买入")
            .then(pl.col("成交数量"))
            .sum()
        ),
    )
    .filter(pl.col("结余数量") < 0),
    on="证券代码",
    # Anti join: keep the trades whose code is NOT in the negative-balance set.
    how="anti",
)

In [5]:
# Display the trades that survived the negative-balance filter.
d1

序号,券商,交易日期,交易时间,证券代码,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率
u32,str,date,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1,"""湘财""",2022-07-11,"""09:33:37""","""000900""","""现代投资""","""买入""",4.05,34400.0,139320.0,22.29,0.0,1.39,0.0,-139342.29,0.00016,0.0,0.00001
2,"""湘财""",2022-07-11,"""09:34:24""","""601077""","""渝农商行""","""买入""",3.65,38300.0,139795.0,22.37,0.0,1.38,0.0,-139818.75,0.00016,0.0,0.00001
3,"""湘财""",2022-07-11,"""09:36:30""","""600894""","""广日股份""","""买入""",6.54,21400.0,139956.0,22.39,0.0,1.41,0.0,-139979.8,0.00016,0.0,0.00001
4,"""湘财""",2022-07-11,"""09:37:25""","""601992""","""金隅集团""","""买入""",2.59,54000.0,139860.0,22.38,0.0,1.42,0.0,-139883.8,0.00016,0.0,0.00001
5,"""湘财""",2022-07-11,"""09:38:16""","""002462""","""嘉事堂""","""买入""",13.51,10400.0,140504.0,22.48,0.0,1.41,0.0,-140526.48,0.00016,0.0,0.00001
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
359,"""海通两融""",2023-10-31,"""09:31:53""","""002956""","""西麦食品""","""卖出""",14.13,5000.0,70650.0,6.74,35.35,0.0,0.0,70607.91,0.000095,0.0005,0.0
360,"""海通两融""",2023-10-31,"""09:39:57""","""603214""","""爱婴室""","""买入""",15.84,3100.0,49104.0,5.0,0.0,0.51,0.0,-49109.51,0.000102,0.0,0.00001
361,"""海通两融""",2023-10-31,"""09:40:55""","""300132""","""青松股份""","""买入""",5.21,9600.0,50016.0,5.0,0.0,0.0,0.0,-50021.0,0.0001,0.0,0.0
362,"""海通两融""",2023-10-31,"""09:43:13""","""002492""","""恒基达鑫""","""买入""",5.91,8400.0,49644.0,5.0,0.0,0.0,0.0,-49649.0,0.000101,0.0,0.0


In [6]:
# Earliest trade date in the blotter; lower bound of the calendar and quote queries.
start_date = df.get_column("交易日期").min()
start_date

datetime.date(2022, 7, 11)

In [7]:
# Latest trade date in the blotter; upper bound of the calendar and quote queries.
end_date = df.get_column("交易日期").max()
end_date

datetime.date(2023, 10, 31)

In [8]:
# One row per calendar day between the first and last trade date.
k1 = pl.select(pl.date_range(start_date, end_date).alias("日期"))
k1

日期
date
2022-07-11
2022-07-12
2022-07-13
2022-07-14
2022-07-15
…
2023-10-27
2023-10-28
2023-10-29
2023-10-30


In [9]:
# Sorted, de-duplicated list of every security code that appears in the blotter.
k2 = df.select(pl.col("证券代码").unique().sort())
k2

证券代码
str
"""000096"""
"""000532"""
"""000559"""
"""000599"""
"""000655"""
…
"""688299"""
"""688321"""
"""688360"""
"""688393"""


In [10]:
# Cross join: every (calendar day, security code) pair, used as the skeleton
# onto which trades are left-joined later.
k = k1.join(k2, how="cross")

In [11]:
# Running position per security on the (day, code) grid, keeping only the rows
# where the running balance is positive.
d2 = (
    k.join(
        d1, left_on=["日期", "证券代码"], right_on=["交易日期", "证券代码"], how="left"
    )
    # 序号 is the chronological trade number, so adding it as the last key
    # orders same-day trades of one security by time. Without it those rows tie,
    # and sort() makes no promise about the order of ties, which made the
    # intraday running balance (and the filter below) non-deterministic.
    # Grid rows without a trade have a null 序号 but are alone in their
    # (day, code) pair.
    .sort("日期", "证券代码", "序号")
    .with_columns(
        结余数量=(
            # Signed quantity: +buy, -sell, 0 for no trade / any other flag.
            pl.when(pl.col("买卖标志") == "买入")
            .then(pl.col("成交数量"))
            .when(pl.col("买卖标志") == "卖出")
            .then(-pl.col("成交数量"))
            .otherwise(0)
            .cum_sum()
            .over("证券代码")
        ),
    )
    .filter(pl.col("结余数量") > 0)
)

In [12]:
# Explore the positive-balance rows interactively.
PerspectiveWidget(d2)

PerspectiveWidget(binding_mode='server', columns=['日期', '证券代码', '序号', '券商', '交易时间', '证券名称', '买卖标志', '成交价格', '成…

In [13]:
import tushare as ts

In [14]:
# Tushare Pro client used for all quote downloads below.
# NOTE(review): no token is passed here, so this presumably relies on a token
# already stored on this machine — confirm before running elsewhere.
pro = ts.pro_api()

In [15]:
# Scratch check: render the start date as a compact YYYYMMDD string.
f"{start_date:%Y%m%d}"

'20220711'

In [16]:
# Scratch check: the same YYYYMMDD rendering via format(), applied to the end date.
format(end_date, "%Y%m%d")

'20231031'

In [17]:
# Build the exchange-suffixed codes ("000900.SZ", "601077.SH") passed to
# pro.daily: leading digit 0 or 3 gets ".SZ", leading digit 6 gets ".SH".
ts_codes = (
    d1.select(
        证券代码=(
            pl.when(pl.col("证券代码").str.head(1).is_in(["0", "3"]))
            .then(pl.format("{}.SZ", pl.col("证券代码")))
            .when(pl.col("证券代码").str.head(1) == "6")
            .then(pl.format("{}.SH", pl.col("证券代码")))
        ),
    )
    .to_series()
    # Codes with any other leading digit match no branch and come out as null.
    # Drop them so None is never passed to the API as a ts_code.
    .drop_nulls()
    .unique()
    .sort()
    .to_list()
)

In [18]:
# Number of distinct securities to download quotes for.
len(ts_codes)

149

In [19]:
from tqdm.notebook import tqdm

In [20]:
# Download daily bars for each security over the blotter's date span: one API
# call per code, each response converted from pandas to a polars frame.
hq = [
    pl.from_pandas(
        pro.daily(
            ts_code=code,
            start_date=f"{start_date:%Y%m%d}",
            end_date=f"{end_date:%Y%m%d}",
        )
    )
    for code in tqdm(ts_codes)
]

  0%|          | 0/149 [00:00<?, ?it/s]

In [21]:
# Sanity check: one downloaded frame per requested code.
len(hq)

149

In [22]:
# Stack the per-security frames into one long quote table.
# NOTE(review): this rebinds `hq` from a list of frames to a single frame, so
# the cell is meant to run exactly once after the download cell.
hq = pl.concat(hq)

In [23]:
# Show the kernel's working directory, i.e. where the parquet files below are
# written and read.
!pwd

/c/Users/PC/repo/week08


In [24]:
# Save the downloaded quotes to disk; the next cell reads them back from this file.
hq.write_parquet("daily.parquet")

In [25]:
# Reload the saved quotes and align their keys with the trade table.
hq = pl.read_parquet("daily.parquet").with_columns(
    # Keep only the first six characters of ts_code (drops the ".SZ"/".SH"
    # suffix) so it matches 证券代码.
    pl.col("ts_code").str.slice(0, 6),
    # Parse the YYYYMMDD text into a real date so it matches 交易日期.
    pl.col("trade_date").str.to_date(format="%Y%m%d"),
)

In [26]:
# Sanity check: list trades whose fill price lies outside that day's [low, high]
# range. Trades with no matching quote have null bounds and are not listed.
d1.join(
    hq,
    how="left",
    left_on=["交易日期", "证券代码"],
    right_on=["trade_date", "ts_code"],
).filter(
    pl.col("成交价格").is_between(pl.col("low"), pl.col("high")).not_(),
)

序号,券商,交易日期,交易时间,证券代码,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率,open,high,low,close,pre_close,change,pct_chg,vol,amount
u32,str,date,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64


In [27]:
# Size of each trade relative to that day's market volume, smallest first.
# NOTE(review): the /100 presumably converts shares to the lots in which `vol`
# is quoted — confirm against the data provider's documentation.
d1.join(
    hq,
    how="left",
    left_on=["交易日期", "证券代码"],
    right_on=["trade_date", "ts_code"],
).with_columns(
    (pl.col("成交数量") / 100 / pl.col("vol")).alias("vratio"),
).sort("vratio")

序号,券商,交易日期,交易时间,证券代码,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率,open,high,low,close,pre_close,change,pct_chg,vol,amount,vratio
u32,str,date,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
237,"""湘财""",2023-05-09,"""09:33:16""","""601166""","""兴业银行""","""卖出""",17.75,3000.0,53250.0,8.52,53.25,0.53,0.0,53187.7,0.00016,0.001,0.00001,17.77,17.94,17.5,17.56,17.63,-0.07,-0.3971,1.6276e6,2.8810e6,0.000018
48,"""湘财""",2022-10-25,"""09:30:19""","""300368""","""汇金股份""","""卖出""",6.2,100.0,620.0,0.1,0.62,0.01,0.0,619.28,0.000161,0.001,0.000016,6.18,6.25,5.95,6.06,6.2,-0.14,-2.2581,49219.99,29911.915,0.00002
49,"""湘财""",2022-10-25,"""09:30:39""","""002996""","""顺博合金""","""卖出""",13.64,100.0,1364.0,0.22,1.36,0.01,0.0,1362.42,0.000161,0.000997,0.000007,13.57,13.93,13.26,13.82,13.64,0.18,1.3196,38258.22,52122.119,0.000026
235,"""湘财""",2023-04-25,"""13:35:07""","""601166""","""兴业银行""","""买入""",17.15,3000.0,51450.0,8.23,0.0,0.51,0.0,-51458.74,0.00016,0.0,0.00001,16.96,17.29,16.96,17.27,16.93,0.34,2.0083,995234.22,1.7071e6,0.00003
46,"""湘财""",2022-10-25,"""09:25:00""","""002998""","""优彩资源""","""卖出""",6.73,100.0,673.0,0.11,0.67,0.01,0.0,672.22,0.000163,0.000996,0.000015,6.73,6.73,6.56,6.64,6.72,-0.08,-1.1905,16145.0,10687.614,0.000062
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
30,"""湘财""",2022-10-11,"""09:41:06""","""600231""","""凌钢股份""","""买入""",2.2,62700.0,137940.0,22.07,0.0,1.29,0.0,-137963.36,0.00016,0.0,0.000009,2.21,2.22,2.15,2.2,2.21,-0.01,-0.4525,61371.2,13386.328,0.010217
3,"""湘财""",2022-07-11,"""09:36:30""","""600894""","""广日股份""","""买入""",6.54,21400.0,139956.0,22.39,0.0,1.41,0.0,-139979.8,0.00016,0.0,0.00001,6.57,6.57,6.49,6.51,6.57,-0.06,-0.9132,20737.5,13537.134,0.010319
142,"""湘财""",2023-02-21,"""09:37:16""","""300385""","""雪浪环境""","""买入""",6.5293,11500.0,75087.0,12.01,0.0,0.77,0.0,-75099.01,0.00016,0.0,0.00001,6.53,6.59,6.45,6.52,6.5,0.02,0.3077,11136.0,7250.686,0.010327
152,"""湘财""",2023-03-01,"""09:31:46""","""688069""","""德林海环保""","""卖出""",28.2838,2600.0,73538.0,11.77,73.58,0.77,0.0,73451.88,0.00016,0.001001,0.00001,28.52,28.86,28.27,28.67,28.54,0.13,0.4555,2416.06,6897.976,0.010761


In [28]:
# Running position per security on the full (day, code) grid. Days without a
# trade are kept, so every security has a balance on every calendar day.
d3 = (
    k.join(
        d1, left_on=["日期", "证券代码"], right_on=["交易日期", "证券代码"], how="left"
    )
    # 序号 is the chronological trade number, so adding it as the last key
    # orders same-day trades of one security by time. Without it those rows tie,
    # and sort() makes no promise about the order of ties, which made the
    # intraday running balance non-deterministic. Grid rows without a trade
    # have a null 序号 but are alone in their (day, code) pair.
    .sort("日期", "证券代码", "序号")
    .with_columns(
        结余数量=(
            # Signed quantity: +buy, -sell, 0 for no trade / any other flag.
            pl.when(pl.col("买卖标志") == "买入")
            .then(pl.col("成交数量"))
            .when(pl.col("买卖标志") == "卖出")
            .then(-pl.col("成交数量"))
            .otherwise(0)
            .cum_sum()
            .over("证券代码")
        ),
    )
)
d3

日期,证券代码,序号,券商,交易时间,证券名称,买卖标志,成交价格,成交数量,成交金额,手续费,印花税,过户费,其他费,发生金额,手续费率,印花税率,过户费率,结余数量
date,str,u32,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2022-07-11,"""000096""",,,,,,,,,,,,,,,,,0.0
2022-07-11,"""000532""",,,,,,,,,,,,,,,,,0.0
2022-07-11,"""000559""",,,,,,,,,,,,,,,,,0.0
2022-07-11,"""000599""",,,,,,,,,,,,,,,,,0.0
2022-07-11,"""000655""",,,,,,,,,,,,,,,,,0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2023-10-31,"""688299""",,,,,,,,,,,,,,,,,0.0
2023-10-31,"""688321""",,,,,,,,,,,,,,,,,0.0
2023-10-31,"""688360""",,,,,,,,,,,,,,,,,0.0
2023-10-31,"""688393""",,,,,,,,,,,,,,,,,0.0


In [29]:
# Daily market value of all holdings.
d4 = (
    # d3 has one row per trade, so a security traded several times in a day has
    # several rows for that day, each carrying an intermediate running balance.
    # Valuing and summing those rows would count the position more than once.
    # Collapse to one row per (day, code) with the day's net quantity, then
    # rebuild the end-of-day balance from it.
    d3.group_by("日期", "证券代码")
    .agg(
        净变动数量=(
            pl.when(pl.col("买卖标志") == "买入")
            .then(pl.col("成交数量"))
            .when(pl.col("买卖标志") == "卖出")
            .then(-pl.col("成交数量"))
            .otherwise(0)
            .sum()
        ),
    )
    .sort("证券代码", "日期")
    .with_columns(结余数量=pl.col("净变动数量").cum_sum().over("证券代码"))
    .join(
        hq,
        left_on=["日期", "证券代码"],
        right_on=["trade_date", "ts_code"],
        how="left",
    )
    .sort("证券代码", "日期")
    # Days without a quote reuse the security's most recent earlier close.
    .with_columns(close=pl.col("close").fill_null(strategy="forward").over("证券代码"))
    .with_columns(持股市值=pl.col("结余数量") * pl.col("close"))
    .group_by("日期")
    .agg(pl.col("持股市值").sum())
    .sort("日期")
)
d4

日期,持股市值
date,f64
2022-07-11,703040.0
2022-07-12,707714.0
2022-07-13,713855.0
2022-07-14,710079.0
2022-07-15,692377.0
…,…
2023-10-27,571195.0
2023-10-28,571195.0
2023-10-29,571195.0
2023-10-30,686345.0


In [30]:
# Explore the daily holdings value interactively.
PerspectiveWidget(d4)

PerspectiveWidget(binding_mode='server', columns=['日期', '持股市值'], table_name='0.3595039580115339', theme=None)

In [31]:
# Daily account summary: holdings value, cash flow, cash balance, total assets.
d5 = (
    # d3 has one row per trade, so a security traded several times in a day has
    # several rows for that day, each carrying an intermediate running balance.
    # Valuing and summing those rows would count the position more than once.
    # Collapse to one row per (day, code) first: net quantity and total cash
    # flow of the day. The end-of-day balance is then rebuilt from the net.
    d3.group_by("日期", "证券代码")
    .agg(
        净变动数量=(
            pl.when(pl.col("买卖标志") == "买入")
            .then(pl.col("成交数量"))
            .when(pl.col("买卖标志") == "卖出")
            .then(-pl.col("成交数量"))
            .otherwise(0)
            .sum()
        ),
        发生金额=pl.col("发生金额").sum(),
    )
    .sort("证券代码", "日期")
    .with_columns(结余数量=pl.col("净变动数量").cum_sum().over("证券代码"))
    .join(
        hq,
        left_on=["日期", "证券代码"],
        right_on=["trade_date", "ts_code"],
        how="left",
    )
    .sort("证券代码", "日期")
    # Days without a quote reuse the security's most recent earlier close.
    .with_columns(close=pl.col("close").fill_null(strategy="forward").over("证券代码"))
    .with_columns(持股市值=pl.col("结余数量") * pl.col("close"))
    .group_by("日期")
    .agg(
        pl.col("持股市值").sum(),
        pl.col("发生金额").sum(),
    )
    .sort("日期")
    .with_columns(
        # A single transfer of 1,000,000 on the first row (earliest date); 0 afterwards.
        转账金额=pl.when(pl.int_range(0, pl.len()) == 0).then(100_0000).otherwise(0),
    )
    .with_columns(
        # Cash = cumulative transfers plus cumulative trade cash flows.
        现金余额=(pl.col("转账金额") + pl.col("发生金额")).cum_sum(),
    )
    .with_columns(总资产=pl.col("持股市值") + pl.col("现金余额"))
)
d5

日期,持股市值,发生金额,转账金额,现金余额,总资产
date,f64,f64,i32,f64,f64
2022-07-11,703040.0,-699551.12,1000000,300448.88,1.0035e6
2022-07-12,707714.0,0.0,0,300448.88,1.0082e6
2022-07-13,713855.0,0.0,0,300448.88,1.0143e6
2022-07-14,710079.0,0.0,0,300448.88,1.0105e6
2022-07-15,692377.0,0.0,0,300448.88,992825.88
…,…,…,…,…,…
2023-10-27,571195.0,0.0,0,510845.96,1.0820e6
2023-10-28,571195.0,0.0,0,510845.96,1.0820e6
2023-10-29,571195.0,0.0,0,510845.96,1.0820e6
2023-10-30,686345.0,-94884.93,0,415961.03,1.1023e6


In [32]:
# Explore the daily account summary interactively.
PerspectiveWidget(d5)

PerspectiveWidget(binding_mode='server', columns=['日期', '持股市值', '发生金额', '转账金额', '现金余额', '总资产'], table_name='0.…

In [33]:
# Download the daily percentage change of index 000300.SH over the same date
# span as the blotter.
ihq = pro.index_daily(
    ts_code="000300.SH",
    start_date=f"{start_date:%Y%m%d}",
    end_date=f"{end_date:%Y%m%d}",
    fields="ts_code,trade_date,pct_chg",
)

In [34]:
# Save the index quotes to disk; the next cell reads them back from this file.
pl.from_pandas(ihq).write_parquet("index_daily.parquet")

In [35]:
# Turn the index's daily percentage changes into a benchmark wealth curve.
ihq = (
    pl.read_parquet("index_daily.parquet")
    .with_columns(
        pl.col("trade_date").str.to_date(format="%Y%m%d"),
        # Replace the percentage change with a growth factor (e.g. -1.67 -> 0.9833).
        (1 + pl.col("pct_chg") / 100).alias("pct_chg"),
    )
    .sort("trade_date")
    # Cumulative product of the growth factors, in date order.
    .with_columns(pl.col("pct_chg").cum_prod().alias("car"))
    # Scale by the same 1,000,000 used as the starting amount.
    .with_columns((pl.col("car") * 100_0000).alias("沪深300"))
)
ihq

ts_code,trade_date,pct_chg,car,沪深300
str,date,f64,f64,f64
"""000300.SH""",2022-07-11,0.983254,0.983254,983254.0
"""000300.SH""",2022-07-12,0.990585,0.973997,973996.66359
"""000300.SH""",2022-07-13,1.001818,0.975767,975767.389524
"""000300.SH""",2022-07-14,1.000142,0.975906,975905.948494
"""000300.SH""",2022-07-15,0.982983,0.959299,959298.956968
…,…,…,…,…
"""000300.SH""",2023-10-25,1.004969,0.791288,791288.453778
"""000300.SH""",2023-10-26,1.002764,0.793476,793475.575065
"""000300.SH""",2023-10-27,1.013727,0.804368,804367.614284
"""000300.SH""",2023-10-30,1.006003,0.809196,809196.233072


In [36]:
# Explore the benchmark wealth curve interactively.
PerspectiveWidget(ihq)

PerspectiveWidget(binding_mode='server', columns=['ts_code', 'trade_date', 'pct_chg', 'car', '沪深300'], table_n…

In [37]:
# Put portfolio and benchmark side by side in long form (one row per date and
# series). The inner join keeps only dates that have an index quote.
d6 = (
    d5.join(ihq, left_on="日期", right_on="trade_date", how="inner")
    .unpivot(
        index="日期",
        on=["总资产", "沪深300"],
        variable_name="资产类型",
        value_name="财富",
    )
)
d6

日期,资产类型,财富
date,str,f64
2022-07-11,"""总资产""",1.0035e6
2022-07-12,"""总资产""",1.0082e6
2022-07-13,"""总资产""",1.0143e6
2022-07-14,"""总资产""",1.0105e6
2022-07-15,"""总资产""",992825.88
…,…,…
2023-10-25,"""沪深300""",791288.453778
2023-10-26,"""沪深300""",793475.575065
2023-10-27,"""沪深300""",804367.614284
2023-10-30,"""沪深300""",809196.233072


In [38]:
# Explore the portfolio-versus-benchmark comparison interactively.
PerspectiveWidget(d6)

PerspectiveWidget(binding_mode='server', columns=['日期', '资产类型', '财富'], table_name='0.5208403220038226', theme=…