In [1]:
import polars as pl
import polars.selectors as cs

from predictables.encoding.src.lagged_mean_encoding import CredWtdMean

In [2]:
# One-off conversion of the Excel fixture to parquet so later cells can scan it
# lazily. `DataFrame.write_parquet` returns None, so its result is deliberately
# not bound to a name (the original assignment left `lf = None`).
# The parquet file is written next to the workbook, at the exact path the next
# cell scans, instead of relative to whatever the kernel's working directory is.
pl.read_excel(
    "/app/ts_testing.xlsx", sheet_name="Values", engine="calamine"
).write_parquet("/app/ts_testing.parquet")

In [3]:
# Lazily load only the columns this notebook works with: the raw inputs
# (index/date/product/hit) plus the pre-computed reference columns that came
# from the workbook.
lf = pl.scan_parquet("/app/ts_testing.parquet").select(
    [
        "index",
        "date",
        "product_cat",
        "product_subcat",
        "product_code",
        "hit",
        "total_30_30_average_laplace(1)_smoothed",
        "code_30_30_average_laplace(1)_smoothed",
        "cred_wtd_30_30_average",
        "cred_wtd_30_30_Z",
        "cred_wtd_30_30_n",
    ]
)

# Cast every string column to Categorical. The selector is used directly as an
# expression, so the cast is applied to each matching column under its original
# name without first resolving the lazy schema through `.columns`.
lf = lf.with_columns(cs.string().cast(pl.Categorical))

# Preview CredWtdMean on the freshly loaded frame.
#
# The result is bound to its own name instead of back to `lf`: with rejoin
# enabled the returned frame carries extra columns (`individual`, `n`, `Z`,
# `collective`, and the default-named CRED_WTD column), and the following cells
# build `individual`, `n` and `Z` themselves from the plain loaded frame.
# Keeping `lf` untouched here lets the notebook run top to bottom.
cred_wtd_preview = (
    CredWtdMean()
    .lf(lf)
    .date_col("date")
    .numerator_col("hit")
    .denominator_col("count")
    .cat_col("product_code")
    .index_col("index")
    .offset(30)
    .window(30)
    .rejoin(True)
    .laplace_alpha(1)
    # .rename(...) is not called, so the output column keeps its default name.
    .run()
).drop(["count", "complement", "K"])

cred_wtd_preview.head().collect()

index,date,product_cat,product_subcat,product_code,hit,total_30_30_average_laplace(1)_smoothed,code_30_30_average_laplace(1)_smoothed,cred_wtd_30_30_average,cred_wtd_30_30_Z,cred_wtd_30_30_n,individual,collective,n,Z,CRED_WTD(hit[product_code])[lag:30/win:30]
i64,date,cat,cat,cat,i64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64
0,2020-01-01,"""D""","""f""","""D-f""",1,0.5,0.5,0.5,0.0,0,0.0,0.0,0.0,0.0,0.0
1,2020-01-02,"""B""","""f""","""B-f""",1,0.5,0.5,0.5,0.0,0,0.0,0.0,0.0,0.0,0.0
2,2020-01-03,"""A""","""d""","""A-d""",0,0.5,0.5,0.5,0.0,0,0.0,0.0,0.0,0.0,0.0
3,2020-01-03,"""A""","""e""","""A-e""",1,0.5,0.5,0.5,0.0,0,0.0,0.0,0.0,0.0,0.0
4,2020-01-04,"""D""","""c""","""D-c""",1,0.5,0.5,0.5,0.0,0,0.0,0.0,0.0,0.0,0.0


In [4]:
# NOTE(review): `LaplaceSmoothedMean` is not imported in the notebook's import
# cell; add the import (module path to be confirmed) or this cell raises
# NameError on a fresh kernel.


def _laplace_smoothed(frame, name, cat_col=None):
    """Run LaplaceSmoothedMean on ``frame`` and rejoin the result as column ``name``.

    Uses the notebook's fixed settings: date column "date", numerator "hit",
    denominator "count", index "index", offset 30, window 30, Laplace alpha 1.

    Parameters
    ----------
    frame
        Frame handed to the builder's ``.lf(...)``.
    name
        Passed to the builder's ``.rename(...)``.
    cat_col
        Optional category column. When None, ``.cat_col(...)`` is not called
        at all (presumably pooling all categories -- confirm against the
        builder's implementation).

    Returns
    -------
    Whatever the builder's ``.run()`` returns.
    """
    builder = (
        LaplaceSmoothedMean()
        .lf(frame)
        .date_col("date")
        .numerator_col("hit")
        .denominator_col("count")
    )
    # Same position in the call chain as in the original per-category run.
    if cat_col is not None:
        builder = builder.cat_col(cat_col)
    return (
        builder.index_col("index")
        .offset(30)
        .window(30)
        .rejoin(True)
        .laplace_alpha(1)
        .rename(name)
        .run()
    )


# Per-product-code smoothed mean.
lf = _laplace_smoothed(lf, "individual", cat_col="product_code")

# Smoothed mean computed without a category column.
lf = _laplace_smoothed(lf, "complement")

In [5]:
# Hand-rolled credibility weighting, built step by step from the columns added
# in the previous cell.
# NOTE(review): `DynamicRollingCount` is not imported in the notebook's import
# cell -- confirm its module and add the import.

# Rolling count per product_code, rejoined onto the frame as column "n".
with_rolling_n = (
    DynamicRollingCount()
    .lf(lf)
    .date_col("date")
    .x_col("hit")
    .cat_col("product_code")
    .index_col("index")
    .offset(30)
    .window(30)
    .rejoin(True)
    .op("ROLLING_COUNT")
    .rename("n")
    .run()
)

# K is the constant in the credibility factor Z = n / (n + K).
k_expr = pl.lit(5).cast(pl.Float64).alias("K")
z_expr = (pl.col("n") / (pl.col("n") + pl.col("K"))).alias("Z")

# Blend: Z * individual + (1 - Z) * complement, rounded to 5 decimals.
blend_expr = pl.col("individual") * pl.col("Z") + pl.col("complement") * (
    1 - pl.col("Z")
)
cred_wtd_expr = blend_expr.round(5).alias("cred_wtd")

# Each column is added in its own step because Z needs K, and cred_wtd needs Z.
lf = (
    with_rolling_n.with_columns([k_expr])
    .with_columns([z_expr])
    .with_columns([cred_wtd_expr])
)

lf.tail().collect()

index,date,product_cat,product_subcat,product_code,hit,total_30_30_average_laplace(1)_smoothed,code_30_30_average_laplace(1)_smoothed,cred_wtd_30_30_average,cred_wtd_30_30_Z,cred_wtd_30_30_n,count,individual,complement,n,K,Z,cred_wtd
i64,date,cat,cat,cat,i64,f64,f64,f64,f64,i64,i32,f64,f64,f64,f64,f64,f64
2554,2023-08-27,"""A""","""c""","""A-c""",0,0.25,0.2,0.20238,0.44444,4,1,0.2,0.25,4.0,5.0,0.444444,0.22778
2555,2023-08-28,"""B""","""c""","""B-c""",0,0.25424,0.16667,0.21046,0.5,5,1,0.166667,0.254237,5.0,5.0,0.5,0.21045
2556,2023-08-28,"""C""","""d""","""C-d""",0,0.25424,0.6,0.28013,0.44444,4,1,0.2,0.254237,4.0,5.0,0.444444,0.23013
2557,2023-08-28,"""C""","""c""","""C-c""",0,0.25424,0.5,0.26395,0.16667,1,1,0.5,0.254237,1.0,5.0,0.166667,0.2952
2558,2023-08-28,"""C""","""e""","""C-e""",0,0.25424,0.25,0.27609,0.375,3,1,0.25,0.254237,3.0,5.0,0.375,0.25265


In [6]:
# Run the library's CredWtdMean on the same frame so its output can be compared
# side by side with the hand-computed `cred_wtd` column.
cred_wtd_builder = (
    CredWtdMean()
    .lf(lf)
    .date_col("date")
    .numerator_col("hit")
    .denominator_col("count")
    .cat_col("product_code")
    .index_col("index")
    .offset(30)
    .window(30)
    .rejoin(True)
    .laplace_alpha(1)
)

# No .rename(...) is applied, so the result column keeps its default name.
cred_wtd_result = cred_wtd_builder.run()

# Remove helper columns that are not needed in the comparison.
helper_cols = ["count", "complement", "K"]
lf = cred_wtd_result.drop(helper_cols)

lf.tail().collect()

index,date,product_cat,product_subcat,product_code,hit,total_30_30_average_laplace(1)_smoothed,code_30_30_average_laplace(1)_smoothed,cred_wtd_30_30_average,cred_wtd_30_30_Z,cred_wtd_30_30_n,individual,n,Z,cred_wtd,collective,CRED_WTD(hit[product_code])[lag:30/win:30]
i64,date,cat,cat,cat,i64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64
2554,2023-08-27,"""A""","""c""","""A-c""",0,0.25,0.2,0.20238,0.44444,4,0.2,4.0,0.444444,0.22778,0.25,0.227778
2555,2023-08-28,"""B""","""c""","""B-c""",0,0.25424,0.16667,0.21046,0.5,5,0.166667,5.0,0.5,0.21045,0.254237,0.210452
2556,2023-08-28,"""C""","""d""","""C-d""",0,0.25424,0.6,0.28013,0.44444,4,0.6,4.0,0.444444,0.23013,0.254237,0.40791
2557,2023-08-28,"""C""","""c""","""C-c""",0,0.25424,0.5,0.26395,0.16667,1,0.5,1.0,0.166667,0.2952,0.254237,0.295198
2558,2023-08-28,"""C""","""e""","""C-e""",0,0.25424,0.25,0.27609,0.375,3,0.25,3.0,0.375,0.25265,0.254237,0.252648
