In [1]:
import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

/kaggle/input/drw-crypto-market-prediction/sample_submission.csv
/kaggle/input/drw-crypto-market-prediction/train.parquet
/kaggle/input/drw-crypto-market-prediction/test.parquet


In [2]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error, r2_score
from scipy.stats import pearsonr, spearmanr
import polars as pl
import numpy as np
from tqdm import tqdm

def get_cols_inf(df: pl.DataFrame) -> list[str]:
    """
    Returns a list of column names that contain any positive or negative infinity.

    Only floating-point columns are inspected; every other dtype is skipped up
    front through an explicit dtype check (the same approach get_cols_zerostd
    uses) instead of catching whatever exception ``is_infinite()`` might raise.
    A failure on a float column therefore surfaces instead of being silently
    swallowed.

    The returned names keep the order of ``df.columns``.
    """
    cols = []
    for col, dtype in zip(df.columns, df.dtypes):
        # Integers, strings, datetimes, ... cannot hold +/-inf.
        if not dtype.is_float():
            continue
        # df[col] is a Series; .is_infinite() → Boolean Series; .any() → Python bool
        if df[col].is_infinite().any():
            cols.append(col)
    return cols

def get_nan_columns(df: pl.DataFrame) -> list[str]:
    """
    Returns a list of column names with any NaN/null values.

    polars distinguishes a missing value (null) from a floating-point NaN, and
    ``is_null()`` does not flag NaN. Both conditions are therefore checked:
    ``is_null()`` on every column and, additionally, ``is_nan()`` on
    floating-point columns (the only dtypes that can contain NaN).

    The returned names keep the order of ``df.columns``.
    """
    cols = []
    for col, dtype in zip(df.columns, df.dtypes):
        has_missing = pl.col(col).is_null().any()
        if dtype.is_float():
            # NaN is a regular float value in polars, not a null.
            has_missing = has_missing | pl.col(col).is_nan().any()
        if df.select(has_missing).item():
            cols.append(col)
    return cols

def get_cols_zerostd(df: pl.DataFrame) -> list[str]:
    """
    Collects the numeric columns of ``df`` that show no variation.

    A column is reported when its ``std()`` is exactly zero or when ``std()``
    yields None (presumably because every value is null). Columns whose dtype
    is not numeric (e.g. datetime) are never inspected.

    The returned names keep the order of ``df.columns``.
    """
    flagged = []
    for name, kind in zip(df.columns, df.dtypes):
        # std() is only attempted on numeric dtypes.
        if not kind.is_numeric():
            continue
        spread = df[name].std()
        if spread is None or spread == 0.0:
            flagged.append(name)
    return flagged


def feature_engineering(df: pl.DataFrame) -> pl.DataFrame:
    """
    Appends six features derived from the quantity columns and returns the
    widened frame.

    Reads ``bid_qty``, ``ask_qty``, ``buy_qty``, ``sell_qty`` and ``volume``;
    adds, in this order:
      - bidask_ratio  = bid_qty / ask_qty
      - buysell_ratio = 0 where volume == 0, otherwise buy_qty / sell_qty
      - bidask_delta  = bid_qty - ask_qty
      - buysell_delta = buy_qty - sell_qty
      - buysell_size  = buy_qty + sell_qty
      - bidask_size   = bid_qty + ask_qty
    """
    bid, ask = pl.col("bid_qty"), pl.col("ask_qty")
    buy, sell = pl.col("buy_qty"), pl.col("sell_qty")

    # Rows with no traded volume get a ratio of 0 instead of the buy/sell quotient.
    buysell_ratio = pl.when(pl.col("volume") == 0).then(0).otherwise(buy / sell)

    derived = [
        (bid / ask).alias("bidask_ratio"),
        buysell_ratio.alias("buysell_ratio"),
        (bid - ask).alias("bidask_delta"),
        (buy - sell).alias("buysell_delta"),
        (buy + sell).alias("buysell_size"),
        (bid + ask).alias("bidask_size"),
    ]
    return df.with_columns(derived)
def preprocess_train(train: pl.DataFrame, columns_to_drop: list[str] = []) -> tuple[pl.DataFrame, list[str]]:
    """
    Engineers features on a copy of ``train`` and removes unusable columns.

    Steps:
    1. Add the derived features via ``feature_engineering``.
    2. Collect the columns reported by ``get_cols_inf``, ``get_nan_columns``
       and ``get_cols_zerostd`` (each list is printed).
    3. Drop those columns together with the user-specified ``columns_to_drop``
       (e.g. label or order-book columns).

    Args:
        train: raw training frame; it is not modified.
        columns_to_drop: extra column names to remove. The default list is
            only read, never mutated.

    Returns:
        ``(df, drop_columns)``: the reduced frame and the names that were
        dropped. The list is de-duplicated and ordered deterministically
        (infinite → NaN → zero-std → user-specified), so the printed output is
        reproducible between kernel runs.
    """
    df = feature_engineering(train.clone())

    #### Preprocessing
    cols_inf = get_cols_inf(df)
    print("Columns with infinite values:", cols_inf)

    cols_nan = get_nan_columns(df)
    print("Columns with NaN values:", cols_nan)

    cols_zerostd = get_cols_zerostd(df)
    print("Columns with zero standard deviation:", cols_zerostd)

    # dict.fromkeys removes duplicates while keeping first-seen order; a plain
    # set() would yield a different ordering of the names on every kernel start.
    drop_columns = list(
        dict.fromkeys([*cols_inf, *cols_nan, *cols_zerostd, *columns_to_drop])
    )
    if drop_columns:
        df = df.drop(drop_columns)
    return df, drop_columns

def preprocess_test(test: pl.DataFrame, columns_to_drop: list[str] = []) -> pl.DataFrame:
    """
    Builds the test feature frame: applies ``feature_engineering`` to a copy of
    ``test``, drops ``columns_to_drop`` and prints the dropped names.

    Returns the reduced frame; ``test`` itself is left untouched.
    """
    engineered = feature_engineering(test.clone())
    trimmed = engineered.drop(columns_to_drop)
    print("Columns dropped from test set:", columns_to_drop)
    return trimmed

# Data

In [3]:
# Load the competition training set.
data = pl.read_parquet("/kaggle/input/drw-crypto-market-prediction/train.parquet")
# Local alternative:
# data = pl.read_parquet(
#     source = "./data/train.parquet",
# )

# Keep the target as its own Series; "label" is removed from the features below.
y = data.get_column("label")

# Besides the label, the raw quantity columns are dropped once the engineered
# features have been derived from them inside preprocess_train.
X, drop_columns = preprocess_train(
    data, columns_to_drop=["label", "bid_qty", "ask_qty", "buy_qty", "sell_qty"]
)
X

Columns with infinite values: ['X697', 'X698', 'X699', 'X700', 'X701', 'X702', 'X703', 'X704', 'X705', 'X706', 'X707', 'X708', 'X709', 'X710', 'X711', 'X712', 'X713', 'X714', 'X715', 'X716', 'X717']
Columns with NaN values: []
Columns with zero standard deviation: ['X864', 'X867', 'X869', 'X870', 'X871', 'X872']


volume,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16,X17,X18,X19,X20,X21,X22,X23,X24,X25,X26,X27,X28,X29,X30,X31,X32,X33,X34,X35,X36,…,X855,X856,X857,X858,X859,X860,X861,X862,X863,X865,X866,X868,X873,X874,X875,X876,X877,X878,X879,X880,X881,X882,X883,X884,X885,X886,X887,X888,X889,X890,timestamp,bidask_ratio,buysell_ratio,bidask_delta,buysell_delta,buysell_size,bidask_size
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,datetime[ns],f64,f64,f64,f64,f64,f64
221.389,0.121263,-0.41769,0.005399,0.125948,0.058359,0.027359,0.03578,0.068219,1.034825,-0.029575,0.327805,0.485823,0.668596,0.617389,0.770037,0.857631,1.754456,0.572503,0.883229,0.58567,0.816321,0.529973,0.508244,0.448616,1.341892,1.406392,0.953631,1.183991,1.474789,0.774389,0.660586,0.269043,0.962802,0.966513,0.952759,0.952916,…,0.418618,-0.216525,0.200508,0.492433,-0.51249,0.541286,-0.336399,-1.027483,0.21857,1.728155,0.62414,-0.051211,0.691754,0.242124,2.096157,3.369195,0.244667,0.286611,0.722679,0.901931,1.000007,1.925423,1.847943,0.005676,0.190791,0.369691,0.37763,0.210153,0.159183,0.530636,2023-03-01 00:00:00,1.814006,3.921505,6.858,131.421,221.389,23.708
847.796,0.302841,-0.049576,0.356667,0.481087,0.237954,0.208359,0.217057,0.249624,0.948694,-0.183488,0.150526,0.308421,0.492232,0.529787,0.682958,0.770965,1.686504,0.273357,0.591695,0.442391,0.674792,0.460741,0.439681,0.380399,1.304113,1.003783,0.776628,1.015943,1.312735,0.696895,0.584217,0.231104,0.935145,0.938957,0.918275,0.919065,…,0.424977,-0.180112,0.213252,0.479806,-0.180527,0.450331,-0.31915,-1.024055,0.088014,1.665698,0.622775,-0.079621,0.691665,0.242091,2.46103,4.127584,0.321394,0.31246,0.746452,0.912371,1.003153,1.928569,1.849468,0.005227,0.18466,0.363642,0.374515,0.209573,0.158963,0.530269,2023-03-01 00:01:00,16.519692,1.633316,36.254,203.896,847.796,40.926
295.596,0.167462,-0.291212,0.083138,0.206881,0.101727,0.072778,0.081564,0.114166,0.896459,-0.261779,0.044571,0.200608,0.384558,0.476229,0.629848,0.718232,1.656707,0.140156,0.457268,0.376524,0.610116,0.429751,0.409316,0.350359,1.28325,0.760801,0.670816,0.917205,1.219124,0.653355,0.541739,0.210095,0.932614,0.936476,0.919497,0.92028,…,0.409942,-0.265966,0.191734,0.440207,-0.108209,0.420681,-0.316953,-1.024056,-0.147363,1.666893,0.621414,-0.080427,0.691674,0.242093,2.493249,4.182112,0.326701,0.314636,0.746681,0.911129,1.002502,1.928047,1.849282,0.004796,0.178719,0.357689,0.371424,0.208993,0.158744,0.529901,2023-03-01 00:02:00,0.007336,1.167619,-59.808,22.858,295.596,60.692
460.705,0.072944,-0.43659,-0.102483,0.017551,0.007149,-0.021681,-0.012936,0.019634,0.732634,-0.535845,-0.273947,-0.124959,0.056438,0.311539,0.465377,0.554022,1.663491,0.152084,0.468778,0.383696,0.618529,0.435326,0.415523,0.356895,1.319538,0.955549,0.789646,1.044941,1.353001,0.72392,0.613462,0.246212,0.936911,0.942204,0.940304,0.942497,…,0.400075,-0.322244,0.183687,0.404295,-0.169373,0.386584,-0.314775,-1.024058,-0.09459,1.735322,0.620057,-0.094702,0.69121,0.24193,2.525526,4.292975,0.350791,0.32357,0.753829,0.913363,1.002985,1.928621,1.849608,0.004398,0.172967,0.351832,0.368358,0.208416,0.158524,0.529534,2023-03-01 00:03:00,0.23149,2.686731,-16.151,210.779,460.705,25.881
142.818,0.17382,-0.213489,0.096067,0.215709,0.107133,0.078976,0.087818,0.120426,0.763537,-0.430945,-0.205298,-0.062118,0.117266,0.341493,0.495591,0.584519,1.668419,0.156177,0.472732,0.3871,0.623192,0.439034,0.419868,0.361572,1.324595,0.90546,0.78375,1.047708,1.36188,0.732001,0.622712,0.251095,0.931761,0.936818,0.928362,0.930464,…,0.391759,-0.369625,0.192377,0.415438,-0.198976,0.389969,-0.312628,-1.02406,0.162221,1.712096,0.618703,-0.091884,0.691207,0.241928,2.52443,4.306694,0.335599,0.31907,0.747533,0.908904,1.001286,1.927084,1.84895,0.004008,0.167391,0.346066,0.365314,0.207839,0.158304,0.529167,2023-03-01 00:04:00,7.869603,2.216115,23.707,54.004,142.818,30.609
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
94.388,0.020155,0.076565,0.228994,0.288856,0.151634,0.108347,0.088073,0.073729,0.071211,0.460379,0.494391,0.372694,0.270015,0.089571,0.096287,0.109409,0.78486,2.031313,1.673446,0.641512,0.772499,0.773449,0.956571,0.899399,0.477649,0.120275,0.017187,0.166928,0.519881,0.607589,0.797609,0.31223,0.459993,0.47937,0.48488,0.454508,…,0.807352,0.035728,0.551648,1.233784,-0.252775,-0.040562,-0.347201,-0.21873,0.636555,0.533902,0.142461,-0.768709,0.260958,0.197249,1.776607,2.702758,0.204378,0.270656,0.750338,1.060258,1.450851,3.219345,3.340686,0.008679,0.224656,0.401595,0.393726,0.212651,0.136494,0.243172,2024-02-29 23:55:00,0.611756,0.705263,-2.642,-16.314,94.388,10.968
177.372,0.016262,0.062527,0.214072,0.276463,0.146521,0.104164,0.084063,0.069788,0.024066,0.332808,0.387194,0.27384,0.174273,0.042308,0.049073,0.06222,0.811096,1.942052,1.721022,0.682607,0.818153,0.79747,0.981444,0.924992,0.496542,0.246743,0.089766,0.238524,0.590531,0.643586,0.834236,0.330893,0.446043,0.478758,0.474246,0.461256,…,0.813372,0.052891,0.572652,1.236672,-0.253142,-0.027268,-0.220352,-0.206114,0.649406,0.544298,0.149768,-0.779659,0.260872,0.197186,1.807592,2.742924,0.213951,0.271922,0.748216,1.056653,1.448602,3.216719,3.339353,0.007928,0.217422,0.395019,0.390476,0.212063,0.136305,0.243004,2024-02-29 23:56:00,0.564317,1.640604,-1.768,43.03,177.372,6.348
101.252,0.045407,0.109834,0.263577,0.329266,0.174214,0.13294,0.113052,0.098865,-0.05737,0.154488,0.217087,0.10915,0.011308,-0.039019,-0.032317,-0.019202,0.498355,0.628261,0.454894,0.056188,0.191078,0.483386,0.667775,0.611826,0.458054,-0.055595,-0.062112,0.083189,0.432974,0.565042,0.756211,0.292203,0.365089,0.420895,0.399799,0.422313,…,0.803276,0.024071,0.54706,1.191918,-0.342808,-0.065439,-0.220704,-0.206118,0.535593,0.498593,0.150411,-0.805622,0.260703,0.197062,1.7447,2.73189,0.210581,0.268868,0.741793,1.050909,1.445661,3.213444,3.33774,0.007243,0.210421,0.388549,0.387252,0.211477,0.136117,0.242836,2024-02-29 23:57:00,1.438736,2.292427,1.597,39.746,101.252,8.877
74.56,0.124783,0.244168,0.408704,0.480016,0.251493,0.211727,0.19216,0.178116,0.111335,0.44718,0.53661,0.439239,0.345835,0.129327,0.136199,0.149399,0.803323,1.680122,1.620743,0.655204,0.794395,0.78617,0.971394,0.916158,0.496689,0.230439,0.089445,0.23383,0.582657,0.640532,0.832325,0.330608,0.441305,0.467176,0.460681,0.446152,…,0.807143,0.035102,0.568606,1.212147,-0.369407,-0.035077,-0.221043,-0.206122,0.647059,0.528757,0.151051,-0.774165,0.260957,0.197257,1.74259,2.643514,0.203284,0.264413,0.733953,1.044452,1.442484,3.209945,3.33603,0.006608,0.203642,0.382184,0.384054,0.210892,0.135928,0.242668,2024-02-29 23:58:00,1.169353,0.428489,0.83,-29.83,74.56,10.632


# Preprocessing

In [4]:
from sklearn.model_selection import train_test_split
from datetime import datetime

# Inclusive training window, expressed as Python datetime objects.
start = datetime(2023, 3, 1, 0, 0)
end   = datetime(2024, 2, 29, 23, 59, 59)

# Row predicate on the "timestamp" column; the same expression is applied to
# the features and to the label so both keep the same rows.
in_period = (pl.col("timestamp") >= pl.lit(start)) & (pl.col("timestamp") <= pl.lit(end))

# Features come from the preprocessed frame X.
X_period = X.filter(in_period)

# X no longer contains "label", so the target is taken from the original `data`.
y_period = data.filter(in_period).get_column("label")  # polars Series

# NumPy views for scikit-learn / LightGBM; the timestamp is not used as a feature.
X_np = X_period.drop(["timestamp"]).to_numpy()
y_np = y_period.to_numpy()

# Model Training

In [5]:
from lightgbm import LGBMRegressor

def fit_lightgbm_regression(
    X: np.ndarray,
    y: np.ndarray,
    n_estimators: int = 100,
    learning_rate: float = 0.1,
    num_leaves: int = 31,
    **kwargs
) -> LGBMRegressor:
    """
    Builds an ``LGBMRegressor`` and fits it on ``(X, y)``.

    ``n_estimators``, ``learning_rate``, ``num_leaves`` and any additional
    ``kwargs`` are forwarded unchanged to the ``LGBMRegressor`` constructor.
    ``X`` and ``y`` are expected to be NumPy arrays; this notebook converts its
    polars data with ``.to_numpy()`` before calling.

    Returns the fitted estimator.
    """
    hyperparams = dict(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        num_leaves=num_leaves,
    )
    regressor = LGBMRegressor(**hyperparams, **kwargs)
    regressor.fit(X, y)
    return regressor

# Hyperparameters for the LightGBM regressor, grouped by role.
lgbm_params = dict(
    # Boosting schedule: a small learning rate paired with many trees
    # (1000 trees × 0.05 = 50 "effective steps" of gradient boosting).
    learning_rate=0.05,
    n_estimators=1000,

    # Tree shape: num_leaves caps the terminal nodes per tree (64 ≈ 2^6),
    # max_depth additionally caps the depth, and min_data_in_leaf
    # (a.k.a. min_child_samples) requires enough observations in every leaf.
    num_leaves=64,
    max_depth=10,
    min_data_in_leaf=20,

    # Sampling: subsample (a.k.a. bagging_fraction) draws 80 % of the rows,
    # re-drawn for every tree because subsample_freq=1; colsample_bytree
    # (a.k.a. feature_fraction) draws 80 % of the features per tree.
    subsample=0.8,
    subsample_freq=1,
    colsample_bytree=0.8,

    # Regularization as a further guard against overfitting.
    reg_alpha=0.1,   # L1 regularization
    reg_lambda=1.0,  # L2 regularization

    # Runtime settings.
    n_jobs=-1,
    random_state=42,
    verbosity=1,
)

model = fit_lightgbm_regression(X_np, y_np, **lgbm_params)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 8.901727 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 221834
[LightGBM] [Info] Number of data points in the train set: 525887, number of used features: 870
[LightGBM] [Info] Start training from score 0.036126


In [None]:
# `model` was already fitted in the previous cell on X_np / y_np with exactly
# these hyperparameters (learning_rate=0.05, n_estimators=1000, num_leaves=64,
# max_depth=10, ..., random_state=42). Fitting it again here would only repeat
# the full training run, so the fitted estimator is reused instead.
model

# Predict

In [6]:
# Load the test set and attach a 1-based "ID" row index for the submission.
# Local alternative:
# test = pl.read_parquet(
#     source = "./data/test.parquet",
# )
test = pl.read_parquet(
    "/kaggle/input/drw-crypto-market-prediction/test.parquet"
).with_row_index("ID", offset=1)

# Apply the same feature engineering and column drops as for training;
# "ID" is an identifier, not a feature, so it is removed from the model input.
X_test = preprocess_test(test, columns_to_drop=drop_columns).drop("ID")
X_test

Columns dropped from test set: ['X702', 'X699', 'X870', 'X867', 'X871', 'X872', 'ask_qty', 'X697', 'X704', 'X703', 'X709', 'X869', 'X710', 'X698', 'X707', 'X705', 'X715', 'X716', 'X708', 'X714', 'X864', 'X701', 'X717', 'X700', 'X706', 'label', 'X711', 'buy_qty', 'bid_qty', 'sell_qty', 'X712', 'X713']


volume,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16,X17,X18,X19,X20,X21,X22,X23,X24,X25,X26,X27,X28,X29,X30,X31,X32,X33,X34,X35,X36,…,X854,X855,X856,X857,X858,X859,X860,X861,X862,X863,X865,X866,X868,X873,X874,X875,X876,X877,X878,X879,X880,X881,X882,X883,X884,X885,X886,X887,X888,X889,X890,bidask_ratio,buysell_ratio,bidask_delta,buysell_delta,buysell_size,bidask_size
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
21.558,-0.732818,0.512331,-0.041982,-0.59826,-0.517646,-0.720098,-0.76901,-0.780178,-0.3365,0.249788,-0.156486,-0.495663,-0.76273,-0.471363,-0.447478,-0.408999,0.535565,-0.171083,-0.227923,-0.079453,-0.041204,0.048349,0.151941,0.240157,1.353095,-0.387866,-0.276421,-0.243297,-0.186772,0.034361,0.163524,0.166223,0.135839,0.025773,0.182896,0.054399,…,0.087326,0.602466,0.888982,0.235755,-0.074722,-1.5190e-25,0.168711,-0.033501,0.242743,-0.270279,1.051975,0.43189,-1.268835,0.243075,0.237836,1.608907,3.98586,0.306682,0.343981,0.6094,0.505601,0.43329,1.014336,1.367567,1.584126,1.584126,1.584126,1.584125,1.567979,1.426368,2.192682,0.009405,0.964999,-12.007,-0.384,21.558,12.235
148.545,-0.337995,-0.412176,-0.259468,-0.334809,-0.247443,-0.243987,-0.214849,-0.224255,-0.539625,-0.315144,-0.202614,-0.293257,-0.508739,-0.333933,-0.336385,-0.357372,-0.348564,0.065579,0.139376,-0.047501,-0.227728,-0.145745,-0.236345,-0.519209,1.102136,0.128703,0.179227,0.128473,0.023283,0.033936,-0.123918,-0.278002,-0.225812,-0.2401,-0.184761,-0.207515,…,1.111155,1.187569,0.174188,0.353079,-0.252909,0.128268,-0.502716,0.373259,0.955962,-0.356306,0.38169,0.718047,-1.204197,0.149122,0.210784,3.143691,3.913445,0.231629,0.312275,0.68531,0.661501,0.725448,1.748939,1.848177,0.000131,0.000323,0.001746,0.007319,0.050925,0.142206,0.739759,0.819041,11.072903,-0.536,123.937,148.545,5.388
80.561,0.111249,0.458221,0.466916,0.574081,0.324722,0.291298,0.295451,0.324332,-0.007327,0.351338,0.358218,0.329307,0.29087,0.012313,-0.008258,0.057786,0.332306,0.167834,0.197985,0.10225,0.060063,-0.139541,-0.224249,-0.24603,0.215433,0.150163,0.046509,0.044225,0.003683,-0.21376,-0.408667,-0.295609,0.029641,0.029593,0.021453,0.021404,…,0.665538,0.589635,-0.26551,-0.340681,-0.002949,0.136127,0.273964,0.286688,-0.488119,0.268433,0.230857,0.192657,-0.618164,0.252338,0.203609,1.702517,3.665623,0.229644,0.278697,0.592664,0.629683,0.743693,1.70468,1.772028,0.00055,0.003597,0.009265,0.019948,0.072535,0.169672,0.807862,0.463082,0.409124,-1.258,-33.781,80.561,3.428
129.6,-0.149399,-0.640638,-0.873778,-1.026144,-0.508816,-0.318499,-0.27099,-0.26999,-0.206264,-0.491395,-0.791609,-0.849243,-0.737791,-0.213973,-0.258329,-0.334767,-0.501566,-0.143377,-0.101949,-0.043811,-0.021846,0.030501,-0.010734,-0.143917,1.89793,0.166798,0.151133,0.156303,0.179812,0.145286,0.095092,-0.050541,-0.233126,-0.197698,-0.239907,-0.210529,…,0.140885,0.945045,1.247777,-0.175964,-0.279785,-4.2727e-43,-0.312154,0.121647,0.145365,-0.364048,0.338114,0.409994,-2.191214,0.16445,0.23466,2.349036,3.508278,0.164611,0.178355,0.380719,0.406191,0.451041,0.930946,1.037839,1.382037,1.382037,1.382037,1.382037,1.381752,1.363445,2.465509,13.243509,8.906742,13.676,103.436,129.6,15.91
93.636,-0.694662,0.611254,0.067671,-0.531632,-0.58145,-0.670998,-0.658519,-0.641033,-0.709044,0.522476,0.176614,-0.358,-0.913063,-0.542747,-0.549789,-0.535438,-0.039,0.497418,0.220629,0.023939,-0.064359,-0.030559,-0.063269,-0.141931,2.018999,-0.201351,-0.259154,-0.308402,-0.341364,-0.106369,0.003638,0.151764,-0.216639,-0.262946,-0.172207,-0.242251,…,0.587153,0.528421,0.286078,-0.345662,-0.122057,-0.969495,0.065162,0.172275,-0.38339,0.184046,0.815732,0.357557,0.096416,0.255045,0.234342,2.245113,4.026028,0.576855,0.453086,0.78344,0.68716,0.602724,1.080267,1.141878,0.000363,0.012718,0.137533,0.464112,1.050577,1.268567,2.499015,0.002328,0.878883,-14.145,-6.036,93.636,14.211
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
117.261,0.198687,0.135445,0.505441,0.753963,0.410612,0.306174,0.289102,0.32499,0.729853,0.325204,1.085678,1.431536,1.419513,0.570386,0.615125,0.747517,1.836788,0.553637,1.026012,0.573284,0.696552,0.774605,1.129025,1.469263,2.108377,0.28232,0.119491,0.030734,0.097749,0.49172,0.929869,0.788478,0.870172,0.92816,0.897521,0.952439,…,1.203819,1.003302,0.418028,0.582676,0.640543,0.095304,0.852907,0.010261,-0.280154,-0.064651,2.095047,0.397575,-0.018446,0.249358,0.227386,1.773041,3.005705,0.559482,0.406204,0.684409,0.773015,1.023725,2.185495,2.257754,1.239053,0.646543,0.36806,0.196958,0.074014,0.059849,0.245092,3.695316,1.746738,6.272,31.879,117.261,10.926
64.208,-0.451693,-0.433553,-0.232846,-0.190379,-0.215448,-0.454212,-0.517093,-0.514747,-0.308224,-0.00429,0.114162,-0.00721,-0.424173,-0.539669,-0.566827,-0.522195,1.381301,0.063865,0.13718,0.009016,-0.021502,0.176172,0.418806,0.633591,1.822786,-0.019996,-0.031023,-0.12061,-0.084458,0.356403,0.832482,0.61472,0.325817,0.34695,0.409159,0.433315,…,1.509983,1.763117,0.948866,0.498332,-0.529497,0.493532,-0.374436,-0.087166,-0.177066,0.186569,1.703517,0.812692,-1.342336,0.237381,0.237922,2.230804,4.22938,0.205731,0.31069,0.717422,0.840936,0.901631,1.776626,1.724998,0.000021,0.000027,0.000029,0.000083,0.049487,0.2749,1.162933,0.689008,2.352723,-2.269,25.906,64.208,12.323
62.209,0.574457,0.48224,1.07644,1.363869,0.779195,0.670684,0.564027,0.556401,0.63542,0.286962,0.862118,1.196873,1.38265,0.506039,0.39349,0.414183,0.379163,0.197129,0.250243,0.099678,0.057377,-0.004603,-0.000444,-0.06885,1.304604,0.224014,-0.064912,-0.376931,-0.696543,-0.555989,-0.742085,-0.49279,0.154908,0.169272,0.115327,0.141064,…,1.025781,0.684136,0.493307,-0.142105,0.405009,-0.434661,0.156975,0.13996,-0.293231,0.007022,0.545298,0.49279,-0.737421,0.256021,0.243572,1.959328,3.369222,0.372525,0.330779,0.632952,0.628388,0.685204,1.340804,1.420642,0.0006,0.00574,0.089671,0.374153,0.977561,1.223688,2.454477,12.125237,0.624977,11.726,-14.357,62.209,13.834
163.173,-0.560855,-0.324104,-0.356042,-0.45153,-0.30604,-0.402684,-0.468723,-0.527814,-0.472296,0.310539,0.237107,0.088514,-0.112136,-0.179829,-0.277702,-0.370524,-1.160989,-0.332739,-0.468868,-0.347465,-0.61089,-0.871605,-1.347199,-1.655619,1.122232,0.229955,0.038826,-0.129316,-0.453353,-0.715904,-0.785402,-0.19266,-0.246434,-0.242988,-0.229998,-0.231492,…,1.684115,-1.453581,-0.693012,0.015774,0.017319,-0.245766,-0.443753,0.630373,-1.183208,0.164258,0.125945,0.810227,-0.392165,0.184982,0.225708,4.109402,4.230086,0.856037,0.570565,1.006882,0.981841,1.111372,2.742464,3.034341,0.000089,0.000102,0.000153,0.000627,0.021699,0.044349,0.180866,3.06823,0.460749,2.91,-60.237,163.173,5.724


In [7]:
# Predict on the test features (passed positionally as a NumPy array).
y_pred = model.predict(X_test.to_numpy())

# Pair every test ID with its prediction.
submission = test.select("ID").with_columns(pl.Series("prediction", y_pred))
submission



ID,prediction
u32,f64
1,-0.00387
2,-0.115677
3,0.080374
4,-0.068495
5,0.114384
…,…
538146,0.700414
538147,-0.111473
538148,0.159216
538149,-0.515736


# Submission

The submission file follows the layout of `sample_submission.csv`, which has two columns:
- ID
- prediction

In [8]:
# Write the ID/prediction frame as CSV into the Kaggle working directory.
submission.write_csv("/kaggle/working/submission.csv")