In [33]:
import zipfile

with zipfile.ZipFile('f.zip', 'r') as zip_ref:
    zip_ref.extractall('houses')

print("Folder unzipped successfully!")

Folder unzipped successfully!


In [2]:
# =====================================================
# INSTALL (required for Kaggle/Colab)
# =====================================================
%pip install lightautoml==0.4.1 -q

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

import warnings
warnings.filterwarnings("ignore")



[0mNote: you may need to restart the kernel to use updated packages.


In [3]:
# Replace with your files
train_df = pd.read_csv("houses/train.csv")
test_df  = pd.read_csv("houses/test.csv")

TARGET = "SalePrice"


In [4]:
# Task type: reg or binary or multiclass
# e.g. Task(name='binary', metric='auc')
task = Task(name='reg', metric='mse')

TIME_LIMIT = 3600 * 1.5   # 1.5 hours

roles = {
    'target': TARGET
}


In [5]:
train_data, valid_data = train_test_split(
    train_df, test_size=0.2, random_state=42
)

In [6]:
# GPU requires pip install catboost>=1.2, but 0.4.1 works fine with default CPU for CatBoost
# LAMA will use GPU internally for NN-based models.

automl = TabularAutoML(
    task=task,
    timeout=TIME_LIMIT,
    cpu_limit=4,           # adjust based on machine
    reader_params={"n_jobs": 4},
    tuning_params={
        "max_tuning_iters": 30,    # strong tuning (default ~7)
        "max_tuning_time": int(TIME_LIMIT * 0.3)
    },
    selection_params={
        "mode": 2,
        "importance_type": "feature_importance"
    },
    
)

oof_preds = automl.fit_predict(
    train_data,
    roles=roles,
    valid_data=valid_data,
    verbose=1
)

print("Training finished!")

[16:23:35] Stdout logging level is INFO.
[16:23:35] Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer
[16:23:35] Task: reg

[16:23:35] Start automl preset with listed constraints:
[16:23:35] - time: 5400.00 seconds
[16:23:35] - CPU: 4 cores
[16:23:35] - memory: 16 GB

[16:23:35] [1mTrain data shape: (1168, 81)[0m

[16:23:39] Layer [1m1[0m train process start. Time left 5395.87 secs


  cnts = concat([cnts, Series([cnts.shape[0] + 1], index=[np.nan])])


[16:23:40] [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed
[16:23:40] Time left 5394.88 secs

[16:23:42] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:43] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:44] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:44] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:45] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:46] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:46] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:47] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:47] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:49] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:52] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:54] [1mSelector_LightGBM[0m fitting and predicting completed
[16:23:56] [1mSelector_LightGBM[0m fitting 

Optimization Progress: 100%|██████████| 101/101 [08:00<00:00,  4.76s/it, best_trial=98, best_value=-6.9e+8]

[16:33:04] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m completed





[16:33:08] [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m fitting and predicting completed
[16:33:08] Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ... Time budget is 1620.00 secs


Optimization Progress: 100%|██████████| 101/101 [07:24<00:00,  4.40s/it, best_trial=75, best_value=-5.99e+8]

[16:40:33] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m completed





[16:40:39] Time left 4376.65 secs

[16:40:39] [1mLayer 1 training completed.[0m

[16:40:39] Blending: optimization starts with equal weights. Score = [1m-658334464.0000000[0m
[16:40:39] Blending: iteration [1m0[0m: score = [1m-616625664.0000000[0m, weights = [1m[0.         0.         0.10494811 0.         0.8950519 ][0m
[16:40:39] Blending: no improvements for score. Terminated.

[16:40:39] Blending: best score = [1m-616625664.0000000[0m, best weights = [1m[0.         0.         0.10494811 0.         0.8950519 ][0m
[16:40:39] [1mAutoml preset training completed in 1023.51 seconds[0m

[16:40:39] Model description:
Final prediction for new objects (level 0) = 
	 0.10495 * (1 averaged models Lvl_0_Pipe_1_Mod_1_Tuned_LightGBM) +
	 0.89505 * (1 averaged models Lvl_0_Pipe_1_Mod_3_Tuned_CatBoost) 

Training finished!


In [10]:
from sklearn.metrics import mean_squared_error

val_true = valid_data[TARGET].values
val_pred = oof_preds.data[~np.isnan(oof_preds.data)]

rmse = mean_squared_error(val_true, val_pred)
print("Validation MSE:", rmse)


Validation MSE: 616625664.0


In [8]:
test_preds = automl.predict(test_df).data

In [9]:
sub = pd.read_csv("houses/sample_submission.csv")
sub['Survived'] = test_preds.astype(int)

sub.to_csv("submission3.csv", index=False)
sub.head()


Unnamed: 0,Id,SalePrice,Survived
0,1461,169277.052498,131568
1,1462,187758.393989,158158
2,1463,183583.68357,185757
3,1464,179317.477511,192405
4,1465,150730.079977,190692


## see stats

In [None]:
print("===== MODELS (by level) =====")
for i, level in enumerate(automl.models):
    print(f"\n--- Level {i} ---")
    for m in level:
        print(m.name)

print("===== FEATURE IMPORTANCES =====")
fi = automl.get_feature_scores()
print(fi)

if hasattr(automl, "tuner"):
    print("===== TUNING STATS =====")
    print(automl.tuner)

print("===== TIME STATS =====")
print(automl.timer.get_stage_times())

print("===== BLENDER =====")
print(automl.blender)

if hasattr(automl.blender, "weights"):
    print("===== BLENDER WEIGHTS =====")
    print(automl.blender.weights)

print("===== ALL MODEL PARAMETERS =====")
for i, level in enumerate(automl.models):
    print(f"\n--- Level {i} ---")
    for model in level:
        print(f"Model: {model.name}")
        print(model.params)
        print("-" * 30)

print("===== VALIDATION SCORE =====")
print(automl.score)
