In [1]:
!pip install tabpfn



In [21]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, KFold
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_percentage_error, make_scorer
from tabpfn import TabPFNRegressor
import torch

In [22]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import cross_val_score, StratifiedKFold
from tabpfn import TabPFNRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_percentage_error, make_scorer

In [23]:
# Evaluation functions
def calculate_mape(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred`` against ``y_true``.

    Thin wrapper that forwards both arguments unchanged to scikit-learn's
    ``mean_absolute_percentage_error`` and returns its result.
    """
    mape = mean_absolute_percentage_error(y_true, y_pred)
    return mape

def calculate_leaderboard_score(mape_cost, leaderboard_type='public', ref_cost=None):
    """Convert a MAPE cost into a leaderboard score.

    Leaderboard score: max[10, 100 - (90 × MAPE / Ref Cost)]

    Parameters
    ----------
    mape_cost : float
        MAPE cost to convert into a score.
    leaderboard_type : str, default 'public'
        Leaderboard whose built-in reference cost is used when ``ref_cost``
        is not given. Only ``'public'`` (reference cost 2.72) is known here;
        any other value falls back to the public reference cost with a
        warning instead of being silently ignored.
    ref_cost : float, optional
        Explicit reference cost. When provided it takes precedence over
        ``leaderboard_type``.

    Returns
    -------
    float
        The leaderboard score, never lower than 10.

    Raises
    ------
    ValueError
        If an explicit ``ref_cost`` is not strictly positive.
    """
    import warnings

    known_ref_costs = {'public': 2.72}  # Public leaderboard reference cost

    if ref_cost is None:
        if leaderboard_type not in known_ref_costs:
            # Keep the historical result (public reference cost) but make the
            # fallback visible so the score is not mistaken for another board's.
            warnings.warn(
                f"No reference cost known for leaderboard_type={leaderboard_type!r}; "
                "using the public reference cost. Pass ref_cost to override.",
                stacklevel=2,
            )
        ref_cost = known_ref_costs.get(leaderboard_type, known_ref_costs['public'])
    elif ref_cost <= 0:
        raise ValueError(f"ref_cost must be positive, got {ref_cost!r}")

    score = max(10, 100 - (90 * mape_cost) / ref_cost)
    return score

def evaluate_model(y_true, y_pred, leaderboard_type='public'):
    """Score predictions against the ground truth.

    Computes the MAPE with ``calculate_mape`` and converts it with
    ``calculate_leaderboard_score`` for the requested leaderboard.

    Returns
    -------
    tuple
        ``(leaderboard_score, mape)``.
    """
    mape_value = calculate_mape(y_true, y_pred)
    return calculate_leaderboard_score(mape_value, leaderboard_type), mape_value

# Scorer for cross-validation. The leaderboard score is "higher is better",
# so it is handed to sklearn unchanged (greater_is_better=True, no sign flip).
def leaderboard_scorer(y_true, y_pred):
    """Return only the leaderboard score from ``evaluate_model`` (the MAPE is dropped)."""
    return evaluate_model(y_true, y_pred)[0]

custom_scorer = make_scorer(leaderboard_scorer, greater_is_better=True)


In [24]:

# Load the training and test tables from the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [25]:
# Targets are every training column whose name contains 'BlendProperty';
# all remaining training columns are used as model features.
# NOTE(review): if train.csv carries an identifier column (test.csv has 'ID'),
# it would be kept as a feature here — confirm against the data.
target_columns = [col for col in train.columns if 'BlendProperty' in col]
feature_columns = train.columns.drop(target_columns)

In [7]:
# Split the training frame into model inputs (feature_columns) and the
# multi-column target (target_columns), then select the same feature
# columns from the test frame.
X_train, y_train = train[feature_columns], train[target_columns]
X_test = test[feature_columns]


In [12]:
# Use the GPU when PyTorch reports one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
    torch.cuda.empty_cache()  # Clear cached GPU memory before fitting
else:
    device = 'cpu'

In [None]:
# Base single-target regressor. Pass the device selected above explicitly;
# previously `device` was computed but never handed to the model.
clf = TabPFNRegressor(device=device)

In [19]:
# Fit one TabPFN regressor per target column. The wrapper is built from a
# fresh base estimator rather than from `clf` itself, so re-running this cell
# does not wrap an already-wrapped model.
clf = MultiOutputRegressor(TabPFNRegressor(device=device))

In [16]:
# 5-fold shuffled cross-validation scored with the leaderboard metric.
cv_scores = cross_val_score(
    estimator=clf,
    X=X_train,
    y=y_train,
    scoring=custom_scorer,
    cv=KFold(n_splits=5, shuffle=True, random_state=42),
)


ValueError: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/base.py", line 1382, in wrapper
    estimator._validate_params()
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 98, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'estimator' parameter of MultiOutputRegressor must be an object implementing 'fit' and 'predict'. Got [TabPFNRegressor(device='cuda'), TabPFNRegressor(device='cuda'), TabPFNRegressor(device='cuda')] instead.


In [19]:
# Report the mean leaderboard score across folds with a +/- two-standard-deviation spread.
print(
    f"TabPFN CV Leaderboard Score: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})"
)

TabPFN CV Leaderboard Score: 83.2954 (+/- 8.2757)


In [20]:
# Same 5-fold split, scored with MAPE. The scorer is created with
# greater_is_better=False, so cross_val_score returns negated MAPE values.
mape_scores = cross_val_score(
    estimator=clf,
    X=X_train,
    y=y_train,
    scoring=make_scorer(calculate_mape, greater_is_better=False),
    cv=KFold(n_splits=5, shuffle=True, random_state=42),
)
# Flip the sign back so the reported mean is a positive MAPE.
print(
    f"TabPFN CV MAPE: {-mape_scores.mean():.4f} (+/- {mape_scores.std() * 2:.4f})"
)

TabPFN CV MAPE: 0.5049 (+/- 0.2501)


In [20]:
# Train the multi-output model on the full training set.
clf.fit(X=X_train, y=y_train)

InvalidParameterError: The 'estimator' parameter of MultiOutputRegressor must be an object implementing 'fit' and 'predict'. Got [TabPFNRegressor(device='cuda'), TabPFNRegressor(device='cuda'), TabPFNRegressor(device='cuda')] instead.

In [22]:
# Predict every target for the test set and report the shape of the result.
predictions = clf.predict(X=X_test)
print(f"Predictions shape: {predictions.shape}")

Predictions shape: (500, 10)


In [23]:
# Build the submission frame: the test 'ID' column followed by one column per
# predicted target. Column names are taken from `target_columns` — the columns
# the model was fitted on, in that order — instead of a hard-coded list, so the
# prediction columns cannot be mislabelled if the training layout changes.
assert predictions.shape == (len(test), len(target_columns)), (
    "predictions do not line up with the test rows / target columns"
)

# Work on a copy so the original 'test' DataFrame is left untouched.
test_with_predictions = test.copy()

# Assign the prediction array column-by-column to the target column names.
test_with_predictions[target_columns] = predictions

# Keep only the identifier and the predicted targets.
test_with_predictions = test_with_predictions[['ID'] + list(target_columns)]

# Verify the result
print("Shape of the new combined DataFrame:", test_with_predictions.shape)
print("\nTop 5 rows of the combined DataFrame:")
# Show the rows announced above (previously nothing was displayed after the message).
test_with_predictions.head()


Shape of the new combined DataFrame: (500, 11)

Top 5 rows of the combined DataFrame:


In [24]:
# Display the submission frame as the cell output for a final visual check.
test_with_predictions

Unnamed: 0,ID,BlendProperty1,BlendProperty2,BlendProperty3,BlendProperty4,BlendProperty5,BlendProperty6,BlendProperty7,BlendProperty8,BlendProperty9,BlendProperty10
0,1,0.155831,0.195178,0.804584,0.715683,0.351751,0.767343,0.766554,0.295481,-0.372142,0.316256
1,2,-0.657282,-0.712930,-1.176398,0.031496,-0.728128,-0.125800,-1.169354,-1.111023,-0.909405,-0.008636
2,3,1.814321,1.163385,1.158747,1.111380,2.547431,1.795956,1.116346,2.001594,0.736295,2.209446
3,4,-0.465750,0.080764,0.807001,-0.700137,1.889994,-0.536108,0.759573,1.694087,0.736576,-0.907193
4,5,0.098901,-1.024938,1.039546,0.523161,2.504419,0.263571,1.026625,-0.001627,-0.453031,1.148754
...,...,...,...,...,...,...,...,...,...,...,...
495,496,0.201744,-0.962754,1.106679,-0.241950,-0.277259,-0.782664,1.095795,-0.589779,-1.307725,-0.367207
496,497,-2.184760,-1.257445,-1.061769,-2.289684,-0.627200,-2.443931,-1.045503,-1.869990,-1.227674,-1.443271
497,498,1.955008,2.124726,0.296155,1.346314,-0.014256,0.669229,0.268499,1.079289,0.217352,0.451333
498,499,-0.216230,0.814024,1.609025,-1.313257,-0.912242,0.201408,1.892087,0.550111,0.096723,1.229987


In [26]:
# Write the submission to 'tabpfn.csv', omitting the DataFrame index.
test_with_predictions.to_csv('tabpfn.csv',index=False)