# Used Car Price Regression
In this notebook, used car prices are predicted with an ensemble of tuned XGBoost, LightGBM (LGBM), and CatBoost models.


# Preparation

## Import libraries

In [None]:
import pandas as pd              # For data manipulation and analysis
import numpy as np               # For numerical computing
from datetime import datetime
import scipy.stats as stats      # For statistical analysis
import math
import matplotlib                # For plotting and visualization
import matplotlib.pyplot as plt  
from pandas.plotting import parallel_coordinates
import seaborn as sns            # For statistical data visualization
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

In [None]:
# For machine learning
from lightgbm import LGBMRegressor
from lightgbm import log_evaluation, early_stopping
from catboost import CatBoostRegressor, Pool
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR
import optuna

## Load datasets

In [None]:
# Load the competition train and test CSVs; the first column of each file becomes the index.
df_train = pd.read_csv('/kaggle/input/playground-series-s4e9/train.csv',index_col=0)
df_test = pd.read_csv('/kaggle/input/playground-series-s4e9/test.csv', index_col=0)

# Exploratory Data Analysis

In [None]:
# Preview the first rows of the training data.
df_train.head()

In [None]:
# Preview the first rows of the test data.
df_test.head()

In [None]:
# (rows, columns) of the train and test frames.
df_train.shape, df_test.shape

# Data Visualization

Distribution of Target - Price

In [None]:
# Histogram of the target variable (price).
fig, ax = plt.subplots(figsize=(10, 6))

# Draw on the axes created above instead of relying on the implicit "current axes".
sns.histplot(data=df_train['price'], kde=False, ax=ax)

# Set the main title
fig.suptitle('Distribution of Price', fontsize=10)

# Hide the x-axis label and the tick marks. The tick *labels* stay visible and are
# formatted below, so no set_xticklabels('') call is needed: it would be overridden
# by the formatter anyway.
ax.set_xlabel('')
ax.tick_params(left=False, bottom=False)

# Show prices in millions on the x axis (e.g. 1500000 -> "1.50M")
ax.xaxis.set_major_formatter(
    matplotlib.ticker.FuncFormatter(lambda x, p: "{:.2f}M".format(x / 1e6)))

sns.despine(right=True)

Explore the distribution of each numeric variable

In [None]:
# Summary of the training frame: column dtypes and non-null counts.
df_train.info()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Restrict to the int64/float64 columns and compute their pairwise correlations.
numeric_columns = df_train.select_dtypes(include=['float64', 'int64'])
correlation_matrix = numeric_columns.corr()

# Keep only the 'price' column of the matrix, strongest positive correlation first,
# and draw it as a one-column annotated heatmap on a fixed [-1, 1] colour scale.
price_corr = correlation_matrix[['price']].sort_values(by='price', ascending=False)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(price_corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, ax=ax)
ax.set_title('Correlation of Numeric Features with Price')
plt.show()

Explore the distribution of each categorical variable

In [None]:
# Box plot of price for every brand, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='brand', y='price', data=df_train, ax=ax)

# Titles and labels; brand names are rotated so they do not overlap.
ax.set_title('Price Distribution by Brand')
ax.set_xlabel('Brand')
ax.set_ylabel('Price')
ax.tick_params(axis='x', rotation=90)

plt.show()

In [None]:
# Box plot of price for every fuel type.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='fuel_type', y='price', data=df_train, ax=ax)

# Customize the plot. The x axis shows fuel types, so it is labelled accordingly
# (it previously read 'Model', copied from another plot).
ax.set_title('Price Distribution by Fuel Type')
ax.tick_params(axis='x', rotation=90)  # Rotate x-axis labels so they do not overlap
ax.set_xlabel('Fuel Type')
ax.set_ylabel('Price')

plt.show()

In [None]:
# Box plot of price split by the accident-history category.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='accident', y='price', data=df_train, ax=ax)

# Title and axis labels in a single call.
ax.set(title='Price Distribution by Accident', xlabel='Accident', ylabel='Price')

plt.show()

# Feature Engineering
Feature engineering credit: https://www.kaggle.com/code/danishyousuf19/regression-of-used-car-prices

In [None]:
def extract_accident_binary(df):
    """Encode the textual ``accident`` column as 0/1.

    'None reported' becomes 0 and 'At least 1 accident or damage reported'
    becomes 1. Any other value (including missing values) becomes NaN.

    The input frame is not modified; an encoded copy is returned. If the column
    is already numeric (i.e. the encoding has been applied before, for example
    because the cell was re-run) the values are returned unchanged instead of
    being mapped to NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``accident`` column.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``accident`` encoded.
    """
    mapping = {
        'None reported': 0,
        'At least 1 accident or damage reported': 1
    }
    df = df.copy()

    # A numeric column means the mapping was already applied; mapping it again
    # would turn every value into NaN because 0/1 are not keys of `mapping`.
    if pd.api.types.is_numeric_dtype(df['accident']):
        return df

    df['accident'] = df['accident'].map(mapping)

    return df

# Encode the accident column on both splits; the returned frame is rebound to the same name.
df_train = extract_accident_binary(df_train)
df_test = extract_accident_binary(df_test)

In [None]:
def _classify_transmission(text):
    """Bucket a lower-cased transmission description into a coarse type.

    Returns 'manual', 'automatic', 'CVT' or 'Other'. Non-string values
    (e.g. NaN for a missing description) are classified as 'Other'.
    """
    if not isinstance(text, str):
        return 'Other'
    if 'm/t' in text or 'manual' in text or 'mt' in text:
        return 'manual'
    if 'a/t' in text or 'automatic' in text or 'at' in text:
        return 'automatic'
    # The text is lower-cased before classification, so the marker must be
    # lower-case too; testing for 'CVT' could never match.
    if 'cvt' in text:
        return 'CVT'
    return 'Other'


def extract_data(df):
    """Derive engine and transmission features from the raw text columns.

    Works on a copy, so the caller's frame is left untouched. The returned
    frame has ``transmission`` lower-cased and four added columns:

    * ``horsepower``  - decimal number immediately followed by 'HP' in ``engine``
    * ``engine_size`` - decimal number immediately followed by 'L' in ``engine``
    * ``cylinders``   - integer followed by whitespace and 'Cylinder', 'V<digit>'
      or 'Straight' in ``engine``
    * ``transmission_type`` - 'manual', 'automatic', 'CVT' or 'Other'

    Values that cannot be extracted are NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string columns ``engine`` and ``transmission``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the derived columns.
    """
    # Ensure the original DataFrame remains intact
    df = df.copy()
    df['transmission'] = df['transmission'].str.lower()

    # Extract horsepower, engine size and cylinder count from the engine text
    df['horsepower'] = df['engine'].str.extract(r'(\d+\.\d+)(?=HP)', expand=False).astype(float)
    df['engine_size'] = df['engine'].str.extract(r'(\d+\.\d+)(?=L)', expand=False).astype(float)
    df['cylinders'] = df['engine'].str.extract(r'(\d+)\s(Cylinder|V\d|Straight)')[0].astype(float)

    # Classify transmission type
    df['transmission_type'] = df['transmission'].apply(_classify_transmission)

    return df

# Derive the engine/transmission features on both splits.
df_train = extract_data(df_train)
df_test = extract_data(df_test)  

# Spot-check the transmission classification against the source text.
print(df_train[['transmission', 'transmission_type']])

# Shape after feature extraction (columns are only added by extract_data, none removed).
print(df_train.shape)

In [None]:
# Infer a fuel type label from the free-text engine description.
def extract_fuel_type(engine_info):
    """Return the fuel type named in ``engine_info``, or NaN if none is found.

    The description is searched for keywords in a fixed priority order
    (Gasoline, Hybrid, Flex Fuel / E85, Diesel, Electric); the first group
    with a keyword present wins. Missing input yields NaN.
    """
    if pd.isna(engine_info):
        return np.nan

    # (label, keywords) pairs, listed in the order they take precedence.
    keyword_table = (
        ('Gasoline', ('Gasoline',)),
        ('Hybrid', ('Hybrid',)),
        ('Flex Fuel', ('Flex Fuel', 'E85')),
        ('Diesel', ('Diesel',)),
        ('Electric', ('Electric',)),
    )
    for label, keywords in keyword_table:
        if any(word in engine_info for word in keywords):
            return label
    return np.nan

# Fill missing fuel_type values with the fuel type inferred from the engine text.
# The same steps run on both splits, so they are written once.
for frame in (df_train, df_test):
    inferred_fuel = frame['engine'].apply(extract_fuel_type)
    # Assign the filled column back explicitly. Calling fillna(inplace=True) on
    # frame['fuel_type'] acts on a column selection and is not guaranteed to
    # update the frame itself (it does not under pandas copy-on-write).
    frame['fuel_type'] = frame['fuel_type'].fillna(inferred_fuel)

In [None]:
def fill_clean_title(row):
    """Return a ``clean_title`` value for one row, imputing it when missing.

    An existing value is returned as is. When it is missing, the ``accident``
    flag decides: 0 gives True, 1 gives False, and anything else leaves the
    value missing.
    """
    title = row['clean_title']
    if not pd.isna(title):
        return title

    accident_flag = row['accident']
    if accident_flag == 0:
        return True
    if accident_flag == 1:
        return False
    # Accident information is unavailable too: keep the missing value.
    return title

# Impute clean_title row by row (axis=1 passes each row to the function) on both splits.
df_train['clean_title'] = df_train.apply(fill_clean_title, axis=1)
df_test['clean_title'] = df_test.apply(fill_clean_title, axis=1)

In [None]:
def feature(df, current_year=None):
    """Add age, usage-intensity and luxury-brand features.

    Adds three columns to a copy of ``df`` (the input is not modified):

    * ``Vehicle_Age``      - ``current_year - model_year``
    * ``Mileage_per_Year`` - ``milage / (Vehicle_Age + 10e-5)``
    * ``Is_Luxury_Brand``  - 1 if ``brand`` is in the luxury list below, else 0

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``model_year``, ``milage`` and ``brand`` columns.
    current_year : int, optional
        Reference year for the age computation. Defaults to the year in which
        the code runs; pass a fixed value to make the features independent of
        the run date.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the three columns added.
    """
    if current_year is None:
        current_year = datetime.now().year

    luxury_brands = [
        "Mercedes-Benz", "BMW", "Audi", "Porsche", "Land Rover",
        "Lexus", "Cadillac", "Tesla", "INFINITI", "Jaguar",
        "Bentley", "Maserati", "Lamborghini", "Genesis", "Rolls-Royce",
        "Ferrari", "McLaren", "Aston Martin", "Lucid", "Lotus",
        "Karma", "Bugatti", "Maybach",
    ]

    df = df.copy()
    df['Vehicle_Age'] = current_year - df['model_year']
    # The small constant (10e-5 == 1e-4) keeps the division finite when the age is 0.
    df['Mileage_per_Year'] = df['milage'] / (df['Vehicle_Age'] + 10e-5)
    # Vectorised membership test instead of a Python-level lambda per row.
    df['Is_Luxury_Brand'] = df['brand'].isin(luxury_brands).astype(int)
    return df

# Add Vehicle_Age, Mileage_per_Year and Is_Luxury_Brand to both splits.
df_train = feature(df_train)
df_test = feature(df_test)

In [None]:
def extract_base_colors(df, column_name):
    """Reduce free-text colour names to a base colour.

    The ``column_name`` column and, when present, the ``int_col`` and
    ``ext_col`` columns are each lower-cased and replaced by the first entry
    of the base-colour list that occurs as a substring of the text. Text with
    no match is kept (lower-cased); non-string values are left as they are.

    Each column is derived from its own values. Previously ``ext_col`` was
    computed from ``column_name``, so calling the function with 'int_col'
    replaced every exterior colour with the interior colour.

    The input frame is not modified; a processed copy is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the colour column(s).
    column_name : str
        Name of a colour column to normalise.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the colour columns reduced to base colours.
    """
    # Common base colours; order matters because the first match wins.
    base_colors = [
        'black', 'white', 'gray', 'silver', 'brown', 'red', 'blue', 'green',
        'beige', 'tan', 'orange', 'gold', 'yellow', 'purple', 'pink',
        'charcoal', 'ivory', 'camel', 'chestnut', 'pearl', 'linen', 'graphite',
        'copper', 'slate', 'bronze', 'sand', 'amber', 'macchiato', 'ebony', 'cocoa'
    ]

    def find_base_color(text):
        # Missing values are not strings; a substring test on them would raise.
        if not isinstance(text, str):
            return text
        for color in base_colors:
            if color in text:
                return color
        return text

    df = df.copy()
    target_columns = [column_name] + [
        col for col in ('int_col', 'ext_col')
        if col != column_name and col in df.columns
    ]
    for col in target_columns:
        df[col] = df[col].str.lower().apply(find_base_color)

    return df
# Run the colour normalisation on both splits, passing 'int_col' as the column to process.
df_train=extract_base_colors(df_train,'int_col')
df_test=extract_base_colors(df_test,'int_col')

In [None]:
# Remove the raw text columns and the helper transmission_type column from both splits.
dropped_columns = ['transmission', 'engine', 'transmission_type', 'model']
df_train = df_train.drop(columns=dropped_columns)
df_test = df_test.drop(columns=dropped_columns)

In [None]:
# Columns treated as categorical by the cleaning / encoding steps.
cat_feats = ['brand',  'fuel_type',  'ext_col', 'int_col', 'clean_title']
# Numeric predictors; each column is listed once ('cylinders' used to appear twice).
numeric_feats = ['milage', 'horsepower', 'engine_size', 'cylinders', 'accident',
                 'Is_Luxury_Brand', 'Mileage_per_Year', 'Vehicle_Age']

In [None]:
def cleaning(df, cat_feats, threshold=101):
    """Fill missing categorical values and pool rare levels.

    For every column in ``cat_feats``:

    1. missing values become the label 'missing';
    2. every label that occurs fewer than ``threshold`` times in ``df``
       (counted after step 1, so 'missing' itself can be pooled) becomes 'noise';
    3. the column is returned with ``category`` dtype.

    Rarity is judged within ``df`` itself. The input frame is not modified.
    Unlike the earlier implementation, the function can be applied again to
    its own output without raising: it no longer calls ``add_categories`` with
    labels that may already exist.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the categorical columns.
    cat_feats : list of str
        Names of the columns to clean.
    threshold : int, default 101
        Minimum number of occurrences a label needs in order to be kept.

    Returns
    -------
    pandas.DataFrame
        Cleaned copy of ``df``.
    """
    df = df.copy()
    for col in cat_feats:
        # Work on plain Python objects so that 'missing' / 'noise' can be written
        # without first registering them as categories.
        values = df[col].astype(object)
        values = values.where(values.notna(), 'missing')

        counts = values.value_counts()
        rare_values = counts.index[counts < threshold]

        df[col] = values.where(~values.isin(rare_values), 'noise').astype('category')

    return df
# Clean the categorical columns of each split with the default threshold.
# Each call counts label frequencies within the frame it is given.
df_train = cleaning(df_train, cat_feats)
df_test = cleaning(df_test, cat_feats) 

In [None]:
def extract_cleaning_binary(df):
    """Turn ``clean_title`` into a 0/1 flag, in place, and return the frame.

    'Yes' and True map to 1; 'missing', 'noise' and False map to 0.
    Values outside this mapping become NaN.
    """
    # Build the lookup from the two groups of labels that share a target value.
    flagged_clean = ('Yes', True)
    flagged_not_clean = ('missing', 'noise', False)
    mapping = {**dict.fromkeys(flagged_clean, 1), **dict.fromkeys(flagged_not_clean, 0)}

    df['clean_title'] = df['clean_title'].map(mapping)
    return df

# Convert clean_title to a 0/1 flag on both splits.
df_train = extract_cleaning_binary(df_train)
df_test = extract_cleaning_binary(df_test)

In [None]:
# Cast every categorical feature to pandas' category dtype on both splits.
category_dtypes = {col: 'category' for col in cat_feats}
df_train = df_train.astype(category_dtypes)
df_test = df_test.astype(category_dtypes)

In [None]:
# Encode and impute train and test together in one cell so that both splits
# are guaranteed to receive identical treatment.
from sklearn.preprocessing import LabelEncoder

# Convert the engineered columns to numeric; unparseable values become NaN.
engineered_numeric_cols = ['horsepower', 'engine_size', 'cylinders', 'Vehicle_Age', 'Mileage_per_Year']
for col in engineered_numeric_cols:
    df_train[col] = pd.to_numeric(df_train[col], errors='coerce')
    df_test[col] = pd.to_numeric(df_test[col], errors='coerce')
df_train['price'] = pd.to_numeric(df_train['price'], errors='coerce')

# Label-encode the categorical columns with ONE encoder per column, fitted on
# the labels of both splits. Fitting a separate encoder on each split (as was
# done before) assigns codes by the sorted labels present in that split, so
# the same label could receive different integers in train and test.
for col in ['brand', 'fuel_type', 'ext_col', 'int_col', 'clean_title']:
    # A numeric column has already been encoded (cell re-run); leave it alone.
    if pd.api.types.is_numeric_dtype(df_train[col]):
        continue
    train_labels = df_train[col].astype(str)
    test_labels = df_test[col].astype(str)
    le = LabelEncoder().fit(pd.concat([train_labels, test_labels]))
    df_train[col] = le.transform(train_labels)
    df_test[col] = le.transform(test_labels)

# Handle missing values: impute both splits with the TRAIN column means so the
# test features are filled with the same values the models were trained on.
train_means = df_train.mean(numeric_only=True)
df_train = df_train.fillna(train_means)
df_test = df_test.fillna(train_means.drop('price'))

In [None]:
# Display the processed training frame.
df_train

# Preprocessing

In [None]:
# Separate the target (price) from the predictors.
y = df_train['price']
X = df_train.drop(columns=['price'])

X.shape, y.shape

In [None]:
# Hold out 2% of the rows as a validation set; the fixed random_state makes the split repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.02, random_state=12)

# XGBoost

In [None]:
def cross_validate_model_x(model, X_train, y_train, X_valid, y_valid, params, n_splits=5):
    """Cross-validate an XGBoost regressor and predict the test set.

    For each of ``n_splits`` shuffled folds of ``X_train`` a model is trained
    with early stopping on the fold's validation part and used to predict the
    notebook-level ``df_test``. A final model is then fitted on all of
    ``X_train`` (without early stopping) and scored on the hold-out set.

    Parameters
    ----------
    model : callable
        Estimator class, e.g. ``XGBRegressor``.
    X_train, y_train : pandas.DataFrame, pandas.Series
        Training features and target; the folds are drawn from these.
    X_valid, y_valid : pandas.DataFrame, pandas.Series
        Hold-out data used only for the final RMSE.
    params : dict
        Keyword arguments for ``model``.
    n_splits : int, default 5
        Number of cross-validation folds.

    Returns
    -------
    tuple
        ``(final_model, test_preds, valid_score)``: the model fitted on all of
        ``X_train``, the fold-averaged predictions for ``df_test``, and the
        final model's RMSE on the hold-out set.

    Notes
    -----
    ``early_stopping_rounds`` is passed to the estimator constructor; recent
    XGBoost releases no longer accept it as a ``fit`` argument.
    """
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    test_preds = np.zeros((len(df_test), n_splits), dtype=np.float32)

    # An explicit value in `params` wins over the default of 50 rounds.
    fold_params = {'early_stopping_rounds': 50, **params, 'enable_categorical': True}

    # Cross-validation loop
    for fold, (train_ind, valid_ind) in enumerate(cv.split(X_train)):
        # The indices come from cv.split(X_train), so they must index X_train /
        # y_train. Indexing the notebook-level X / y instead selects different
        # rows and lets hold-out rows leak into the training folds.
        X_fold_train = X_train.iloc[train_ind]
        y_fold_train = y_train.iloc[train_ind]
        X_val = X_train.iloc[valid_ind]
        y_val = y_train.iloc[valid_ind]

        # Model initialization and training
        clf = model(**fold_params)
        clf.fit(X_fold_train, y_fold_train, eval_set=[(X_val, y_val)], verbose=500)

        # Report the fold score and store this fold's test predictions
        fold_rmse = np.sqrt(mean_squared_error(y_val, clf.predict(X_val)))
        test_preds[:, fold] = clf.predict(df_test)

        print("-" * 50)
        print(f"Fold {fold + 1}/{n_splits} RMSE: {fold_rmse}")

    # Train on full X_train after cross-validation. Early stopping needs an
    # evaluation set, so it is switched off for this fit.
    final_params = {k: v for k, v in fold_params.items() if k != 'early_stopping_rounds'}
    final_model = model(**final_params)
    final_model.fit(X_train, y_train)

    # Evaluate on the hold-out set. sqrt(MSE) is used because the `squared`
    # argument of mean_squared_error is not available in all scikit-learn versions.
    valid_preds = final_model.predict(X_valid)
    valid_score = np.sqrt(mean_squared_error(y_valid, valid_preds))
    print(f"Validation RMSE: {valid_score}")

    # Average test predictions from cross-validation
    test_preds = np.mean(test_preds, axis=1)

    return final_model, test_preds, valid_score

In [None]:
# Define the objective function for Optuna
def objective(trial):
    """Optuna objective for XGBoost: RMSE on an 80/20 split of the training data.

    Samples one hyperparameter configuration from ``trial``, fits an
    ``XGBRegressor`` on 80% of the notebook-level ``X_train`` / ``y_train``
    and returns the RMSE on the remaining 20%.
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 200, 10000),
        'max_depth': trial.suggest_int('max_depth', 10, 50),
        'min_child_weight': trial.suggest_int('min_child_weight', 50, 100),
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform,
        # and plain suggest_float replaces the deprecated suggest_uniform.
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 1.0),
        'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),
        'lambda': trial.suggest_float('lambda', 1e-8, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-8, 10.0, log=True),
        'booster': 'gbtree',
        'objective':'reg:squarederror',
        'eval_metric':'rmse',
        'random_state': 42
    }

    # Fixed random_state: every trial is scored on the same split.
    X_train_split, X_valid_split, y_train_split, y_valid_split = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

    # Train XGBoost model with current hyperparameters (GPU histogram method)
    clf = XGBRegressor(**params, enable_categorical=True, tree_method='hist', device='cuda')
    clf.fit(X_train_split, y_train_split)

    # Predict prices on the validation split
    y_pred = clf.predict(X_valid_split)

    # RMSE computed as sqrt(MSE); the `squared` argument of mean_squared_error
    # is not available in all scikit-learn versions.
    rmse = np.sqrt(mean_squared_error(y_valid_split, y_pred))
    return rmse

# Optimize hyperparameters using Optuna; the sampler is seeded so the search is repeatable.
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Get best hyperparameters (only the values suggested through `trial`)
xgb_best_params = study.best_params
print("Best Hyperparameters:", xgb_best_params)

In [None]:
# Hand-written XGBoost settings.
# NOTE(review): this dict is not passed to the call below, which uses `xgb_best_params`
# from the Optuna study instead — confirm which of the two is intended.
xgb_params={'booster': 'gbtree',
             'objective':'reg:squarederror',
             'eval_metric':'rmse',
             'random_state': 42,
             'lambda': 0.03880258557285165,
             'alpha': 0.02129832295514386,
             'colsample_bytree': 0.4,
             'subsample': 0.7,
             'learning_rate': 0.014,
             'max_depth': 17,
             'min_child_weight': 85,
             'n_estimators': 10000
} 
print('XGBoost Cross-Validation Results:\n')
# Returns the fitted model, the fold-averaged test predictions and the hold-out RMSE.
xgb_model, test_predsx, xgb_val_score = cross_validate_model_x(XGBRegressor, X_train, y_train, X_valid, y_valid, xgb_best_params)

In [None]:
# Start from the sample submission and overwrite its price column with the XGBoost test predictions.
# assumes the rows of sample_submission.csv are in the same order as df_test — TODO confirm
xgb_result =  pd.read_csv('/kaggle/input/playground-series-s4e9/sample_submission.csv')
xgb_result['price'] = test_predsx.astype(np.float32)
xgb_result

# LGBM

In [None]:
def cross_validate_model_l(model, X_train, y_train, X_valid, y_valid, params, n_splits=5):
    """Cross-validate a LightGBM regressor and predict the test set.

    For each of ``n_splits`` shuffled folds of ``X_train`` a model is trained
    with early stopping on the fold's validation part and used to predict the
    notebook-level ``df_test``.

    Parameters
    ----------
    model : callable
        Estimator class, e.g. ``LGBMRegressor``.
    X_train, y_train : pandas.DataFrame, pandas.Series
        Training features and target; the folds are drawn from these.
    X_valid, y_valid : pandas.DataFrame, pandas.Series
        Hold-out data used only for the reported RMSE.
    params : dict
        Keyword arguments for ``model``.
    n_splits : int, default 5
        Number of cross-validation folds.

    Returns
    -------
    tuple
        ``(clf, test_preds, valid_score)``: the model trained on the LAST fold,
        the fold-averaged predictions for ``df_test``, and that model's RMSE
        on the hold-out set.
    """
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    test_preds = np.zeros((len(df_test), n_splits), dtype=np.float32)

    # Cross-validation loop
    for fold, (train_ind, valid_ind) in enumerate(cv.split(X_train)):
        # Data splitting
        X_fold_train = X_train.iloc[train_ind]
        y_fold_train = y_train.iloc[train_ind]
        X_val = X_train.iloc[valid_ind]
        y_val = y_train.iloc[valid_ind]

        # New callback objects for every fold, so that no fold depends on the
        # early-stopping callback resetting state left over from a previous fit.
        callbacks = [log_evaluation(period=150), early_stopping(stopping_rounds=200)]

        # Model initialization and training
        clf = model(**params)
        clf.fit(X_fold_train, y_fold_train, eval_set=[(X_val, y_val)], callbacks=callbacks)

        # Store this fold's test predictions
        test_preds[:, fold] = clf.predict(df_test)

        print("-" * 50)
        print(f"Fold {fold + 1}/{n_splits} done")

    # Score the last fold's model on the hold-out set. sqrt(MSE) is used because
    # the `squared` argument of mean_squared_error is not available in all
    # scikit-learn versions.
    valid_preds = clf.predict(X_valid)
    valid_score = np.sqrt(mean_squared_error(y_valid, valid_preds))
    print(f"Validation RMSE: {valid_score}")

    # Average test predictions from cross-validation
    test_preds = np.mean(test_preds, axis=1)

    return clf, test_preds, valid_score

In [None]:
# Define the objective function for Optuna
def objective(trial):
    """Optuna objective for LightGBM: RMSE on an 80/20 split of the training data.

    Samples one hyperparameter configuration from ``trial``, fits an
    ``LGBMRegressor`` on 80% of the notebook-level ``X_train`` / ``y_train``
    and returns the RMSE on the remaining 20%.
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 200, 1000),
        'num_leaves': trial.suggest_int('num_leaves', 32, 200),
        'max_depth': trial.suggest_int('max_depth', 10, 50),
        'min_child_weight': trial.suggest_int('min_child_weight', 50, 100),
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform,
        # and plain suggest_float replaces the deprecated suggest_uniform.
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'verbose':-1
    }

    # Train/validation split; the fixed random_state scores every trial on the same rows.
    X_train_split, X_valid_split, y_train_split, y_valid_split = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

    # Initialize LGBM model
    clf = LGBMRegressor(**params, objective='regression', random_state=42)

    # Fit the model while tracking RMSE on the validation split
    # (no early-stopping callback is passed, so all n_estimators rounds are trained).
    clf.fit(X_train_split, y_train_split, eval_set=[(X_valid_split, y_valid_split)], eval_metric='rmse')

    # Predict on the validation set
    y_pred = clf.predict(X_valid_split)

    # RMSE computed as sqrt(MSE); the `squared` argument of mean_squared_error
    # is not available in all scikit-learn versions.
    rmse = np.sqrt(mean_squared_error(y_valid_split, y_pred))
    return rmse

# Optimize hyperparameters using Optuna; the sampler is seeded so the search is repeatable.
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Get best hyperparameters (only the values suggested through `trial`)
lgb_best_params = study.best_params
print("Best Hyperparameters:", lgb_best_params)

In [None]:
# Hand-written LightGBM settings.
# NOTE(review): this dict is not passed to the call below, which uses `lgb_best_params`
# from the Optuna study instead — confirm which of the two is intended.
lgb_params = {
    'learning_rate': 0.017521301504983752,
    'max_depth': 42,
    'reg_alpha': 0.06876635751774487, 
    'reg_lambda': 9.738899198284985,
    'num_leaves': 131,
    'subsample': 0.2683765421728044,
    'colsample_bytree': 0.44346036599709887,
    'n_estimators': 1000,
    'random_state': 42          
}

print('LightGBM Cross-Validation Results:\n')
# Returns a fitted model, the fold-averaged test predictions and the hold-out RMSE.
lgb_model,test_predsl, lgb_val_score = cross_validate_model_l(LGBMRegressor, X_train, y_train, X_valid, y_valid, lgb_best_params)

In [None]:
# Start from the sample submission and overwrite its price column with the LightGBM test predictions.
# assumes the rows of sample_submission.csv are in the same order as df_test — TODO confirm
lgb_result =  pd.read_csv('/kaggle/input/playground-series-s4e9/sample_submission.csv')
lgb_result['price'] = test_predsl.astype(np.float32)
lgb_result

# CatBoost

In [None]:
def cross_validate_model_c(model, X_train, y_train, X_valid, y_valid, params, n_splits=5):
    """Cross-validate a CatBoost regressor and predict the test set.

    For each of ``n_splits`` shuffled folds of ``X_train`` a model is trained
    with the fold's validation part as evaluation set and used to predict the
    notebook-level ``df_test``.

    Parameters
    ----------
    model : callable
        Estimator class, e.g. ``CatBoostRegressor``.
    X_train, y_train : pandas.DataFrame, pandas.Series
        Training features and target; the folds are drawn from these.
    X_valid, y_valid : pandas.DataFrame, pandas.Series
        Hold-out data used only for the reported RMSE.
    params : dict
        Keyword arguments for ``model``.
    n_splits : int, default 5
        Number of cross-validation folds.

    Returns
    -------
    tuple
        ``(clf, test_preds, valid_score)``: the model trained on the LAST fold,
        the fold-averaged predictions for ``df_test``, and that model's RMSE
        on the hold-out set.
    """
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=0)
    test_preds = np.zeros((len(df_test), n_splits), dtype=np.float32)

    # Cross-validation loop
    for fold, (train_ind, valid_ind) in enumerate(cv.split(X_train)):
        # Data splitting
        X_fold_train = X_train.iloc[train_ind]
        y_fold_train = y_train.iloc[train_ind]
        X_val = X_train.iloc[valid_ind]
        y_val = y_train.iloc[valid_ind]

        # Model initialization and training (verbose=0 silences CatBoost's log)
        clf = model(**params)
        clf.fit(X_fold_train, y_fold_train, eval_set=[(X_val, y_val)], verbose=0)

        # Store this fold's test predictions
        test_preds[:, fold] = clf.predict(df_test)

        print("-" * 50)
        print(f"Fold {fold + 1}/{n_splits} done")

    # Score the last fold's model on the hold-out set. sqrt(MSE) is used because
    # the `squared` argument of mean_squared_error is not available in all
    # scikit-learn versions.
    valid_preds = clf.predict(X_valid)
    valid_score = np.sqrt(mean_squared_error(y_valid, valid_preds))
    print(f"Validation RMSE: {valid_score}")

    # Average test predictions from cross-validation
    test_preds = np.mean(test_preds, axis=1)

    return clf, test_preds, valid_score

In [None]:
from catboost import CatBoostRegressor
# Columns that still have object/category dtype are passed to CatBoost as categorical features.
# NOTE(review): this is evaluated on the already-processed df_train; if every column
# is numeric at this point the list is empty — confirm that is intended.
cat_cols = df_train.select_dtypes(include=['object', 'category']).columns.tolist()

# Define the objective function for Optuna
def objective(trial):
    """Optuna objective for CatBoost: RMSE on an 80/20 split of the training data.

    Samples one hyperparameter configuration from ``trial``, fits a
    ``CatBoostRegressor`` on 80% of the notebook-level ``X_train`` / ``y_train``
    (with early stopping on the remaining 20%) and returns the RMSE on that 20%.
    """
    params = {
        'iterations': trial.suggest_int('iterations', 200, 1000),
        'depth': trial.suggest_int('depth', 4, 10),
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 10.0, log=True),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 10.0, log=True),
        'bootstrap_type': trial.suggest_categorical('bootstrap_type', ['Bayesian', 'Bernoulli', 'MVS']),
        'random_seed': 42,
        'early_stopping_rounds':50,
        'cat_features': cat_cols
    }

    # Fixed random_state: every trial is scored on the same split.
    X_train_split, X_valid_split, y_train_split, y_valid_split = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

    # Train CatBoost model with current hyperparameters
    clf = CatBoostRegressor(**params, silent=True)
    clf.fit(X_train_split, y_train_split, eval_set=[(X_valid_split, y_valid_split)])

    # Predict prices on the validation split
    y_pred = clf.predict(X_valid_split)

    # RMSE computed as sqrt(MSE); the `squared` argument of mean_squared_error
    # is not available in all scikit-learn versions.
    rmse = np.sqrt(mean_squared_error(y_valid_split, y_pred))
    return rmse

# Optimize hyperparameters using Optuna; the sampler is seeded so the search is repeatable.
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50)

# Get best hyperparameters (only the values suggested through `trial`)
cat_best_params = study.best_params
print("Best Hyperparameters:", cat_best_params)

In [None]:
# Hand-written CatBoost settings.
# NOTE(review): this dict is not passed to the call below, which uses `cat_best_params`
# from the Optuna study instead — confirm which of the two is intended.
cat_params = {
    'learning_rate':0.042,
    'iterations':1000,
    'depth':10,
    'random_strength':0,
    'cat_features':cat_cols,
    'l2_leaf_reg':0.3,
    'random_seed':42,
    'early_stopping_rounds': 200,                     
}

print('CatBoost Cross-Validation Results:\n')
# Returns a fitted model, the fold-averaged test predictions and the hold-out RMSE.
cat_model, test_predsc, cat_val_score = cross_validate_model_c(CatBoostRegressor, X_train, y_train, X_valid, y_valid, cat_best_params)

In [None]:
# Start from the sample submission and overwrite its price column with the CatBoost test predictions.
# assumes the rows of sample_submission.csv are in the same order as df_test — TODO confirm
cat_result =  pd.read_csv('/kaggle/input/playground-series-s4e9/sample_submission.csv')
cat_result['price'] = test_predsc.astype(np.float32)
cat_result

# Evaluate

In [None]:
# Print the hold-out RMSE of each model, one line per model.
for label, score in (('XGBoost', xgb_val_score), ('LGBM', lgb_val_score), ('CatBoost', cat_val_score)):
    print(f'{label} Validation RMSE', score)

# Ensemble

In [None]:
# Show the three models' price predictions side by side (column order: LGBM, CatBoost, XGBoost).
pred = pd.concat([lgb_result,cat_result,xgb_result], axis=1)['price']
print(pred)
# Weighted blend of the test predictions: 0.6 LightGBM + 0.3 CatBoost + 0.1 XGBoost.
test_preds = test_predsl * 0.6 + test_predsc * 0.3 + test_predsx*0.1

# Submission

In [None]:
# Write the blended predictions into the sample submission layout and save it without the index column.
# assumes the rows of sample_submission.csv are in the same order as df_test — TODO confirm
df_sub = pd.read_csv('/kaggle/input/playground-series-s4e9/sample_submission.csv')
df_sub['price'] = test_preds
df_sub.to_csv('submission.csv',index=False)