Load the training, testing, and sample data from CSV files.

In [None]:
import pandas as pd
# Read the three CSV inputs (paths are relative to the working directory)
# into DataFrames in a single unpacking step.
df_train, df_test, df_sample = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample.csv')
)

Preprocess data using One-Hot Encoding for categorical features.

In [None]:
from sklearn.preprocessing import OneHotEncoder
# Fit the encoder on the training categories only, then reuse the same fitted
# encoder for the test frame so both matrices share one column layout.
#
# handle_unknown='ignore': with the default ('error'), transform() raises a
# ValueError as soon as the test set contains a category that never appeared
# in training. With 'ignore' such rows are encoded as all zeros instead.
encoder = OneHotEncoder(handle_unknown='ignore')
# .toarray() converts the encoder's sparse output to a dense ndarray so it can
# be concatenated with the numerical features later.
train_ohe = encoder.fit_transform(df_train[['categorical_column']]).toarray()
test_ohe = encoder.transform(df_test[['categorical_column']]).toarray()

Combine One-Hot Encoded features with numerical features.

In [None]:
import numpy as np
# Build the final feature matrices column-wise: one-hot columns first,
# followed by the numerical column (kept 2-D via the double-bracket selection).
train_x = np.hstack((train_ohe, df_train[['numerical_column']].to_numpy()))
test_x = np.hstack((test_ohe, df_test[['numerical_column']].to_numpy()))

Train the LightGBM model using the prepared training features.

In [None]:
from lightgbm import LGBMRegressor
# Baseline regressor with LightGBM's default hyperparameters.
model = LGBMRegressor()
# Fit on the assembled feature matrix against the 'target' column.
model.fit(X=train_x, y=df_train['target'])

Optimize the model's hyperparameters with an Optuna study.

In [None]:
from optuna import create_study
from optuna.samplers import TPESampler
from sklearn.model_selection import cross_val_score


def objective(trial):
    """Score one sampled LightGBM configuration.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Mean 5-fold cross-validated RMSE on the training data (lower is better).
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.3, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 8, 256),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
    }
    # scikit-learn scorers follow "greater is better", so the RMSE scorer
    # returns negated values; flip the sign back before handing it to Optuna.
    neg_rmse = cross_val_score(
        LGBMRegressor(**params),
        train_x,
        df_train['target'],
        scoring='neg_root_mean_squared_error',
        cv=5,
    )
    return -neg_rmse.mean()


# The previous objective was the placeholder `lambda trial: ...`, which returns
# Ellipsis rather than a numeric score, so no trial could ever succeed.
# A seeded sampler keeps the search reproducible under Restart & Run All.
study = create_study(direction='minimize', sampler=TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Refit `model` with the best parameters so the downstream cells (feature
# importance, evaluation, submission) use the tuned model, not the baseline.
model = LGBMRegressor(**study.best_params)
model.fit(train_x, df_train['target'])

Calculate feature importance from the trained model.

In [None]:
# Per-feature importance scores exposed by the fitted `model`.
# NOTE(review): presumably one entry per column of train_x, in the order the
# matrix was assembled (one-hot columns first, then the numerical column), and
# using LightGBM's default importance type since none was configured — confirm.
importances = model.feature_importances_

Evaluate the model by calculating the RMSE on the test dataset.

In [None]:
from sklearn.metrics import mean_squared_error
# Predict on the assembled test feature matrix.
preds = model.predict(test_x)
# RMSE = square root of the mean squared error between truth and prediction.
# NOTE(review): this requires df_test to carry a 'target' column — confirm the
# test file actually includes labels.
rmse = np.sqrt(
    mean_squared_error(y_true=df_test['target'], y_pred=preds)
)

Prepare and save the submission file as a CSV.

In [None]:
# Start from a copy of the test-set 'Id' column and attach the predictions
# alongside it, then write the two-column frame without the index.
submission = df_test[['Id']].copy()
submission['Predicted'] = preds
submission.to_csv('submission_optuna_lgbm_ohe_v1.csv', index=False)