Load the training, test, and sample submission datasets using Pandas.

In [None]:
import pandas as pd

# Read the three competition CSV files from the working directory.
df_train = pd.read_csv(filepath_or_buffer='df_train.csv')
df_test = pd.read_csv(filepath_or_buffer='df_test.csv')
df_sample_submission = pd.read_csv(
    filepath_or_buffer='df_sample_submission.csv',
)

Generate a statistical overview of the training dataset.

In [None]:
# Summary statistics of the training frame, with the default quartiles
# spelled out explicitly.
df_overview = df_train.describe(percentiles=[0.25, 0.5, 0.75])

Clean the dataset by removing rows with missing values.

In [None]:
# Drop every row that has at least one missing value.
df_cleaned = df_train.dropna(axis=0, how='any')

Encode categorical features using one-hot encoding.

In [None]:
# One-hot encode the categorical columns; drop_first removes the first
# level of each encoded column.
df_encoded = pd.get_dummies(
    data=df_cleaned,
    drop_first=True,
)

Create a correlation heatmap to visualize relationships between features.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Plot the pairwise correlation matrix of the encoded frame as a heatmap.
sns.heatmap(data=df_encoded.corr())
plt.show()

Select the top features based on statistical tests.

In [None]:
from sklearn.feature_selection import SelectKBest, f_regression

# Separate the predictors from the target column.
X = df_encoded.drop(columns='target')
y = df_encoded['target']

# The target is fitted with a regressor and scored with MAE further down,
# so rank features with the regression F-test. f_classif would treat every
# distinct target value as its own class.
# Clamp k so a frame with fewer than 10 feature columns does not trigger
# SelectKBest's out-of-range k error/warning.
selector = SelectKBest(score_func=f_regression, k=min(10, X.shape[1]))
X_selected = selector.fit_transform(X, y)

Split the data into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_selected,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

Train the model using the XGBoost algorithm.

In [None]:
from xgboost import XGBRegressor

# Fit an XGBoost regressor with its default hyperparameters on the
# training split.
model = XGBRegressor()
model.fit(X=X_train, y=y_train)

Evaluate the model performance using Mean Absolute Error.

In [None]:
from sklearn.metrics import mean_absolute_error

# Score the held-out split: mean absolute difference between the true
# targets and the model's predictions.
predictions = model.predict(X=X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)

Retrieve the parameters the model was trained with. No hyperparameter search is run above, so these are the XGBoost defaults rather than tuned values.

In [None]:
# Snapshot the fitted estimator's constructor parameters as a dict.
best_params = model.get_params(deep=True)

Train the final model using the best parameters on the entire dataset.

In [None]:
# Build a fresh regressor with the same parameters and refit it on every
# available row (training and held-out splits together).
final_model = XGBRegressor(**best_params)
final_model.fit(X=X_selected, y=y)

Prepare and save the submission dataset.

In [None]:
# The final model was fitted on the one-hot-encoded, feature-selected
# training matrix, so the raw test frame must be put into the same layout
# before predicting.
# Column names that SelectKBest kept, in training order.
selected_columns = X.columns[selector.get_support()]

# Encode the test set without drop_first: the "first" level of a column can
# differ between train and test, so emit every level and let the reindex
# keep exactly the training columns. Levels unseen in the test set become
# all-zero columns; levels unseen in training are discarded.
test_encoded = pd.get_dummies(df_test)
test_features = test_encoded.reindex(columns=selected_columns, fill_value=0)

# Rows are not dropped here, so every test Id gets a prediction.
submission = pd.DataFrame({
    'Id': df_test['Id'],
    'Prediction': final_model.predict(test_features.to_numpy(dtype=float)),
})
submission.to_csv('submission.csv', index=False)