### Imported Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_absolute_error

### Load the Datasets

In [None]:
# Training data, read from the Kaggle input directory for playground-series-s3e16.
df_train = pd.read_csv('/kaggle/input/playground-series-s3e16/train.csv')

In [None]:
# Display the training frame in the notebook output.
df_train

In [None]:
# Test data from the same competition directory.
df_test = pd.read_csv('/kaggle/input/playground-series-s3e16/test.csv')

In [None]:
# Display the test frame in the notebook output.
df_test

In [None]:
# Sample submission file; it is reused further down as the skeleton of the
# final submission frame.
df_sample_submission = pd.read_csv('/kaggle/input/playground-series-s3e16/sample_submission.csv')

In [None]:
# Display the sample submission to see the expected output layout.
df_sample_submission

In [None]:
# Print the column dtypes and non-null counts of the test frame.
df_test.info()

In [None]:
# Distinct values present in the Sex column of the training data.
df_train.Sex.unique()

In [None]:
# Number of training rows per Sex value.
df_train.Sex.value_counts()

In [None]:
# Keep only the rows whose Height is strictly positive.
df_train = df_train[(df_train['Height'] > 0)]

In [None]:
# (rows, columns) remaining after the Height filter.
df_train.shape

In [None]:
# Preview the first rows of the filtered frame.
df_train.head()

In [None]:
# One-hot encode the training frame with pd.get_dummies' default settings
# (presumably only the Sex column is non-numeric — confirm against df_train.dtypes).
df_train = pd.get_dummies(df_train)

In [None]:
# Preview the encoded frame.
df_train.head()

### Created Correlation Heatmap

In [None]:
# Draw the pairwise column correlations of the encoded training frame as an
# annotated heatmap on an explicitly created 12x9 inch axes.
fig, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(df_train.corr(), annot=True, cmap="magma_r", ax=ax)
ax.set_title("Correlation Heatmap", fontdict={"fontweight": "bold"})
plt.show()

In [None]:
# Feature matrix: every column of the encoded training frame except 'id' and
# the 'Age' target, converted to a NumPy array (column names are discarded).
X  = df_train.drop(['id','Age'], axis = 1).values

In [None]:
# Target vector: the Age column as a NumPy array.
y = df_train['Age'].values

In [None]:
# Hold out 20% of the rows for evaluation; random_state=42 fixes the shuffle
# so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Applied Various Algorithms and Hyperparameters using GridSearchCV

In [None]:
# Candidate regressors keyed by display name. Each value is a pair of
# (unfitted estimator, hyperparameter grid); the grid is what the search loop
# below hands to GridSearchCV. An empty grid means the estimator is evaluated
# with its default settings only.
models = {
    'Linear Regression': (LinearRegression(), {}),
    'Ridge': (Ridge(), {'alpha': [0.001, 0.01, 0.1, 1, 10]}),
    'Lasso': (Lasso(), {'alpha': [0.001, 0.01, 0.1, 1, 10]}),
    'Random Forest': (RandomForestRegressor(), {'n_estimators': [10, 50, 100, 200]}),
    # SVR candidate is disabled (reason not recorded in this notebook).
#     'Support Vector Machine': (SVR(), {'C': [0.1, 1, 10]}),
    'K-Nearest Neighbors': (KNeighborsRegressor(), {'n_neighbors': [3, 5, 7]}),
    'XGBoost': (XGBRegressor(), {'max_depth': [3, 6, 9], 'learning_rate': [0.1, 0.01, 0.001]})
}

In [None]:
# Tune every candidate with a 3-fold grid search scored on negative MAE, then
# report how the winning configuration performs on the hold-out split.
for model_name, candidate in models.items():
    model, param_grid = candidate
    print(f"Training {model_name}...")

    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='neg_mean_absolute_error',
        cv=3,
    )
    grid_search.fit(X_train, y_train)

    # best_estimator_ is the best-scoring configuration, refit by GridSearchCV
    # on the whole of X_train; it is evaluated here on data the search never saw.
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)

    print(f"Best Parameters for {model_name}: {grid_search.best_params_}")
    print(f"Mean Absolute Error for {model_name}: {mae}\n")

#### XGBoost Regressor gives the lowest Mean Absolute Error

In [None]:
# Final XGBoost regressor with learning_rate=0.1 and max_depth=6 (presumably
# the best setting reported by the grid search above — confirm against its
# printed output). random_state=42 makes training reproducible.
model = XGBRegressor(learning_rate=0.1, max_depth=6, objective='reg:squarederror', random_state=42)

In [None]:
# Fit on the training split only, so the hold-out split stays unseen for evaluation.
model.fit(X_train, y_train)

In [None]:
# Predictions for the hold-out split.
y_pred = model.predict(X_test)

In [None]:
# Hold-out score of the final model. The metric is a mean absolute error, so
# it is stored and reported under that name (it was previously bound to a
# variable called `mse`, and the printed label was misspelled).
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")

In [None]:
# Refit the same model on all labelled rows (training + hold-out splits)
# before predicting the competition test set.
model.fit(X,y)

In [None]:
# Remove the 'id' column from the test frame in place, since 'id' was also
# excluded from X. NOTE: re-running this cell raises KeyError because the
# column is already gone.
df_test.drop(['id'], axis = 1, inplace = True)

In [None]:
# One-hot encode the test frame, then force it onto exactly the feature
# columns (and column order) the model was trained on. pd.get_dummies derives
# its output columns from the values present in *this* frame, so encoding
# train and test independently does not by itself guarantee matching layouts:
# a category missing from the test set would drop a column and shift the rest.
# Columns absent from the test encoding are filled with 0; columns the model
# never saw are discarded.
train_feature_columns = df_train.drop(['id', 'Age'], axis=1).columns
df_test = pd.get_dummies(df_test).reindex(columns=train_feature_columns, fill_value=0)

In [None]:
# Display the encoded test frame.
df_test

In [None]:
# Predict Age for every test row with the model that was refit on all
# labelled data. The model was fitted on a plain NumPy array, so the columns
# of df_test must be in the same order as the columns of X.
y_pred = model.predict(df_test)

In [None]:
# Display the raw predictions.
y_pred

In [None]:
# Wrap the predictions in a one-column frame; "y" is a temporary name that
# avoids clashing with the sample submission's own Age column during the join.
solution = pd.DataFrame(y_pred, columns = ["y"])

In [None]:
# Attach the predictions to the sample submission by row index.
# NOTE(review): this assumes sample_submission.csv lists ids in the same row
# order as test.csv — confirm.
df = df_sample_submission.join(solution)

In [None]:
# Discard the Age column that came from the sample submission.
df = df.drop(['Age'], axis = 1)

In [None]:
# Give the prediction column its final name.
df.rename(columns = {'y':'Age'}, inplace = True)

In [None]:
# Write the submission with only the columns held in df. index=False keeps
# pandas from prepending the positional row index as an extra, unnamed first
# column, which is not part of the layout shown by the sample submission.
df.to_csv('solution.csv', header=True, index=False)