In [None]:
import os

import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from scipy import stats
from scipy.stats import pearsonr
from sklearn.decomposition import PCA
from sklearn.linear_model import ElasticNet, LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from skopt import BayesSearchCV
from xgboost import XGBRegressor

In [None]:
from sklearn.metrics import mean_absolute_error,make_scorer

def mae_exp(y_true_log, y_pred_log):
    """Mean absolute error measured after exponentiating both inputs.

    Intended for targets that were modelled on a log scale: each argument is
    passed through ``np.exp`` and the results are handed to
    ``mean_absolute_error``.

    Parameters
    ----------
    y_true_log : array-like
        Reference values, exponentiated before scoring.
    y_pred_log : array-like
        Predicted values, exponentiated before scoring.

    Returns
    -------
    The value returned by ``mean_absolute_error`` for the exponentiated inputs.
    """
    return mean_absolute_error(np.exp(y_true_log), np.exp(y_pred_log))

# Wrap mae_exp as a scorer object. It is an error metric, so it is declared
# as lower-is-better (greater_is_better=False).
mae_exp_scorer = make_scorer(score_func=mae_exp, greater_is_better=False)

In [None]:

from sklearn.model_selection import train_test_split

# Load the dataset.
# The CSV location defaults to the original local path; set the BRAIN_AGE_DATA
# environment variable to point at the file on another machine so this cell
# does not have to be edited. (The path has no placeholders, so a plain raw
# string is used instead of an f-string.)
DATA_PATH = os.environ.get(
    'BRAIN_AGE_DATA',
    r'C:\Users\User\OneDrive\Desktop\predicting-brain-age\Data\original.csv',
)
dataframe = pd.read_csv(DATA_PATH)


In [None]:
# Modelling pipeline: standardise the features, reduce them with PCA, then
# regress with LightGBM. PCA's n_components is not set here; candidate values
# are supplied through the search space under the 'pca__n_components' key.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('pca', PCA(svd_solver='full')),
        ('lgbm', LGBMRegressor()),
    ]
)

In [None]:
# Search space for the pipeline; keys use the '<step name>__<parameter>' form.
param_grid = dict(
    # Candidate n_components values for the 'pca' step. Per the scikit-learn
    # PCA docs, a float in (0, 1) with svd_solver='full' is the fraction of
    # variance to retain, not a component count.
    pca__n_components=[0.80, 0.85, 0.90, 0.95],
    # Inactive entries for an 'elasticnet' step (no such step is in the
    # current pipeline):
    # elasticnet__alpha=np.logspace(-4, -0.5, 20),
    # elasticnet__l1_ratio=np.logspace(-4, -0.5, 20),
)

In [None]:
# Hyper-parameter search over `pipeline` with BayesSearchCV.
# NOTE(review): param_grid currently lists only four candidate values, so 32
# iterations will necessarily revisit settings -- confirm this is intended.
bayes_search = BayesSearchCV(
    estimator=pipeline,
    search_spaces=param_grid,
    scoring='neg_mean_absolute_error',  # negated MAE: higher (closer to 0) is better
    cv=10,                              # 10-fold cross-validation
    n_iter=32,                          # number of parameter settings sampled
    random_state=42,                    # fixed seed for repeatable sampling
)

In [None]:
# Separate the regression target ('age') from the remaining feature columns
y = dataframe['age']
X = dataframe.drop(columns='age')

# Hold out 20% of the rows for testing; the seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Run the hyper-parameter search on the training portion only
bayes_search.fit(x_train, y_train)

# Report the winning hyper-parameters
print("Best parameters:", bayes_search.best_params_)


In [None]:
# Pull the winning pipeline out of the search and inspect its fitted PCA step
best_pipeline = bayes_search.best_estimator_
best_pca = best_pipeline.named_steps['pca']

# One row per retained principal component; the row count is the cell output
components = best_pca.components_
components.shape[0]

In [None]:
# Evaluate the tuned pipeline on the held-out test set.
# `bayes_search` refits the best pipeline on the whole training set once the
# search finishes (refit defaults to True), so `best_pipeline` is already
# fitted on x_train / y_train; fitting it again here would only repeat work.
y_pred = best_pipeline.predict(x_test)

# Test-set MAE (y_true first, matching the scikit-learn metric signature)
print(mean_absolute_error(y_test, y_pred))

# best_score_ is the negated MAE because scoring='neg_mean_absolute_error';
# flip the sign so a positive MAE is printed, as done for the fixed-PCA
# pipeline's cross-validation score.
print("Best cross-validation score: {:.2f}".format(-bayes_search.best_score_))

# The final estimator is LightGBM, so label the metrics accordingly
print('lgbm r2: ', r2_score(y_test, y_pred))
print('lgbm pearson: ', pearsonr(y_test, y_pred))

In [None]:
from sklearn.model_selection import cross_val_score
# Same pipeline with PCA fixed at n_components=0.85 (a float in (0, 1) with
# svd_solver='full' is the fraction of variance to retain) instead of a
# searched value.
pipeline2 = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(svd_solver='full', n_components=0.85)),
    ('lgbm', LGBMRegressor()),
])

# 10-fold cross-validated (negated) MAE on the training set. cross_val_score
# works on clones of the estimator, so it is independent of the fit below.
cv_lgbm = cross_val_score(
    pipeline2, x_train, y_train, cv=10, scoring='neg_mean_absolute_error'
)

# Fit on the full training set and score on the held-out test set
pipeline2.fit(x_train, y_train)
y_pred = pipeline2.predict(x_test)

# Test-set MAE (y_true first, matching the scikit-learn metric signature)
print(mean_absolute_error(y_test, y_pred))
# Scores are negated MAE values; flip the sign to report a positive MAE
print("Best cross-validation score: ", -np.mean(cv_lgbm))

# The final estimator is LightGBM, so label the metrics accordingly
print('lgbm r2: ', r2_score(y_test, y_pred))
print('lgbm pearson: ', pearsonr(y_test, y_pred))
