# **Classical Algorithm: Elastic Net Regression**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

In [None]:
# 1. Load the dataset
# The metadata table is read from the CSV at dataset_path; change the path
# below if your copy of the file lives somewhere else.
dataset_path = '/content/drive/MyDrive/MiniProject_BlackHoleMassEstimation/typeII_AGN_metadata.csv'
data = pd.read_csv(filepath_or_buffer=dataset_path)

In [None]:
# 2. Name the regression target and the predictor columns.
# The predictor list is assembled group by group from the column-naming
# pattern; the resulting order is the order used for the feature matrix.
target_column = 'log_bh_mass'
feature_columns = (
    # <line>_flux followed by <line>_flux_err, for each of the four lines
    [
        f'{line}_flux{suffix}'
        for line in ('h_beta', 'oiii_5007', 'h_alpha', 'nii_6584')
        for suffix in ('', '_err')
    ]
    + ['log_stellar_sigma']
    # psfMag_* and psfMagErr_* columns for the u, g, r, i, z bands
    + [f'psfMag_{band}' for band in 'ugriz']
    + [f'psfMagErr_{band}' for band in 'ugriz']
    # mendel_* columns: p50, p16, p84 for each of logM, logMt, logMb, logMd
    + [
        f'mendel_{quantity}_{percentile}'
        for quantity in ('logM', 'logMt', 'logMb', 'logMd')
        for percentile in ('p50', 'p16', 'p84')
    ]
    # simard_* columns, listed explicitly because their names are irregular
    + [
        'simard_b_t_g',
        'simard_e_b_t_g',
        'simard_b_t_r',
        'simard_e_b_t_r',
        'simard_Rhlg',
        'simard_Rhlr',
        'simard_Rchl_g',
        'simard_Rchl_r',
        'simard_Re',
        'simard_e_Re',
        'simard_e',
        'simard_e_e',
        'simard_nb',
        'simard_e_nb',
        'simard_PpS',
        'simard_Pn4',
    ]
)

In [None]:
# 3. Impute missing values.
# Every NaN in a numeric column (features and target alike) is replaced by
# that column's mean.  numeric_only=True restricts the means to numeric
# columns: without it, DataFrame.mean() raises a TypeError on pandas >= 2.0
# as soon as the CSV contains a non-numeric column.
# NOTE(review): mean-imputing the target invents labels for rows that have
# none; dropping those rows may be more appropriate -- confirm before relying
# on the reported metrics.
data.fillna(data.mean(numeric_only=True), inplace=True)

# 4. Standardize the features (zero mean, unit variance per column).
# NOTE(review): the scaler is fit on all rows, i.e. before the
# cross-validation split further down, so held-out rows contribute to the
# scaling statistics.
scaler = StandardScaler()
data[feature_columns] = scaler.fit_transform(data[feature_columns])

# Feature matrix and target vector used by the cross-validation below.
X = data[feature_columns]
y = data[target_column]

In [None]:
# 5. Evaluate Elastic Net with shuffled 5-fold cross-validation
#    (each fold trains on 80% of the rows and tests on the remaining 20%).
kf = KFold(n_splits=5, shuffle=True, random_state=42)
r2_scores, mae_scores, rmse_scores, mse_scores = [], [], [], []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fit the scaler on the training fold only and apply it to both parts,
    # so no statistic of the held-out rows leaks into the model's inputs.
    # Standardizing already-standardized columns is harmless, so this is
    # correct whether or not X was scaled globally beforehand.
    fold_scaler = StandardScaler().fit(X_train)
    X_train = pd.DataFrame(
        fold_scaler.transform(X_train),
        index=X_train.index,
        columns=X_train.columns,
    )
    X_test = pd.DataFrame(
        fold_scaler.transform(X_test),
        index=X_test.index,
        columns=X_test.columns,
    )

    # alpha and l1_ratio are scikit-learn's defaults, written out so they are
    # visible and easy to tune.
    # NOTE(review): the output recorded with this notebook shows R² ≈ 0,
    # which is what a constant predictor scores; this penalty may be
    # shrinking every coefficient to zero -- inspect model.coef_ and consider
    # a smaller alpha.
    model = ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Compute the MSE once and derive the RMSE from it.
    fold_mse = mean_squared_error(y_test, y_pred)

    r2_scores.append(r2_score(y_test, y_pred))
    mae_scores.append(mean_absolute_error(y_test, y_pred))
    mse_scores.append(fold_mse)
    rmse_scores.append(np.sqrt(fold_mse))

In [None]:
# Summarize each metric across the folds as mean and standard deviation
# (np.std's default, the population standard deviation).
r2_mean, r2_std = np.mean(r2_scores), np.std(r2_scores)
mae_mean, mae_std = np.mean(mae_scores), np.std(mae_scores)
mse_mean, mse_std = np.mean(mse_scores), np.std(mse_scores)
rmse_mean, rmse_std = np.mean(rmse_scores), np.std(rmse_scores)

# Range-normalized "accuracy": 100 * (1 - error / scale), where the scale is
# derived from the spread of the target.
# MAE and RMSE are in the target's units, so they are divided by the range.
# MSE is in squared target units, so it is divided by the squared range;
# dividing it by the plain range would mix units and make the percentage
# depend on the scale of y.
range_of_y = y.max() - y.min()
mae_accuracy = (1 - mae_mean / range_of_y) * 100
mse_accuracy = (1 - mse_mean / range_of_y ** 2) * 100
rmse_accuracy = (1 - rmse_mean / range_of_y) * 100

In [None]:
# Report the cross-validated metrics, one line per metric, as mean ± standard
# deviation across folds (plus the range-normalized accuracy where defined).
print(
    f'R²: {r2_mean:.4f} ± {r2_std:.4f}',
    f'MAE: {mae_mean:.4f} ± {mae_std:.4f}, Accuracy: {mae_accuracy:.2f}%',
    f'MSE: {mse_mean:.4f} ± {mse_std:.4f}, Accuracy: {mse_accuracy:.2f}%',
    f'RMSE: {rmse_mean:.4f} ± {rmse_std:.4f}, Accuracy: {rmse_accuracy:.2f}%',
    sep='\n',
)

R²: -0.0002 ± 0.0003
MAE: 0.4954 ± 0.0039, Accuracy: 90.18%
MSE: 0.4052 ± 0.0053, Accuracy: 91.96%
RMSE: 0.6365 ± 0.0042, Accuracy: 87.38%
