In [1]:
import numpy as np
import pandas as pd

In [2]:
dataset = pd.read_csv('babies(preprocessed).csv')

In [3]:
dataset

Unnamed: 0,age,gestation,parity,height,weight,smoke,bwt
0,27.0,284.000000,0,62.0,100.0,0.0,120
1,33.0,282.000000,0,64.0,135.0,0.0,113
2,28.0,279.000000,0,64.0,115.0,1.0,128
3,36.0,279.338512,0,69.0,190.0,0.0,123
4,23.0,282.000000,0,67.0,125.0,1.0,108
...,...,...,...,...,...,...,...
1231,27.0,275.000000,1,60.0,100.0,0.0,113
1232,24.0,265.000000,0,67.0,120.0,0.0,128
1233,30.0,291.000000,0,65.0,150.0,1.0,130
1234,21.0,281.000000,1,65.0,110.0,0.0,125


In [4]:
dataset.isnull().sum()

age          0
gestation    0
parity       0
height       0
weight       0
smoke        0
bwt          0
dtype: int64

In [5]:
X = dataset.drop('bwt', axis=1)
y = dataset['bwt']

# 1. Logistic Regression

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    from sklearn.preprocessing import StandardScaler
    # Standardize the features using StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    # Apply Linear Discriminant Analysis (LDA)
    lda = LDA(n_components=1)
    X_train = lda.fit_transform(X_train, y_train)
    X_test = lda.transform(X_test)
    model = LinearRegression()
    model = model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

______________________________________
Fold 1:
Mean Squared Error (MSE): 249.63
Mean Absolute Error (MAE): 12.34
R-squared (R2): 0.28
Peak Signal-to-Noise Ratio (PSNR): 20.94 dB
Signal-to-Noise Ratio (SNR): 48.60 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 233.05
Mean Absolute Error (MAE): 12.04
R-squared (R2): 0.18
Peak Signal-to-Noise Ratio (PSNR): 21.24 dB
Signal-to-Noise Ratio (SNR): 48.90 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 246.24
Mean Absolute Error (MAE): 12.27
R-squared (R2): 0.18
Peak Signal-to-Noise Ratio (PSNR): 21.00 dB
Signal-to-Noise Ratio (SNR): 48.66 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 284.32
Mean Absolute Error (MAE): 13.22
R-squared (R2): 0.10
Peak Signal-to-Noise Ratio (PSNR): 20.37 dB
Signal-to-Noise Ratio (SNR): 48.03 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 247.53
Mean Absolute Error (MAE): 12.29
R-squared (R2): 0.26
Pe

# 1. Bagging

In [7]:
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    from sklearn.preprocessing import StandardScaler
    # Standardize the features using StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    # Apply Linear Discriminant Analysis (LDA)
    lda = LDA(n_components=1)
    X_train = lda.fit_transform(X_train, y_train)
    X_test = lda.transform(X_test)
    # Create individual regression models
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    
    # Create a Bagging Regressor
    bagging_model = BaggingRegressor(base_estimator=rf_model, n_estimators=10, random_state=42)
    
    # Train the Bagging Regressor
    bagging_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = bagging_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')



______________________________________
Fold 1:
Mean Squared Error (MSE): 331.01
Mean Absolute Error (MAE): 14.14
R-squared (R2): 0.05
Peak Signal-to-Noise Ratio (PSNR): 19.71 dB
Signal-to-Noise Ratio (SNR): 47.37 dB




______________________________________
Fold 2:
Mean Squared Error (MSE): 332.68
Mean Absolute Error (MAE): 14.37
R-squared (R2): -0.16
Peak Signal-to-Noise Ratio (PSNR): 19.69 dB
Signal-to-Noise Ratio (SNR): 47.35 dB




______________________________________
Fold 3:
Mean Squared Error (MSE): 305.39
Mean Absolute Error (MAE): 14.16
R-squared (R2): -0.02
Peak Signal-to-Noise Ratio (PSNR): 20.06 dB
Signal-to-Noise Ratio (SNR): 47.72 dB




______________________________________
Fold 4:
Mean Squared Error (MSE): 352.51
Mean Absolute Error (MAE): 14.84
R-squared (R2): -0.11
Peak Signal-to-Noise Ratio (PSNR): 19.44 dB
Signal-to-Noise Ratio (SNR): 47.10 dB




______________________________________
Fold 5:
Mean Squared Error (MSE): 315.42
Mean Absolute Error (MAE): 14.10
R-squared (R2): 0.05
Peak Signal-to-Noise Ratio (PSNR): 19.92 dB
Signal-to-Noise Ratio (SNR): 47.58 dB




______________________________________
Fold 6:
Mean Squared Error (MSE): 323.35
Mean Absolute Error (MAE): 13.69
R-squared (R2): -0.06
Peak Signal-to-Noise Ratio (PSNR): 19.81 dB
Signal-to-Noise Ratio (SNR): 47.48 dB




______________________________________
Fold 7:
Mean Squared Error (MSE): 292.60
Mean Absolute Error (MAE): 13.54
R-squared (R2): 0.03
Peak Signal-to-Noise Ratio (PSNR): 20.25 dB
Signal-to-Noise Ratio (SNR): 47.91 dB




______________________________________
Fold 8:
Mean Squared Error (MSE): 325.55
Mean Absolute Error (MAE): 14.63
R-squared (R2): 0.04
Peak Signal-to-Noise Ratio (PSNR): 19.78 dB
Signal-to-Noise Ratio (SNR): 47.45 dB




______________________________________
Fold 9:
Mean Squared Error (MSE): 312.95
Mean Absolute Error (MAE): 13.91
R-squared (R2): 0.11
Peak Signal-to-Noise Ratio (PSNR): 19.96 dB
Signal-to-Noise Ratio (SNR): 47.62 dB




______________________________________
Fold 10:
Mean Squared Error (MSE): 281.98
Mean Absolute Error (MAE): 13.40
R-squared (R2): 0.18
Peak Signal-to-Noise Ratio (PSNR): 20.41 dB
Signal-to-Noise Ratio (SNR): 48.07 dB

Average Metrics Across Folds:
Mean Squared Error (MSE): 317.35
Mean Absolute Error (MAE): 14.08
R-squared (R2): 0.01
Peak Signal-to-Noise Ratio (PSNR): 19.90 dB
Signal-to-Noise Ratio (SNR): 47.57 dB


# 2. Random Forest

In [8]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    from sklearn.preprocessing import StandardScaler
    # Standardize the features using StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    # Apply Linear Discriminant Analysis (LDA)
    lda = LDA(n_components=1)
    X_train = lda.fit_transform(X_train, y_train)
    X_test = lda.transform(X_test)
    # Create a Random Forest Regressor
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    
    # Train the Random Forest Regressor
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = rf_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')

______________________________________
Fold 1:
Mean Squared Error (MSE): 395.96
Mean Absolute Error (MAE): 15.46
R-squared (R2): -0.14
Peak Signal-to-Noise Ratio (PSNR): 18.93 dB
Signal-to-Noise Ratio (SNR): 46.60 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 405.63
Mean Absolute Error (MAE): 15.99
R-squared (R2): -0.42
Peak Signal-to-Noise Ratio (PSNR): 18.83 dB
Signal-to-Noise Ratio (SNR): 46.49 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 366.84
Mean Absolute Error (MAE): 15.34
R-squared (R2): -0.23
Peak Signal-to-Noise Ratio (PSNR): 19.27 dB
Signal-to-Noise Ratio (SNR): 46.93 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 395.37
Mean Absolute Error (MAE): 15.54
R-squared (R2): -0.25
Peak Signal-to-Noise Ratio (PSNR): 18.94 dB
Signal-to-Noise Ratio (SNR): 46.60 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 378.42
Mean Absolute Error (MAE): 15.49
R-squared (R2): -0.

# 3. Voting

In [9]:
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    from sklearn.preprocessing import StandardScaler
    # Standardize the features using StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    # Apply Linear Discriminant Analysis (LDA)
    lda = LDA(n_components=1)
    X_train = lda.fit_transform(X_train, y_train)
    X_test = lda.transform(X_test)
    # Create individual regression models
    rf_model = RandomForestRegressor(n_estimators=100, random_state=i)
    lr_model = LinearRegression()
    
    # Create a Voting Regressor
    voting_model = VotingRegressor([('random_forest', rf_model), ('linear_regression', lr_model)])
    
    # Train the Voting Regressor
    voting_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = voting_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')

______________________________________
Fold 1:
Mean Squared Error (MSE): 281.62
Mean Absolute Error (MAE): 13.03
R-squared (R2): 0.19
Peak Signal-to-Noise Ratio (PSNR): 20.41 dB
Signal-to-Noise Ratio (SNR): 48.08 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 281.91
Mean Absolute Error (MAE): 13.19
R-squared (R2): 0.01
Peak Signal-to-Noise Ratio (PSNR): 20.41 dB
Signal-to-Noise Ratio (SNR): 48.07 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 276.35
Mean Absolute Error (MAE): 13.37
R-squared (R2): 0.07
Peak Signal-to-Noise Ratio (PSNR): 20.50 dB
Signal-to-Noise Ratio (SNR): 48.16 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 310.29
Mean Absolute Error (MAE): 13.87
R-squared (R2): 0.02
Peak Signal-to-Noise Ratio (PSNR): 19.99 dB
Signal-to-Noise Ratio (SNR): 47.66 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 277.60
Mean Absolute Error (MAE): 13.12
R-squared (R2): 0.17
Pe

# 4. XGBOOST

In [10]:
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    from sklearn.preprocessing import StandardScaler
    # Standardize the features using StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    # Apply Linear Discriminant Analysis (LDA)
    lda = LDA(n_components=1)
    X_train = lda.fit_transform(X_train, y_train)
    X_test = lda.transform(X_test)
    # Create individual regression models
    rf_model = RandomForestRegressor(n_estimators=100, random_state=i)
    lr_model = LinearRegression()
    xgb_model = XGBRegressor(objective='reg:squarederror', random_state=i)  # Use reg:squarederror for regression
    
    # Create a Voting Regressor
    voting_model = VotingRegressor([('random_forest', rf_model), ('linear_regression', lr_model), ('xgboost', xgb_model)])
    
    # Train the Voting Regressor
    voting_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = voting_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')


______________________________________
Fold 1:
Mean Squared Error (MSE): 283.47
Mean Absolute Error (MAE): 13.18
R-squared (R2): 0.19
Peak Signal-to-Noise Ratio (PSNR): 20.39 dB
Signal-to-Noise Ratio (SNR): 48.05 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 277.66
Mean Absolute Error (MAE): 13.11
R-squared (R2): 0.03
Peak Signal-to-Noise Ratio (PSNR): 20.48 dB
Signal-to-Noise Ratio (SNR): 48.14 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 273.09
Mean Absolute Error (MAE): 13.34
R-squared (R2): 0.09
Peak Signal-to-Noise Ratio (PSNR): 20.55 dB
Signal-to-Noise Ratio (SNR): 48.21 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 299.74
Mean Absolute Error (MAE): 13.74
R-squared (R2): 0.05
Peak Signal-to-Noise Ratio (PSNR): 20.14 dB
Signal-to-Noise Ratio (SNR): 47.81 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 279.46
Mean Absolute Error (MAE): 13.04
R-squared (R2): 0.16
Pe

# 5. Stacking

In [11]:
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    from sklearn.preprocessing import StandardScaler
    # Standardize the features using StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    # Apply Linear Discriminant Analysis (LDA)
    lda = LDA(n_components=1)
    X_train = lda.fit_transform(X_train, y_train)
    X_test = lda.transform(X_test)
    # Create individual regression models
    rf_model = RandomForestRegressor(n_estimators=100, random_state=i)
    lr_model = LinearRegression()
    xgb_model = XGBRegressor(objective='reg:squarederror', random_state=i)  # Use reg:squarederror for regression
    
    # Create a Stacking Regressor
    stacking_model = StackingRegressor(
        estimators=[('random_forest', rf_model), ('linear_regression', lr_model), ('xgboost', xgb_model)],
        final_estimator=LinearRegression()  # You can use a different final estimator
    )
    
    # Train the Stacking Regressor
    stacking_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = stacking_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')


______________________________________
Fold 1:
Mean Squared Error (MSE): 250.14
Mean Absolute Error (MAE): 12.44
R-squared (R2): 0.28
Peak Signal-to-Noise Ratio (PSNR): 20.93 dB
Signal-to-Noise Ratio (SNR): 48.59 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 233.71
Mean Absolute Error (MAE): 12.05
R-squared (R2): 0.18
Peak Signal-to-Noise Ratio (PSNR): 21.22 dB
Signal-to-Noise Ratio (SNR): 48.89 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 246.41
Mean Absolute Error (MAE): 12.33
R-squared (R2): 0.17
Peak Signal-to-Noise Ratio (PSNR): 20.99 dB
Signal-to-Noise Ratio (SNR): 48.66 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 279.01
Mean Absolute Error (MAE): 13.19
R-squared (R2): 0.12
Peak Signal-to-Noise Ratio (PSNR): 20.45 dB
Signal-to-Noise Ratio (SNR): 48.12 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 248.44
Mean Absolute Error (MAE): 12.26
R-squared (R2): 0.26
Pe

# 6. GBDT

In [12]:
from sklearn.ensemble import RandomForestRegressor, StackingRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    from sklearn.preprocessing import StandardScaler
    # Standardize the features using StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    # Apply Linear Discriminant Analysis (LDA)
    lda = LDA(n_components=1)
    X_train = lda.fit_transform(X_train, y_train)
    X_test = lda.transform(X_test)
    # Create individual regression models
    rf_model = RandomForestRegressor(n_estimators=100, random_state=i)
    lr_model = LinearRegression()
    gbdt_model = GradientBoostingRegressor(n_estimators=100, random_state=i)  # Use GradientBoostingRegressor for GBDT
    
    # Create a Stacking Regressor
    stacking_model = StackingRegressor(
        estimators=[('random_forest', rf_model), ('linear_regression', lr_model), ('gbdt', gbdt_model)],
        final_estimator=LinearRegression()  # You can use a different final estimator
    )
    
    # Train the Stacking Regressor
    stacking_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = stacking_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')


______________________________________
Fold 1:
Mean Squared Error (MSE): 252.40
Mean Absolute Error (MAE): 12.35
R-squared (R2): 0.27
Peak Signal-to-Noise Ratio (PSNR): 20.89 dB
Signal-to-Noise Ratio (SNR): 48.55 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 227.00
Mean Absolute Error (MAE): 11.87
R-squared (R2): 0.21
Peak Signal-to-Noise Ratio (PSNR): 21.35 dB
Signal-to-Noise Ratio (SNR): 49.01 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 243.42
Mean Absolute Error (MAE): 12.31
R-squared (R2): 0.18
Peak Signal-to-Noise Ratio (PSNR): 21.05 dB
Signal-to-Noise Ratio (SNR): 48.71 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 287.94
Mean Absolute Error (MAE): 13.40
R-squared (R2): 0.09
Peak Signal-to-Noise Ratio (PSNR): 20.32 dB
Signal-to-Noise Ratio (SNR): 47.98 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 239.09
Mean Absolute Error (MAE): 12.16
R-squared (R2): 0.28
Pe