In [1]:
import numpy as np
import pandas as pd

In [2]:
dataset = pd.read_csv('babies(preprocessed).csv')

In [3]:
dataset

Unnamed: 0,age,gestation,parity,height,weight,smoke,bwt
0,27.0,284.000000,0,62.0,100.0,0.0,120
1,33.0,282.000000,0,64.0,135.0,0.0,113
2,28.0,279.000000,0,64.0,115.0,1.0,128
3,36.0,279.338512,0,69.0,190.0,0.0,123
4,23.0,282.000000,0,67.0,125.0,1.0,108
...,...,...,...,...,...,...,...
1231,27.0,275.000000,1,60.0,100.0,0.0,113
1232,24.0,265.000000,0,67.0,120.0,0.0,128
1233,30.0,291.000000,0,65.0,150.0,1.0,130
1234,21.0,281.000000,1,65.0,110.0,0.0,125


In [4]:
dataset.isnull().sum()

age          0
gestation    0
parity       0
height       0
weight       0
smoke        0
bwt          0
dtype: int64

In [5]:
X = dataset.drop('bwt', axis=1)
y = dataset['bwt']

# 1. Logistic Regression

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    # Applying PCA function on training
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    # and testing set of X component
    from sklearn.decomposition import PCA
    pca = PCA(n_components = 2)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)
    explained_variance = pca.explained_variance_ratio_    
    model = LinearRegression()
    model = model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

______________________________________
Fold 1:
Mean Squared Error (MSE): 314.97
Mean Absolute Error (MAE): 13.48
R-squared (R2): 0.09
Peak Signal-to-Noise Ratio (PSNR): 19.93 dB
Signal-to-Noise Ratio (SNR): 47.59 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 262.76
Mean Absolute Error (MAE): 12.53
R-squared (R2): 0.08
Peak Signal-to-Noise Ratio (PSNR): 20.71 dB
Signal-to-Noise Ratio (SNR): 48.38 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 284.15
Mean Absolute Error (MAE): 13.31
R-squared (R2): 0.05
Peak Signal-to-Noise Ratio (PSNR): 20.37 dB
Signal-to-Noise Ratio (SNR): 48.04 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 325.97
Mean Absolute Error (MAE): 14.09
R-squared (R2): -0.03
Peak Signal-to-Noise Ratio (PSNR): 19.78 dB
Signal-to-Noise Ratio (SNR): 47.44 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 299.20
Mean Absolute Error (MAE): 13.34
R-squared (R2): 0.10
P

# 1. Bagging

In [7]:
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    # Applying PCA function on training
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    # and testing set of X component
    from sklearn.decomposition import PCA
    pca = PCA(n_components = 2)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)
    explained_variance = pca.explained_variance_ratio_
    # Create individual regression models
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    
    # Create a Bagging Regressor
    bagging_model = BaggingRegressor(base_estimator=rf_model, n_estimators=10, random_state=42)
    
    # Train the Bagging Regressor
    bagging_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = bagging_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')



______________________________________
Fold 1:
Mean Squared Error (MSE): 313.66
Mean Absolute Error (MAE): 13.55
R-squared (R2): 0.10
Peak Signal-to-Noise Ratio (PSNR): 19.95 dB
Signal-to-Noise Ratio (SNR): 47.61 dB




______________________________________
Fold 2:
Mean Squared Error (MSE): 285.30
Mean Absolute Error (MAE): 12.94
R-squared (R2): 0.00
Peak Signal-to-Noise Ratio (PSNR): 20.36 dB
Signal-to-Noise Ratio (SNR): 48.02 dB




______________________________________
Fold 3:
Mean Squared Error (MSE): 292.45
Mean Absolute Error (MAE): 13.13
R-squared (R2): 0.02
Peak Signal-to-Noise Ratio (PSNR): 20.25 dB
Signal-to-Noise Ratio (SNR): 47.91 dB




______________________________________
Fold 4:
Mean Squared Error (MSE): 337.28
Mean Absolute Error (MAE): 14.48
R-squared (R2): -0.07
Peak Signal-to-Noise Ratio (PSNR): 19.63 dB
Signal-to-Noise Ratio (SNR): 47.29 dB




______________________________________
Fold 5:
Mean Squared Error (MSE): 318.86
Mean Absolute Error (MAE): 13.76
R-squared (R2): 0.04
Peak Signal-to-Noise Ratio (PSNR): 19.87 dB
Signal-to-Noise Ratio (SNR): 47.54 dB




______________________________________
Fold 6:
Mean Squared Error (MSE): 279.08
Mean Absolute Error (MAE): 13.04
R-squared (R2): 0.08
Peak Signal-to-Noise Ratio (PSNR): 20.45 dB
Signal-to-Noise Ratio (SNR): 48.12 dB




______________________________________
Fold 7:
Mean Squared Error (MSE): 301.21
Mean Absolute Error (MAE): 12.83
R-squared (R2): -0.00
Peak Signal-to-Noise Ratio (PSNR): 20.12 dB
Signal-to-Noise Ratio (SNR): 47.78 dB




______________________________________
Fold 8:
Mean Squared Error (MSE): 317.05
Mean Absolute Error (MAE): 13.85
R-squared (R2): 0.07
Peak Signal-to-Noise Ratio (PSNR): 19.90 dB
Signal-to-Noise Ratio (SNR): 47.56 dB




______________________________________
Fold 9:
Mean Squared Error (MSE): 358.84
Mean Absolute Error (MAE): 14.62
R-squared (R2): -0.02
Peak Signal-to-Noise Ratio (PSNR): 19.36 dB
Signal-to-Noise Ratio (SNR): 47.02 dB




______________________________________
Fold 10:
Mean Squared Error (MSE): 343.62
Mean Absolute Error (MAE): 14.32
R-squared (R2): -0.00
Peak Signal-to-Noise Ratio (PSNR): 19.55 dB
Signal-to-Noise Ratio (SNR): 47.21 dB

Average Metrics Across Folds:
Mean Squared Error (MSE): 314.74
Mean Absolute Error (MAE): 13.65
R-squared (R2): 0.02
Peak Signal-to-Noise Ratio (PSNR): 19.94 dB
Signal-to-Noise Ratio (SNR): 47.61 dB


# 2. Random Forest

In [8]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    # Applying PCA function on training
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    # and testing set of X component
    from sklearn.decomposition import PCA
    pca = PCA(n_components = 2)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)
    explained_variance = pca.explained_variance_ratio_
    # Create a Random Forest Regressor
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    
    # Train the Random Forest Regressor
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = rf_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')

______________________________________
Fold 1:
Mean Squared Error (MSE): 342.40
Mean Absolute Error (MAE): 14.20
R-squared (R2): 0.02
Peak Signal-to-Noise Ratio (PSNR): 19.56 dB
Signal-to-Noise Ratio (SNR): 47.23 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 302.64
Mean Absolute Error (MAE): 13.48
R-squared (R2): -0.06
Peak Signal-to-Noise Ratio (PSNR): 20.10 dB
Signal-to-Noise Ratio (SNR): 47.76 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 303.89
Mean Absolute Error (MAE): 13.33
R-squared (R2): -0.02
Peak Signal-to-Noise Ratio (PSNR): 20.08 dB
Signal-to-Noise Ratio (SNR): 47.75 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 355.54
Mean Absolute Error (MAE): 14.87
R-squared (R2): -0.12
Peak Signal-to-Noise Ratio (PSNR): 19.40 dB
Signal-to-Noise Ratio (SNR): 47.06 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 324.41
Mean Absolute Error (MAE): 13.97
R-squared (R2): 0.03

# 3. Voting

In [9]:
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    # Applying PCA function on training
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    # and testing set of X component
    from sklearn.decomposition import PCA
    pca = PCA(n_components = 2)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)
    explained_variance = pca.explained_variance_ratio_
    # Create individual regression models
    rf_model = RandomForestRegressor(n_estimators=100, random_state=i)
    lr_model = LinearRegression()
    
    # Create a Voting Regressor
    voting_model = VotingRegressor([('random_forest', rf_model), ('linear_regression', lr_model)])
    
    # Train the Voting Regressor
    voting_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = voting_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')

______________________________________
Fold 1:
Mean Squared Error (MSE): 309.79
Mean Absolute Error (MAE): 13.39
R-squared (R2): 0.11
Peak Signal-to-Noise Ratio (PSNR): 20.00 dB
Signal-to-Noise Ratio (SNR): 47.66 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 266.42
Mean Absolute Error (MAE): 12.54
R-squared (R2): 0.07
Peak Signal-to-Noise Ratio (PSNR): 20.65 dB
Signal-to-Noise Ratio (SNR): 48.32 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 282.69
Mean Absolute Error (MAE): 13.15
R-squared (R2): 0.05
Peak Signal-to-Noise Ratio (PSNR): 20.40 dB
Signal-to-Noise Ratio (SNR): 48.06 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 316.38
Mean Absolute Error (MAE): 14.07
R-squared (R2): 0.00
Peak Signal-to-Noise Ratio (PSNR): 19.91 dB
Signal-to-Noise Ratio (SNR): 47.57 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 300.55
Mean Absolute Error (MAE): 13.39
R-squared (R2): 0.10
Pe

# 4. XGBOOST

In [10]:
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    # Applying PCA function on training
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    # and testing set of X component
    from sklearn.decomposition import PCA
    pca = PCA(n_components = 2)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)
    explained_variance = pca.explained_variance_ratio_
    # Create individual regression models
    rf_model = RandomForestRegressor(n_estimators=100, random_state=i)
    lr_model = LinearRegression()
    xgb_model = XGBRegressor(objective='reg:squarederror', random_state=i)  # Use reg:squarederror for regression
    
    # Create a Voting Regressor
    voting_model = VotingRegressor([('random_forest', rf_model), ('linear_regression', lr_model), ('xgboost', xgb_model)])
    
    # Train the Voting Regressor
    voting_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = voting_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')


______________________________________
Fold 1:
Mean Squared Error (MSE): 320.98
Mean Absolute Error (MAE): 13.84
R-squared (R2): 0.08
Peak Signal-to-Noise Ratio (PSNR): 19.85 dB
Signal-to-Noise Ratio (SNR): 47.51 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 275.80
Mean Absolute Error (MAE): 12.72
R-squared (R2): 0.03
Peak Signal-to-Noise Ratio (PSNR): 20.50 dB
Signal-to-Noise Ratio (SNR): 48.17 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 293.63
Mean Absolute Error (MAE): 13.28
R-squared (R2): 0.02
Peak Signal-to-Noise Ratio (PSNR): 20.23 dB
Signal-to-Noise Ratio (SNR): 47.89 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 327.15
Mean Absolute Error (MAE): 14.41
R-squared (R2): -0.03
Peak Signal-to-Noise Ratio (PSNR): 19.76 dB
Signal-to-Noise Ratio (SNR): 47.43 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 327.28
Mean Absolute Error (MAE): 13.92
R-squared (R2): 0.02
P

# 5. Stacking

In [11]:
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    # Applying PCA function on training
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    # and testing set of X component
    from sklearn.decomposition import PCA
    pca = PCA(n_components = 2)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)
    explained_variance = pca.explained_variance_ratio_    
    # Create individual regression models
    rf_model = RandomForestRegressor(n_estimators=100, random_state=i)
    lr_model = LinearRegression()
    xgb_model = XGBRegressor(objective='reg:squarederror', random_state=i)  # Use reg:squarederror for regression
    
    # Create a Stacking Regressor
    stacking_model = StackingRegressor(
        estimators=[('random_forest', rf_model), ('linear_regression', lr_model), ('xgboost', xgb_model)],
        final_estimator=LinearRegression()  # You can use a different final estimator
    )
    
    # Train the Stacking Regressor
    stacking_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = stacking_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')


______________________________________
Fold 1:
Mean Squared Error (MSE): 309.00
Mean Absolute Error (MAE): 13.34
R-squared (R2): 0.11
Peak Signal-to-Noise Ratio (PSNR): 20.01 dB
Signal-to-Noise Ratio (SNR): 47.67 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 259.48
Mean Absolute Error (MAE): 12.36
R-squared (R2): 0.09
Peak Signal-to-Noise Ratio (PSNR): 20.77 dB
Signal-to-Noise Ratio (SNR): 48.43 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 280.69
Mean Absolute Error (MAE): 13.21
R-squared (R2): 0.06
Peak Signal-to-Noise Ratio (PSNR): 20.43 dB
Signal-to-Noise Ratio (SNR): 48.09 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 318.09
Mean Absolute Error (MAE): 14.07
R-squared (R2): -0.00
Peak Signal-to-Noise Ratio (PSNR): 19.88 dB
Signal-to-Noise Ratio (SNR): 47.55 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 296.24
Mean Absolute Error (MAE): 13.28
R-squared (R2): 0.11
P

# 6. GBDT

In [13]:
from sklearn.ensemble import RandomForestRegressor, StackingRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Assuming X, y are your features and target variable

# Initialize metrics lists to store results for each fold
mse_list, mae_list, r2_list, psnr_list, snr_list = [], [], [], [], []

for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    # Applying PCA function on training
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    # and testing set of X component
    from sklearn.decomposition import PCA
    pca = PCA(n_components = 2)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)
    explained_variance = pca.explained_variance_ratio_
    # Create individual regression models
    rf_model = RandomForestRegressor(n_estimators=100, random_state=i)
    lr_model = LinearRegression()
    gbdt_model = GradientBoostingRegressor(n_estimators=100, random_state=i)  # Use GradientBoostingRegressor for GBDT
    
    # Create a Stacking Regressor
    stacking_model = StackingRegressor(
        estimators=[('random_forest', rf_model), ('linear_regression', lr_model), ('gbdt', gbdt_model)],
        final_estimator=LinearRegression()  # You can use a different final estimator
    )
    
    # Train the Stacking Regressor
    stacking_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    predictions = stacking_model.predict(X_test)
    
    # Evaluate performance metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    # Additional metrics for signal quality
    psnr = 10 * np.log10(np.max(y) ** 2 / mse)
    snr = 10 * np.log10(np.sum(y ** 2) / mse)
    
    # Append metrics to lists
    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)
    psnr_list.append(psnr)
    snr_list.append(snr)
    
    print("______________________________________")
    print(f'Fold {i + 1}:')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'R-squared (R2): {r2:.2f}')
    print(f'Peak Signal-to-Noise Ratio (PSNR): {psnr:.2f} dB')
    print(f'Signal-to-Noise Ratio (SNR): {snr:.2f} dB')

# Calculate and print average metrics
print("\nAverage Metrics Across Folds:")
print(f'Mean Squared Error (MSE): {np.mean(mse_list):.2f}')
print(f'Mean Absolute Error (MAE): {np.mean(mae_list):.2f}')
print(f'R-squared (R2): {np.mean(r2_list):.2f}')
print(f'Peak Signal-to-Noise Ratio (PSNR): {np.mean(psnr_list):.2f} dB')
print(f'Signal-to-Noise Ratio (SNR): {np.mean(snr_list):.2f} dB')


______________________________________
Fold 1:
Mean Squared Error (MSE): 308.69
Mean Absolute Error (MAE): 13.34
R-squared (R2): 0.11
Peak Signal-to-Noise Ratio (PSNR): 20.02 dB
Signal-to-Noise Ratio (SNR): 47.68 dB
______________________________________
Fold 2:
Mean Squared Error (MSE): 263.16
Mean Absolute Error (MAE): 12.46
R-squared (R2): 0.08
Peak Signal-to-Noise Ratio (PSNR): 20.71 dB
Signal-to-Noise Ratio (SNR): 48.37 dB
______________________________________
Fold 3:
Mean Squared Error (MSE): 281.47
Mean Absolute Error (MAE): 13.22
R-squared (R2): 0.06
Peak Signal-to-Noise Ratio (PSNR): 20.42 dB
Signal-to-Noise Ratio (SNR): 48.08 dB
______________________________________
Fold 4:
Mean Squared Error (MSE): 314.23
Mean Absolute Error (MAE): 13.90
R-squared (R2): 0.01
Peak Signal-to-Noise Ratio (PSNR): 19.94 dB
Signal-to-Noise Ratio (SNR): 47.60 dB
______________________________________
Fold 5:
Mean Squared Error (MSE): 296.49
Mean Absolute Error (MAE): 13.29
R-squared (R2): 0.11
Pe