In [48]:
import numpy as np
import pandas as pd
from scipy.optimize import least_squares
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load Dataset (Assuming 'df' is already loaded)
df = pd.read_excel('test.xlsx')
# 'Kd' is the target, and all other columns are features
X = df[['H', 'B', 'D', 't']].values  # 4 input features
y = df['Kd'].values.reshape(-1, 1)  # Ensure correct shape
print(df.head())

       H     B      D       t        Kd
0  1.625  1.25  0.188  0.0188  4.858459
1  1.625  1.25  0.188  0.0283  3.496159
2  1.625  1.25  0.188  0.0312  3.165983
3  1.625  1.25  0.188  0.0346  2.966884
4  2.500  1.25  0.188  0.0188  4.360603


In [None]:
# Activation Functions
def logsigmoid(x):
    x = np.clip(x, -500, 500)  # Clip input to prevent overflow in exp()
    return 1 / (1 + np.exp(-x)) # Log-sigmoid activation for hidden layer


def identity(x):
    return x  # Identity for output layer

# Forward Pass: compute the network output given input X and weights
def forward_pass(X, weights):
    W1 = weights[:features * hidden_neuron].reshape(features, hidden_neuron)  # (5, hidden_neuron)
    b1 = weights[features * hidden_neuron : features * hidden_neuron + hidden_neuron].reshape(1, hidden_neuron)  # (1, hidden_neuron)
    W2 = weights[features * hidden_neuron + hidden_neuron : features * hidden_neuron + 2 * hidden_neuron].reshape(hidden_neuron, 1)  # (hidden_neuron, 1)
    b2 = weights[-1]  # Single bias for output neuron
    
    hidden_layer = logsigmoid(np.dot(X, W1) + b1)  # Log-Sigmoid Activation
    output_layer = identity(np.dot(hidden_layer, W2) + b2)  # Identity Activation

    return output_layer

# Cost Function (LM requires residuals)
def loss_function(weights, X, y):
    y_pred = forward_pass(X, weights)
    return (y_pred - y).ravel() # Flatten for least_squares


# Standardize Input & Output
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X = scaler_X.fit_transform(X)
y = scaler_y.fit_transform(y)  # Standardize y too!

# Define Model Structure
features = 4 # Number of input features
hidden_neuron = 2  # Number of neurons in the hidden layer
total_weights = (features * hidden_neuron) + hidden_neuron + hidden_neuron + 1  # Total number of trainable parameters

# Split into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42) 

# Initialize Weights Randomly
initial_weights = np.random.randn(total_weights)  # 15 parameters

# Apply Levenberg-Marquardt Optimization
result = least_squares(loss_function, initial_weights, method='lm', args=(X_train, y_train))

# Optimized Weights
optimized_weights = result.x
print("Optimized Weights:\n", optimized_weights)

# Evaluate Model on Training and Test Data
def evaluate_regression(X, y, weights, scaler_y):
    y_pred = forward_pass(X, weights)
    
    # Inverse transform y_pred to original scale
    y_pred_original = scaler_y.inverse_transform(y_pred)
    y_original = scaler_y.inverse_transform(y)
    
    

    mse = mean_squared_error(y_original, y_pred_original)
    mae = mean_absolute_error(y_original, y_pred_original)
    r2 = r2_score(y_original, y_pred_original)
    
    return mse, mae, r2, y_original, y_pred_original

# Get Metrics for Training and Testing
train_mse, train_mae, train_r2, y_train_original, y_train_pred_original = evaluate_regression(X_train, y_train, optimized_weights, scaler_y)
test_mse, test_mae, test_r2, y_test_original, y_test_pred_original = evaluate_regression(X_test, y_test, optimized_weights, scaler_y)

# Print Results
print(f"Training MSE: {train_mse:.4f}, MAE: {train_mae:.4f}, R²: {train_r2:.4f}")
print(f"Test MSE: {test_mse:.4f}, MAE: {test_mae:.4f}, R²: {test_r2:.4f}")

# Compare y_test and y_pred Side by Side (Original Scale)
comparison_df = pd.DataFrame({
    'Actual (y_test)': y_test_original.flatten(),
    'Predicted (y_pred)': y_test_pred_original.flatten(),
    'Error (%)': np.abs(((y_test_original.flatten()-y_test_pred_original.flatten())*100/y_test_original.flatten()))
})

# Print First 20 Rows
print(comparison_df.head(20))


Optimized Weights:
 [-2.15366560e-02 -7.52658839e-02  2.15878364e-01  1.42712132e+01
  2.40604438e-01 -1.96066784e+00 -1.06366673e+00  1.16826808e-01
 -8.94233299e+00  2.13933889e+01  7.13756080e+03  4.17283261e+03
 -4.17385707e+03]
Training MSE: 0.0424, MAE: 0.1428, R²: 0.9701
Test MSE: 0.0560, MAE: 0.1777, R²: 0.9627
    Actual (y_test)  Predicted (y_pred)  Error (%)
0          3.437131            3.193715   7.081976
1          2.967681            3.252070   9.582866
2          5.472730            5.393467   1.448328
3          2.538444            2.595351   2.241797
4          3.892027            3.287219  15.539668
5          3.334042            2.810117  15.714403
6          4.511162            4.807198   6.562303
7          4.313159            4.493429   4.179525
8          2.057608            2.090026   1.575488
9          3.798328            3.680514   3.101724
10         2.974447            2.992158   0.595425
11         2.621114            2.757521   5.204158
12         2.729

In [50]:
# Evaluate on the Entire Dataset
mse, mae, r2, y_original, y_pred_original = evaluate_regression(X, y, optimized_weights, scaler_y)
# Create DataFrame to Compare
full_comparison_df = pd.DataFrame({
    'Actual (y)': y_original.flatten(),
    'Predicted (y_pred)': y_pred_original.flatten(),
    'Error (%)': np.abs(((y_original.flatten() - y_pred_original.flatten()) * 100 / y_original.flatten()))
})

# Display Results
print(full_comparison_df.head(50))  # Show first 50 rows

# Error Statistics
print(f"Max Error (%): {full_comparison_df['Error (%)'].max():.2f}")
print(f"Min Error (%): {full_comparison_df['Error (%)'].min():.2f}")
error_above_10_percent = (full_comparison_df['Error (%)'] > 10).sum()
print(f"Count of Errors > 10%: {error_above_10_percent}")

    Actual (y)  Predicted (y_pred)  Error (%)
0     4.858459            4.415081   9.125892
1     3.496159            3.505521   0.267785
2     3.165983            3.285643   3.779574
3     2.966884            3.056347   3.015417
4     4.360603            4.374529   0.319343
5     3.132846            3.470539  10.779114
6     2.967681            3.252070   9.582866
7     2.734263            3.024281  10.606830
8     2.321248            2.480031   6.840404
9     2.057608            2.090026   1.575488
10    4.331673            4.327774   0.090001
11    3.190949            3.430141   7.495948
12    3.021513            3.213281   6.346728
13    2.763243            2.987213   8.105344
14    2.268575            2.447350   7.880459
15    2.009174            2.060931   2.576018
16    4.383091            4.321899   1.396101
17    3.124499            3.425060   9.619494
18    3.057160            3.208400   4.947084
19    2.766603            2.982548   7.805402
20    2.294049            2.443232

In [51]:
# Get mean and standard deviation of input features
X_means = scaler_X.mean_
X_stds = scaler_X.scale_

# Get mean and standard deviation of target variable
y_mean = scaler_y.mean_[0]
y_std = scaler_y.scale_[0]

print("X Means:", X_means)
print("X Standard Deviations:", X_stds)
print("y Mean:", y_mean)
print("y Standard Deviation:", y_std)

X Means: [7.01946721 2.06454918 0.53210656 0.06766475]
X Standard Deviations: [3.82262397 0.71274148 0.21078035 0.03034432]
y Mean: 3.6378581098196467
y Standard Deviation: 1.2040775968079265


In [52]:
# Reverse scale all data
X_all = np.vstack((X_train, X_test))  # Combine training and test data
X_all_original = scaler_X.inverse_transform(X_all)  # Reverse scale input features
y_all_actual = np.vstack((y_train_original, y_test_original))  # Actual values
y_all_predicted = np.vstack((y_train_pred_original, y_test_pred_original))  # Predicted values

# Create DataFrame with all data
final_df = pd.DataFrame(X_all_original)  # Restore original feature names
final_df['Actual Kd'] = y_all_actual.flatten()  # Add actual y values
final_df['Predicted Kd'] = y_all_predicted.flatten()  # Add predicted y values

# Save to Excel
#final_df.to_excel('All_data_results2Neuron.xlsx', index=False)