In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [2]:
# Step 1: Data Generation (Simulated Binary Classification Data)
np.random.seed(42)
X = np.random.rand(500, 3)  # 3 Features
y = (X[:, 0] + X[:, 1] > 1).astype(int)  # Binary Target

# Step 2: Preprocessing (Feature Scaling)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 3: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Step 4: Define Logistic Loss Function (Binary Cross-Entropy is used internally by LogisticRegression)
def logistic_loss(y_true, y_pred_probs):
    epsilon = 1e-15  # To avoid log(0)
    y_pred_probs = np.clip(y_pred_probs, epsilon, 1 - epsilon)
    return -np.mean(y_true * np.log(y_pred_probs) + (1 - y_true) * np.log(1 - y_pred_probs))

# Step 5: Train Logistic Regression Model (Optimize Model Parameters)
model = LogisticRegression(solver='lbfgs', penalty='l2', C=1.0, random_state=42)
model.fit(X_train, y_train)

# Step 6: Estimate Probabilities
y_train_probs = model.predict_proba(X_train)[:, 1]
y_test_probs = model.predict_proba(X_test)[:, 1]

# Step 7: Compute Predictions
y_train_pred = (y_train_probs > 0.5).astype(int)
y_test_pred = (y_test_probs > 0.5).astype(int)

# Step 8: Hyperparameter Validation (Checking model performance)
train_loss = logistic_loss(y_train, y_train_probs)
test_loss = logistic_loss(y_test, y_test_probs)

# Step 9: Evaluate Model Performance
metrics = {
    "Accuracy": accuracy_score(y_test, y_test_pred),
    "Precision": precision_score(y_test, y_test_pred),
    "Recall": recall_score(y_test, y_test_pred),
    "F1 Score": f1_score(y_test, y_test_pred),
    "ROC-AUC": roc_auc_score(y_test, y_test_probs),
    "Train Loss": train_loss,
    "Test Loss": test_loss
}

# Step 10: Predict New Data
new_X = np.array([[0.6, 0.8, 0.4]])  # Example new input
new_X_scaled = scaler.transform(new_X)
new_prob = model.predict_proba(new_X_scaled)[:, 1]
new_prediction = (new_prob > 0.5).astype(int)

# Display Results
print("Model Performance Metrics:", metrics)
print(f"Prediction for New Data {new_X}: Class = {new_prediction[0]}, Probability = {new_prob[0]:.4f}")

Model Performance Metrics: {'Accuracy': 0.99, 'Precision': 0.9803921568627451, 'Recall': 1.0, 'F1 Score': 0.9900990099009901, 'ROC-AUC': np.float64(1.0), 'Train Loss': np.float64(0.10480863629186937), 'Test Loss': np.float64(0.10321152069132192)}
Prediction for New Data [[0.6 0.8 0.4]]: Class = 1, Probability = 0.9975
