## Logistic Regression example

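Create the artificial dataset: 200 samples with five Gaussian features (X1–X5), split evenly between class 0 and class 1, with well-separated class means.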

In [1]:
import numpy as np
import pandas as pd

# Build a reproducible two-class synthetic dataset.
np.random.seed(42)
num_samples = 200
samples_per_class = num_samples // 2

# (mean, standard deviation) of the Gaussian behind each feature X1..X5, per class.
class0_params = [(5, 1), (3, 1), (8, 2), (4, 2), (6, 1)]
class1_params = [(10, 1), (8, 1), (15, 2), (12, 2), (18, 1)]

# Draw one feature column at a time, all of class 0 before class 1, so the
# global NumPy RNG is consumed in a fixed, seed-reproducible order.
X_class0 = np.column_stack([
    np.random.normal(loc=mean, scale=std, size=samples_per_class)
    for mean, std in class0_params
])
X_class1 = np.column_stack([
    np.random.normal(loc=mean, scale=std, size=samples_per_class)
    for mean, std in class1_params
])

# Stack the classes: the first half of the rows is class 0, the second half class 1.
X = np.vstack((X_class0, X_class1))
y = np.repeat([0.0, 1.0], samples_per_class)

# Shuffle rows and labels with the same permutation so they stay aligned.
indices = np.arange(num_samples)
np.random.shuffle(indices)
X, y = X[indices], y[indices]

# Assemble a DataFrame with one column per feature plus the label column.
feature_names = ['X1', 'X2', 'X3', 'X4', 'X5']
frame_columns = dict(zip(feature_names, X.T))
frame_columns['y'] = y
data = pd.DataFrame(frame_columns)

In [2]:
# Show the DataFrame as the cell's rich output; pandas abbreviates the middle rows with "...".
data

Unnamed: 0,X1,X2,X3,X4,X5,y
0,10.394452,8.198085,13.283284,10.895554,18.569767,1.0
1,4.470240,2.153206,8.561984,5.900848,7.687142,0.0
2,9.143916,8.444263,11.678078,14.217407,18.026886,1.0
3,10.018418,9.246085,16.415504,11.646106,18.128104,1.0
4,9.584712,6.870948,12.329311,10.938262,18.425458,1.0
...,...,...,...,...,...,...
195,4.520826,2.025318,6.146139,5.038693,7.964725,0.0
196,10.562969,9.355638,12.183077,11.075449,19.278452,1.0
197,5.343618,3.521942,11.530908,2.563112,5.486133,0.0
198,11.676437,5.926610,13.875066,10.403406,17.318948,1.0


In [3]:
from sklearn.model_selection import train_test_split

# Pull the feature matrix and label vector out of the `data` DataFrame
# built in the dataset-creation cell above.
feature_columns = ['X1', 'X2', 'X3', 'X4', 'X5']
X = data[feature_columns].to_numpy()
y = data['y'].to_numpy()

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Train the `tensorflow_ml` logistic regression on the training split and evaluate it on the held-out test split

In [4]:
from tensorflow_ml.classification.logistic_regression import LogisticRegression

# Hyperparameters handed to the tensorflow_ml LogisticRegression via set_params().
params = dict(
    learning_rate=0.01,
    num_epochs=1000,
    batch_size=32,
    reg_strength=0.001,
    early_stopping_patience=10,
    regularization='l2',
)

# Create the model and apply the hyperparameters before training.
logistic_regression = LogisticRegression()
logistic_regression.set_params(params)

# Fit on the training split.
logistic_regression.fit(X_train, y_train)

# score() is unpacked as (accuracy, cross-entropy loss) on the held-out split.
# NOTE(review): the recorded run reports 0.5250 test accuracy, while the scikit-learn
# baseline later in this notebook reaches 1.0000 on the same split. Possibly the
# unscaled features or these hyperparameters keep training from converging —
# TODO investigate before trusting this result.
test_metrics = logistic_regression.score(X_test, y_test)
accuracy, cross_entropy_loss = test_metrics

print(f"Accuracy on test set: {accuracy:.4f}")
print(f"Cross-entropy loss on test set: {cross_entropy_loss:.4f}")



Accuracy on test set: 0.5250
Cross-entropy loss on test set: 3.0167


In [5]:
from sklearn.linear_model import LogisticRegression as LR
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Baseline: scikit-learn's LogisticRegression with default settings on the same split.
# fit() returns the fitted estimator, so construction and training are chained.
lr = LR().fit(X_train, y_train)

# Predict labels for the held-out rows and report plain accuracy.
y_pred = lr.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")

Accuracy: 1.0000
