In [182]:
import numpy as np
from sklearn.linear_model import LogisticRegression as LogReg
from sklearn.metrics import accuracy_score as accuracy

In [214]:
def generate_label(x):
    """Draw a random binary label for the feature value ``x``.

    The label is 1 with probability 0.85 when ``x < 0.5`` and with
    probability 0.15 otherwise. Sampling goes through ``np.random.choice``,
    so the result depends on NumPy's global random state.

    Parameters
    ----------
    x : scalar
        Feature value compared against the 0.5 threshold.

    Returns
    -------
    The sampled label, either 1 or 0.
    """
    if x < 0.5:
        p_one = 0.85
    else:
        p_one = 0.15
    p_zero = 1 - p_one
    # Outcomes are listed as [1, 0] so the probabilities line up as [P(1), P(0)].
    return np.random.choice([1, 0], p=[p_one, p_zero])

def bayes_optimal_classifier(x):
    """Predict the more likely label for the feature value ``x``.

    Returns 1 when ``x`` is strictly below 0.5 and 0 otherwise, which is the
    majority label on each side of the threshold used by ``generate_label``.
    """
    if x < .5:
        return 1
    return 0

# Element-wise wrappers so both functions accept whole NumPy arrays.
# `otypes` is given explicitly because, without it, np.vectorize first calls
# the wrapped function on the first element just to infer the output dtype.
# For generate_label that wastes a random draw, and for size-0 input it raises
# ValueError. Both functions return the integers 0/1, so `int` is the right
# output type.
vectorized_generate_labels = np.vectorize(generate_label, otypes=[int])
vectorized_bayes_classifier = np.vectorize(bayes_optimal_classifier, otypes=[int])

In [215]:
# Seed NumPy's global generator so the experiment is reproducible: both
# np.random.uniform below and generate_label (via np.random.choice) draw from
# it, so without a seed every run prints different accuracies.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Sample sizes to compare; each is used for both the training and the test set.
n_array = [100, 1000]

for n in n_array:
    # Training sample: n features drawn uniformly from [0, 1), shaped as an
    # (n, 1) column for scikit-learn, with noisy labels flattened to 1-D.
    X_train = np.random.uniform(0, 1, size=n).reshape(-1, 1)
    Y_train = vectorized_generate_labels(X_train).ravel()

    logistic_regression = LogReg()
    logistic_regression.fit(X_train, Y_train)

    # Independent test sample of the same size, generated the same way.
    X_test = np.random.uniform(0, 1, size=n).reshape(-1, 1)
    Y_test = vectorized_generate_labels(X_test).ravel()

    # Flatten the Bayes predictions so every array passed to accuracy() is
    # 1-D like Y_test and Y_pred. Left as an (n, 1) column, a direct
    # comparison with Y_test would silently broadcast to (n, n).
    Y_bayes_test = vectorized_bayes_classifier(X_test).ravel()
    Y_pred = logistic_regression.predict(X_test)

    print("===========      n = {:5d}       ===========".format(n))
    print("Logistic Regression Classifier Accuracy  : " + str(accuracy(Y_test, Y_pred)))
    print("Bayes Optimal Classifier Accuracy        : " + str(accuracy(Y_test, Y_bayes_test)))

Logistic Regression Classifier Accuracy  : 0.79
Bayes Optimal Classifier Accuracy        : 0.85
Logistic Regression Classifier Accuracy  : 0.833
Bayes Optimal Classifier Accuracy        : 0.846
