In [None]:
from sklearn.datasets import make_classification
import numpy as np


# Build a small synthetic binary-classification dataset (100 samples, 10
# features, of which 5 are informative and 2 are redundant).
X, y = make_classification(
    n_samples=100,
    n_features=10,
    n_informative=5,
    n_redundant=2,
    random_state=42,
)

# Render each numeric row as a space-separated string of "feature<idx>_<value>"
# tokens; this is only used for display below.
X_text = [
    " ".join(f"feature{i}_{val}" for i, val in enumerate(row))
    for row in X
]

print("Sample features (text-like):")
for sample_text in X_text[:5]:
    print(sample_text)
print("\nSample labels:")
print(y[:5])
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The exponential is only ever evaluated on non-positive values, so inputs
    of large magnitude saturate to 0 or 1 without triggering a floating-point
    overflow.

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``z``, with every value in the closed interval [0, 1].
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z).  z < 0: the algebraically equal e^z / (1 + e^z).
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays of higher rank unchanged.
    return result[()]

def compute_cost(X, y, weights, bias):
    """Return the mean binary cross-entropy of a logistic model.

    The loss is evaluated directly from the logits ``z = X @ weights + bias``
    as ``log(1 + e^z) - y * z``, which is algebraically identical to
    ``-(y * log(h) + (1 - y) * log(1 - h))`` with ``h = sigmoid(z)`` but stays
    finite when the sigmoid saturates to exactly 0 or 1.

    Args:
        X: Array whose first axis indexes samples (``X.shape[0]`` is used as
            the sample count).
        y: Array of 0/1 labels, one per sample.
        weights: Weight vector combined with ``X`` via ``np.dot``.
        bias: Scalar intercept added to every logit.

    Returns:
        The average loss over the samples as a NumPy float.
    """
    m = X.shape[0]
    logits = np.dot(X, weights) + bias
    # logaddexp(0, z) computes log(1 + e^z) without overflowing for large z.
    per_sample_loss = np.logaddexp(0, logits) - y * logits
    cost = (1 / m) * np.sum(per_sample_loss)
    return cost

def gradient_descent(X, y, weights, bias, learning_rate, num_iterations):
    """Fit logistic-regression parameters with batch gradient descent.

    Each iteration computes predictions with ``sigmoid``, takes one gradient
    step on the weights and bias, and records the cost returned by
    ``compute_cost`` for the updated parameters.

    Args:
        X: Array whose first axis indexes samples.
        y: Array of 0/1 labels, one per sample.
        weights: Initial weight vector. It is copied, so the caller's array
            is left unmodified.
        bias: Initial scalar intercept.
        learning_rate: Step size applied to both gradients.
        num_iterations: Number of gradient steps to perform.

    Returns:
        A ``(weights, bias, costs)`` tuple: the updated weight array, the
        updated bias, and a list with one cost value per iteration.
    """
    m = X.shape[0]
    # Work on a float copy: an in-place update would otherwise silently
    # modify the caller's array and fail outright for integer-typed weights.
    weights = np.array(weights, dtype=float)
    costs = []

    for _ in range(num_iterations):
        h = sigmoid(np.dot(X, weights) + bias)
        residual = h - y
        dw = (1 / m) * np.dot(X.T, residual)
        db = (1 / m) * np.sum(residual)

        weights -= learning_rate * dw
        bias -= learning_rate * db

        # The cost is measured after the step, i.e. for the new parameters.
        costs.append(compute_cost(X, y, weights, bias))

    return weights, bias, costs

def predict(X, weights, bias):
    """Return hard 0/1 class predictions for the rows of ``X``.

    A sample is labelled 1 when its sigmoid output is strictly greater than
    0.5 and 0 otherwise.
    """
    probabilities = sigmoid(np.dot(X, weights) + bias)
    return np.greater(probabilities, 0.5).astype(int)

class LogisticRegression:
    """Binary logistic-regression classifier trained by gradient descent.

    Training is delegated to the module-level ``gradient_descent`` function
    and inference to the module-level ``predict`` function.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000):
        """Store the hyperparameters; no training happens here.

        Args:
            learning_rate: Step size passed to ``gradient_descent``.
            num_iterations: Number of gradient steps taken by ``fit``.
        """
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        # Learned parameters; both stay None until fit() has been called.
        self.weights = None
        self.bias = None
        # Cost recorded after each gradient step of the most recent fit().
        self.costs = []

    def fit(self, X, y):
        """Learn weights and bias from the training data.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.
            y: Array-like of 0/1 labels, one per sample.

        Returns:
            The fitted estimator itself, so calls can be chained.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        _, n_features = X.shape

        # Start from the all-zero model.
        initial_weights = np.zeros(n_features)
        initial_bias = 0.0

        self.weights, self.bias, self.costs = gradient_descent(
            X,
            y,
            initial_weights,
            initial_bias,
            self.learning_rate,
            self.num_iterations,
        )
        return self

    def predict(self, X):
        """Return 0/1 predictions for the rows of ``X``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.weights is None or self.bias is None:
            # Fail early with a clear message instead of an obscure error
            # from np.dot operating on None.
            raise RuntimeError(
                "LogisticRegression is not fitted yet; call fit() before predict()."
            )
        return predict(X, self.weights, self.bias)

# Train the from-scratch classifier on the training split.
model = LogisticRegression(
    learning_rate=0.01,
    num_iterations=1000,
)
model.fit(X_train, y_train)

# Predict labels for the held-out samples.
y_pred = model.predict(X_test)

from sklearn.metrics import accuracy_score

# Fraction of held-out samples whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy on the test set: {accuracy:.4f}")

Sample features (text-like):
feature0_-0.1490366667295299 feature1_3.6397899913839704 feature2_-4.77202522973986 feature3_-0.006653454631237347 feature4_-1.712936631956997 feature5_-2.7458937711934364 feature6_-1.0248226493397838 feature7_4.487370834009874 feature8_-1.1254190077468889 feature9_-1.0148534542694319
feature0_2.6435715833985505 feature1_2.2477201422725925 feature2_0.26929568320199326 feature3_-0.20284621967015248 feature4_2.757147072496183 feature5_2.6745462129469226 feature6_-2.0242249704321327 feature7_0.3013079301649735 feature8_0.18053084180543139 feature9_1.4558042487379736
feature0_0.3431350510168737 feature1_-0.9457604059068692 feature2_0.5579200883668298 feature3_1.3238748037872132 feature4_-1.249062046897537 feature5_2.2919250286196764 feature6_-0.7443030472470422 feature7_-0.26229478380375704 feature8_1.2056280779528372 feature9_-0.7259418982501984
feature0_-0.3003604880886286 feature1_-1.5113535061026575 feature2_-0.6326839257833132 feature3_-0.8047869166603249 