# Logistic regression revision

In [61]:
import numpy as np
import pandas as pd
import seaborn as sns
sns.set()
import random
from sklearn.metrics import precision_score, f1_score, recall_score, accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# --- Synthetic data set: hours studied -> binary outcome -------------------
# Seed the global NumPy RNG so every draw below is repeatable. The order of
# the draws (X, bias, weights, noise) determines the generated values, so it
# must not be rearranged.
np.random.seed(42)

n_samples = 1000

# A single feature column, uniform between 0 and 20.
X = np.random.uniform(low=0, high=20, size=(n_samples, 1))
# Intercept and slope of the generating model, both drawn at random.
bias = np.random.uniform(low=-5, high=5)
weights = np.random.uniform(low=-4, high=4, size=1)

# Noise-free logit: (n_samples, 1) @ (1,) -> (n_samples,)
linear_combination = bias + X @ weights

# Parameters of the Gaussian noise that perturbs the logit.
mean = 0
std_dev = 0.5
noise = np.random.normal(loc=mean, scale=std_dev, size=linear_combination.shape)

# Perturb the logit, squash it through the sigmoid, then threshold at 0.5
# to obtain hard 0/1 labels.
linear_combination = linear_combination + noise
probabilities = 1 / (1 + np.exp(-linear_combination))
labels = (probabilities > 0.5).astype(int)

# Assemble the feature and the label into one frame.
data = pd.DataFrame(X, columns=['study_hours']).assign(outcome=labels)

print(data.head())


   study_hours  outcome
0     7.490802        0
1    19.014286        1
2    14.639879        1
3    11.973170        1
4     3.120373        0


In [58]:
# Class balance of the binary target. The frame built above has the columns
# 'study_hours' and 'outcome'; there is no 'Label' column, so indexing by
# that name raises KeyError on a fresh kernel.
data['outcome'].value_counts()

Label
1    522
0    478
Name: count, dtype: int64

In [32]:
# NOTE(review): this cell repeats the seaborn import and the sns.set() call
# already made in the first code cell; it can be deleted without changing
# the notebook's behavior.
import seaborn as sns
sns.set()

In [18]:
# Show the randomly drawn intercept used to generate the synthetic data.
# NOTE(review): the saved output comes from execution In [18], earlier than
# the data-generation cell (In [61]) - re-run to confirm it is still current.
bias

0.15714592843367126

In [21]:
# Show the randomly drawn slope (one-element array) used to generate the data.
# NOTE(review): the saved output comes from execution In [21], earlier than
# the data-generation cell (In [61]) - re-run to confirm it is still current.
weights

array([3.27282082])

In [None]:
# Scatter of outcome against study hours with a straight-line fit overlaid.
# Columns are referenced by name and looked up in `data`.
sns.regplot(data=data, x='study_hours', y='outcome')

In [None]:
# Same scatter, but with a logistic (S-shaped) fit instead of a straight line.
# The column names must match the frame: 'study_hours' and 'outcome'. The
# previous 'Feature' / 'Label' names do not exist in `data` and made this
# cell fail on a fresh kernel.
# NOTE(review): per the seaborn docs, logistic=True relies on statsmodels
# being installed - confirm it is available in this environment.
sns.lmplot(data=data, x='study_hours', y='outcome', logistic=True)

In [62]:
# Separate the binary target from the predictor column(s).
# Note: this rebinds X, which previously held the raw NumPy feature array.
y = data['outcome']
X = data.drop(columns=['outcome'])

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [63]:
# Build the classifier, then fit it on the training split. The result of
# fit() is assigned back to `clf` so the cell produces no output, exactly
# like the chained one-liner it replaces.
clf = LogisticRegression(random_state=0)
clf = clf.fit(X_train, y_train)


In [64]:
# Predict labels for the held-out test features; `preds` is compared against
# y_test in the evaluation cell below.
preds = clf.predict(X_test)

In [65]:
# Per-class precision / recall / F1 on the held-out test split.
print(classification_report(y_true=y_test, y_pred=preds))

              precision    recall  f1-score   support

           0       0.91      0.99      0.95        93
           1       0.99      0.92      0.95       107

    accuracy                           0.95       200
   macro avg       0.95      0.95      0.95       200
weighted avg       0.95      0.95      0.95       200

