In [83]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
import joblib

In [84]:
np.random.seed(0)

n = 200

# פיצ'רים בסקיילים שונים
salary = np.random.normal(100_000, 40_000, n)
kids = np.random.randint(0, 5, n)
experience = np.random.randint(1, 20, n)
credit_score = np.random.normal(680, 60, n)

X = np.column_stack([salary, kids, experience, credit_score])

# משתנה מטרה (0/1)
score = (
    0.00002 * salary
    + 0.4 * experience
    + 0.005 * credit_score
    - 0.8 * kids
    - 10
)

y = (score > 0).astype(int)

In [85]:
# Train-Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

logreg_no_scale = LogisticRegression(max_iter=1000)
logreg_no_scale.fit(X_train, y_train)

y_pred = logreg_no_scale.predict(X_test)

print("Logistic Regression no scale: ")
print(f"Accuracy score: {accuracy_score(y_test, y_pred) * 100}%")

Logistic Regression no scale: 
Accuracy score: 97.5%


In [86]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
logreg_scaled = LogisticRegression()
logreg_scaled.fit(X_train_scaled, y_train)

X_test_scaled = scaler.transform(X_test)
y_pred = logreg_scaled.predict(X_test_scaled)

print("Logistic Regression scaled: ")
print(f"Accuracy score: {accuracy_score(y_test, y_pred) * 100}%")

Logistic Regression scaled: 
Accuracy score: 97.5%


In [87]:
def percent_difference(a, b):
    denom = (abs(a) + abs(b)) / 2
    if denom == 0:
        return 0.0
    return abs(a - b) / denom * 100
    
print(f"Not scaled vs Scaled[Total % change]: {percent_difference(accuracy_score(y_test, y_pred), accuracy_score(y_test, y_pred))}%")

Not scaled vs Scaled[Total % change]: 0.0%


In [88]:
score_not_scaled = cross_val_score(logreg_no_scale, X, y, cv=5, scoring="accuracy")
print(f"Real score not scaled: {score_not_scaled.mean()}")

Real score not scaled: 0.99


In [89]:
logreg_pipeline = Pipeline([
    ('scaler', StandardScaler()), 
    ('logreg', LogisticRegression())
])
score_not_scaled = cross_val_score(logreg_pipeline, X, y, cv=5, scoring="accuracy")
print(f"Real score pipeline: {score_not_scaled.mean() * 100}%")

logreg_on_pipeline = logreg_pipeline.fit(X, y)
y_pred = logreg_on_pipeline.predict(X_test)
print()
print("Logistic Regression pipeline: ")
print(f"Accuracy score: {accuracy_score(y_test, y_pred) * 100}%")

Real score pipeline: 97.5%

Logistic Regression pipeline: 
Accuracy score: 100.0%


In [90]:
joblib.dump(logreg_on_pipeline, 'logreg_on_pipeline.joblib')

['logreg_on_pipeline.joblib']

In [91]:
loaded_model = joblib.load('logreg_on_pipeline.joblib')
sample = np.array([[120_000, 2, 10, 720]])
prediction = loaded_model.predict(sample)
print(f'Prediction [0/1]: {prediction[0]}')

Prediction [0/1]: 0
