In [6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# data
age = np.array([ 22,25,30,35,42,50,23,28,33,48])
year_income = np.array([60,75,80,120,150,110,95,90,105,135])
credit_seniority = np.array([1,2,3,5,8,10,1,2,4,9])
return_loan = np.array(['no','no','yes','yes','yes','yes','no','no','yes','yes'])

# Create feature matrix
X = np.column_stack((age,year_income,credit_seniority))
y = return_loan

#1:תקנן את הנתונים באמצעות נרמול Min-Max
# Normalize data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

#2:השתמש באלגוריתם KNN עם K=3 לניבוי האם לקוח חדש בגיל 27, עם הכנסה שנתית של 95 אלפי ₪ וותק אשראי של 3 שנים, יחזיר את ההלוואה שלו.
# Train the KNN model
k = 3  # number of neighbors
model = KNeighborsClassifier(n_neighbors=k)
model.fit(X_scaled, y)

# New data to classify
new_data = np.array([[27,95,3]])
new_data_scaled = scaler.transform(new_data)
prediction = model.predict(new_data_scaled)
probabilities = model.predict_proba(new_data_scaled)

# Output result
print(f"Prediction: The new customer will repay his loan? - {prediction[0]}")
print(f"Probabilities: {probabilities}")

#3:השתמש ב GRID SEARCH CV ובדוק מה מהפרמטרים הבאים יניב את התוצאות הטובות ביותר:
#n_neighbors: 1-31
#weights: [uniform, distance]
#metric: [euclidean, manhattan]

#Defining the range of parameters we will test
param_grid = {
    "n_neighbors": list(range(1, 32)), 
    "weights": ["uniform", "distance"],
    "metric": ["euclidean", "manhattan"] 
}

#Using GridSearchCV
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    cv=5,            
    scoring="accuracy",
    n_jobs=-1         
)

# Training
grid.fit(X_scaled, y)

# Results
print("Best parameters:", grid.best_params_)
print("Best score:", grid.best_score_)

Prediction: The new customer will repay his loan? - no
Probabilities: [[0.66666667 0.33333333]]




Best parameters: {'metric': 'euclidean', 'n_neighbors': 2, 'weights': 'uniform'}
Best score: 0.9


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan 0.9 0.9 0.9 0.9 0.9 0.9 nan 0.9 nan 0.9
 nan 0.8 nan 0.9 nan 0.9 nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]
