In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from scipy.spatial.distance import euclidean

# Load the Iris dataset; its `data` and `target` attributes feed the
# train/test split further down
iris = load_iris()

# kNN-model-based classification: summarise the training set by a small set
# of "representatives" and classify test points by the representative that
# covers them.
def _pairwise_similarity(A, B):
    """Return the (len(A), len(B)) matrix of 1 / (1 + Euclidean distance)."""
    diff = A[:, np.newaxis, :] - B[np.newaxis, :, :]
    return 1.0 / (1.0 + np.linalg.norm(diff, axis=-1))


def _build_knn_model(X_train, y_train, similarity_threshold):
    """Greedily pick representatives until every training point is tagged.

    Returns four parallel arrays, one entry per representative: majority
    label, minimum similarity to any neighbourhood member, neighbourhood
    size, and the representative's feature vector.
    """
    n_samples = X_train.shape[0]
    similarity = _pairwise_similarity(X_train, X_train)
    in_neighbourhood = similarity >= similarity_threshold

    # A point's neighbourhood (and hence its majority class and count) does
    # not depend on which points are already tagged, so compute it once
    # instead of on every iteration of the selection loop.
    majority_label = np.empty(n_samples, dtype=int)
    majority_count = np.empty(n_samples, dtype=int)
    for i in range(n_samples):
        counts = np.bincount(y_train[in_neighbourhood[i]])
        majority_label[i] = counts.argmax()
        majority_count[i] = counts[majority_label[i]]

    tagged = np.zeros(n_samples, dtype=bool)
    labels, sims, sizes, points = [], [], [], []
    while not tagged.all():
        ungrouped = np.flatnonzero(~tagged)
        # Untagged point whose neighbourhood holds the most same-class
        # members; argmax keeps the lowest index on ties.
        centre = ungrouped[np.argmax(majority_count[ungrouped])]
        members = in_neighbourhood[centre]
        labels.append(majority_label[centre])
        sims.append(similarity[centre, members].min())
        sizes.append(np.count_nonzero(members))
        points.append(X_train[centre])
        # The centre is always its own neighbour (similarity 1 >= threshold),
        # so at least one new point is tagged and the loop terminates.
        tagged |= members

    # Homogeneous arrays per field: packing (int, float, int, ndarray) tuples
    # into a single np.array is a ragged construction that recent NumPy
    # versions reject.
    return np.array(labels), np.array(sims), np.array(sizes), np.array(points)


def knn_model_based_approach(X_train, y_train, X_test, similarity_threshold=0.5):
    """Classify ``X_test`` with a kNN-model built from the training data.

    Similarity between two points is ``1 / (1 + euclidean_distance)``. The
    model is a list of representatives; each one is a training point together
    with the majority class, the minimum similarity, and the size of its
    neighbourhood (all training points at least ``similarity_threshold``
    similar to it). A test point is *covered* by a representative when its
    similarity to that representative is at least the representative's
    minimum similarity.

    Parameters
    ----------
    X_train : array-like, shape (n_train, n_features)
        Training feature vectors.
    y_train : array-like of non-negative int, shape (n_train,)
        Training labels (passed to ``np.bincount``).
    X_test : array-like, shape (n_test, n_features)
        Feature vectors to classify.
    similarity_threshold : float, default 0.5
        Minimum similarity for a training point to belong to another point's
        neighbourhood. Must not exceed 1, the similarity of a point to itself.

    Returns
    -------
    numpy.ndarray of int, shape (n_test,)
        Predicted label per test point: the class of the covering
        representative with the largest neighbourhood (first one on ties),
        or ``-1`` when no representative covers the point.

    Raises
    ------
    ValueError
        If ``similarity_threshold`` is greater than 1.
    """
    if similarity_threshold > 1:
        raise ValueError("similarity_threshold must not exceed 1")

    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test, dtype=float)

    n_test = X_test.shape[0]
    if X_train.shape[0] == 0 or n_test == 0:
        # No representatives can be built, or there is nothing to classify.
        return np.full(n_test, -1, dtype=int)

    rep_labels, rep_sims, rep_sizes, rep_points = _build_knn_model(
        X_train, y_train, similarity_threshold
    )

    test_similarity = _pairwise_similarity(X_test, rep_points)
    covered = test_similarity >= rep_sims

    # Sizes are >= 1, so masking non-covering representatives with -1 makes
    # argmax select the largest covering one (lowest index on ties).
    best = np.where(covered, rep_sizes, -1).argmax(axis=1)
    return np.where(covered.any(axis=1), rep_labels[best], -1)


# Hold out 20% of the Iris samples for evaluation (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
)
# Classify the held-out samples and report the fraction predicted correctly
y_pred = knn_model_based_approach(X_train, y_train, X_test)
accuracy = (y_pred == y_test).mean()
print("Accuracy:", accuracy)



Accuracy: 0.9666666666666667


  M = np.array(M)
