In [7]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardize the data for better distance computation
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the Weighted KNN function
def weighted_knn(X_train, y_train, X_test, k=5):
    predictions = []
    
    for test_point in X_test:
        # Compute distances between the test point and all training points
        distances = np.linalg.norm(X_train - test_point, axis=1)
        
        # Get the indices of the k nearest neighbors
        nearest_indices = np.argsort(distances)[:k]
        
        # Retrieve the classes of the nearest neighbors
        nearest_classes = y_train[nearest_indices]
        
        # Compute weights (inverse of distance, avoid division by zero)
        weights = 1 / (distances[nearest_indices] + 1e-5)
        
        # Aggregate weights for each class
        class_weights = {}
        for i, cls in enumerate(nearest_classes):
            class_weights[cls] = class_weights.get(cls, 0) + weights[i]
        
        # Predict the class with the highest total weight
        predicted_class = max(class_weights, key=class_weights.get)
        predictions.append(predicted_class)
    
    return np.array(predictions)

# Apply WKNN
k = 5
y_pred = weighted_knn(X_train, y_train, X_test, k=k)

# Evaluate the results
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Weighted KNN: {accuracy * 100:.2f}%")


Accuracy of Weighted KNN: 93.33%


In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

def knn_predict(X_train, y_train, test_point, k=1):
    """
    Perform KNN classification for a single test point.
    """
    distances = np.linalg.norm(X_train - test_point, axis=1)
    nearest_indices = np.argsort(distances)[:k]
    nearest_labels = y_train[nearest_indices]
    unique, counts = np.unique(nearest_labels, return_counts=True)
    return unique[np.argmax(counts)]

def condensed_nearest_neighbor(X_train, y_train):
    """
    Condensed Nearest Neighbor algorithm to reduce training dataset size.
    """
    # Start with one randomly selected point
    indices = np.random.permutation(len(X_train))
    S_X = [X_train[indices[0]]]
    S_y = [y_train[indices[0]]]
    
    for _ in range(10):  # Fixed number of iterations
        misclassified = False
        for i in indices[1:]:
            pred = knn_predict(np.array(S_X), np.array(S_y), X_train[i])
            if pred != y_train[i]:
                S_X.append(X_train[i])
                S_y.append(y_train[i])
                misclassified = True
        if not misclassified:
            break
    
    return np.array(S_X), np.array(S_y)

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Apply CNN
condensed_X_train, condensed_y_train = condensed_nearest_neighbor(X_train, y_train)

# Evaluate using KNN
y_pred = [knn_predict(condensed_X_train, condensed_y_train, test_point) for test_point in X_test]
accuracy = accuracy_score(y_test, y_pred)

# Results
print(f"Original training size: {len(X_train)}")
print(f"Reduced training size: {len(condensed_X_train)}")
print(f"Accuracy with condensed dataset: {accuracy * 100:.2f}%")


Original training size: 105
Reduced training size: 15
Accuracy with condensed dataset: 97.78%


In [3]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

def knn_predict(X_train, y_train, test_point, k=1):
    """
    Perform KNN classification for a single test point.
    """
    distances = np.linalg.norm(X_train - test_point, axis=1)
    nearest_indices = np.argsort(distances)[:k]
    nearest_labels = y_train[nearest_indices]
    unique, counts = np.unique(nearest_labels, return_counts=True)
    return unique[np.argmax(counts)]

def reduced_nearest_neighbor(X_train, y_train):
    """
    Reduced Nearest Neighbor algorithm to reduce training dataset size.
    """
    S_X = list(X_train)
    S_y = list(y_train)
    
    removed = True
    while removed:
        removed = False
        for i in range(len(S_X) - 1, -1, -1):  # Iterate in reverse to avoid indexing issues
            temp_X = S_X[:i] + S_X[i+1:]
            temp_y = S_y[:i] + S_y[i+1:]
            
            pred = knn_predict(np.array(temp_X), np.array(temp_y), S_X[i])
            if pred == S_y[i]:  # Point can be removed
                del S_X[i]
                del S_y[i]
                removed = True
    return np.array(S_X), np.array(S_y)

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Apply RNN
reduced_X_train, reduced_y_train = reduced_nearest_neighbor(X_train, y_train)

# Evaluate using KNN
y_pred = [knn_predict(reduced_X_train, reduced_y_train, test_point) for test_point in X_test]
accuracy = accuracy_score(y_test, y_pred)

# Results
print(f"Original training size: {len(X_train)}")
print(f"Reduced training size: {len(reduced_X_train)}")
print(f"Accuracy with reduced dataset: {accuracy * 100:.2f}%")


Original training size: 105
Reduced training size: 10
Accuracy with reduced dataset: 75.56%


In [4]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

def knn_predict(X_train, y_train, test_point, k=3):
    """
    Perform KNN classification for a single test point.
    """
    distances = np.linalg.norm(X_train - test_point, axis=1)
    nearest_indices = np.argsort(distances)[:k]
    nearest_labels = y_train[nearest_indices]
    unique, counts = np.unique(nearest_labels, return_counts=True)
    return unique[np.argmax(counts)]

def edited_nearest_neighbor(X_train, y_train, k=3):
    """
    Edited Nearest Neighbor algorithm to clean the dataset.
    """
    S_X, S_y = list(X_train), list(y_train)
    
    removed = True
    while removed:
        removed = False
        indices_to_remove = []
        
        for i in range(len(S_X)):
            temp_X = S_X[:i] + S_X[i+1:]  # All points except the current one
            temp_y = S_y[:i] + S_y[i+1:]
            
            pred = knn_predict(np.array(temp_X), np.array(temp_y), S_X[i], k=k)
            if pred != S_y[i]:  # Misclassified point
                indices_to_remove.append(i)
        
        # Remove all misclassified points at once
        for index in sorted(indices_to_remove, reverse=True):
            del S_X[index]
            del S_y[index]
            removed = True
    
    return np.array(S_X), np.array(S_y)

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Apply ENN
k = 3
cleaned_X_train, cleaned_y_train = edited_nearest_neighbor(X_train, y_train, k=k)

# Evaluate using KNN
y_pred = [knn_predict(cleaned_X_train, cleaned_y_train, test_point, k=k) for test_point in X_test]
accuracy = accuracy_score(y_test, y_pred)

# Results
print(f"Original training size: {len(X_train)}")
print(f"Cleaned training size: {len(cleaned_X_train)}")
print(f"Accuracy with cleaned dataset: {accuracy * 100:.2f}%")


Original training size: 105
Cleaned training size: 100
Accuracy with cleaned dataset: 95.56%


In [5]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

def knn_predict(X_train, y_train, test_point, k=1):
    """
    Perform KNN classification for a single test point.
    """
    distances = np.linalg.norm(X_train - test_point, axis=1)
    nearest_indices = np.argsort(distances)[:k]
    nearest_labels = y_train[nearest_indices]
    unique, counts = np.unique(nearest_labels, return_counts=True)
    return unique[np.argmax(counts)]

def selective_nearest_neighbor(X_train, y_train, k=1):
    """
    Selective Nearest Neighbor algorithm to reduce the dataset size.
    """
    S_X, S_y = [], []  # Start with an empty subset
    
    for i in range(len(X_train)):
        if len(S_X) == 0:
            S_X.append(X_train[i])
            S_y.append(y_train[i])
        else:
            pred = knn_predict(np.array(S_X), np.array(S_y), X_train[i], k=k)
            if pred != y_train[i]:  # Misclassified point
                S_X.append(X_train[i])
                S_y.append(y_train[i])
    
    return np.array(S_X), np.array(S_y)

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Apply SNN
k = 3
selected_X_train, selected_y_train = selective_nearest_neighbor(X_train, y_train, k=k)

# Evaluate using KNN
y_pred = [knn_predict(selected_X_train, selected_y_train, test_point, k=k) for test_point in X_test]
accuracy = accuracy_score(y_test, y_pred)

# Results
print(f"Original training size: {len(X_train)}")
print(f"Selected training size: {len(selected_X_train)}")
print(f"Accuracy with selected dataset: {accuracy * 100:.2f}%")


Original training size: 105
Selected training size: 18
Accuracy with selected dataset: 88.89%


In [6]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

def find_voronoi_boundary_points(X_train, y_train, k=3):
    """
    Identify boundary points using a Voronoi-based approach.
    """
    # Train a KNN classifier
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    
    boundary_points = []
    boundary_labels = []
    
    for i, point in enumerate(X_train):
        # Predict the label of the point using its neighbors
        neighbors = knn.kneighbors([point], return_distance=False)[0]
        neighbor_labels = y_train[neighbors]
        
        # Check if neighbors belong to more than one class
        if len(np.unique(neighbor_labels)) > 1:
            boundary_points.append(point)
            boundary_labels.append(y_train[i])
    
    return np.array(boundary_points), np.array(boundary_labels)

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Apply VBNN
k = 3
boundary_X_train, boundary_y_train = find_voronoi_boundary_points(X_train, y_train, k=k)

# Evaluate using KNN
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(boundary_X_train, boundary_y_train)
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Results
print(f"Original training size: {len(X_train)}")
print(f"Boundary training size: {len(boundary_X_train)}")
print(f"Accuracy with boundary dataset: {accuracy * 100:.2f}%")


Original training size: 105
Boundary training size: 9
Accuracy with boundary dataset: 44.44%
