In [7]:
import pandas as pd
import numpy as np

class KNN_algorithm:
    """Minimal k-nearest-neighbours classifier with a dataset preview helper.

    The instance keeps a reference to the dataset it was created with (used
    only by :meth:`loadData`) and, after :meth:`fit`, the training features
    and labels used by :meth:`knn`.
    """

    def __init__(self, data):
        """Store *data* and start with no training set.

        Args:
            data: Object exposing ``head()`` (e.g. a pandas DataFrame); it is
                only used by :meth:`loadData`.
        """
        self.data = data
        self.X_train = None
        self.y_train = None

    def loadData(self):
        """Print the first few rows of the dataset (``self.data.head()``)."""
        print(self.data.head())

    @staticmethod
    def euclidean_distance(x1, x2, y1, y2):
        """Return the Euclidean distance between the 2-D points (x1, y1) and (x2, y2).

        Note the argument order: both x-coordinates come first, then both
        y-coordinates. This helper is limited to two dimensions; :meth:`knn`
        computes n-dimensional distances itself.
        """
        return np.sqrt((x2 - x1)**2 + (y2 - y1)**2)

    def fit(self, X, y):
        """Store the training features *X* and labels *y* for later queries.

        Args:
            X: 2-D table of numeric features, one row per sample (DataFrame,
                NumPy array or nested list).
            y: Labels aligned with the rows of *X*. May be 1-D (Series, list,
                array) or a single-column 2-D table; if several columns are
                given, only the first is used as the label.
        """
        self.X_train = X
        self.y_train = y

    def knn(self, new_point, k=3):
        """Classify *new_point* by majority vote among its k nearest samples.

        Distances are Euclidean over all feature columns. Feature values are
        matched to the training columns by position, so *new_point* must list
        its values in the same order as the columns of the training features.

        Args:
            new_point: Sequence of numeric feature values, one per training
                feature column.
            k: Number of neighbours that vote. Values larger than the number
                of training samples use every sample.

        Returns:
            The most frequent label among the k nearest training samples.
            When several labels are equally frequent, the one belonging to
            the nearest sample wins.

        Raises:
            RuntimeError: If :meth:`fit` has not been called yet.
            ValueError: If ``k < 1``, the training set is empty or not 2-D,
                the number of labels differs from the number of samples, or
                *new_point* has the wrong number of features.
        """
        # Function-scope import keeps this class usable without touching the
        # file's import section.
        from collections import Counter

        if self.X_train is None or self.y_train is None:
            raise RuntimeError("fit() must be called before knn()")
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")

        features = np.asarray(self.X_train, dtype=float)
        if features.ndim != 2 or features.shape[0] == 0:
            raise ValueError("training features must be a non-empty 2-D table")

        # Accept both a 1-D label vector and a single-column table.
        labels = np.atleast_1d(np.asarray(self.y_train))
        if labels.ndim > 1:
            labels = labels[:, 0]
        if labels.shape[0] != features.shape[0]:
            raise ValueError(
                f"got {labels.shape[0]} labels for {features.shape[0]} samples"
            )

        point = np.asarray(new_point, dtype=float).ravel()
        if point.shape[0] != features.shape[1]:
            raise ValueError(
                f"new_point has {point.shape[0]} features, "
                f"expected {features.shape[1]}"
            )

        # One vectorised pass instead of a Python loop over rows.
        distances = np.sqrt(((features - point) ** 2).sum(axis=1))

        # A stable sort keeps equally distant samples in training order.
        nearest = np.argsort(distances, kind="stable")[:k]

        # Counter orders equal counts by first occurrence, and the labels are
        # fed nearest-first, so ties resolve towards the closest neighbour.
        votes = Counter(labels[nearest])
        return votes.most_common(1)[0][0]


# Demo: preview diabetes.csv, then classify one hand-written record with k-NN.
from sklearn.model_selection import train_test_split

# Columns used as model inputs and as the label, respectively.
FEATURE_COLUMNS = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]
TARGET_COLUMNS = ['Outcome']

# Read the CSV and slice it into a feature table and a one-column label table.
dataFram = pd.read_csv('diabetes.csv')
X = dataFram[FEATURE_COLUMNS]
y = dataFram[TARGET_COLUMNS]

# The classifier object doubles as the data previewer.
display_data = KNN_algorithm(dataFram)
display_data.loadData()

# 80/20 split with a fixed seed; only the training part is used below
# (X_test / y_test are created but not evaluated in this cell).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
display_data.fit(X_train, y_train)

# Example record, values listed in the same order as FEATURE_COLUMNS.
new_data = [8, 180, 2, 55, 0, 33.6, 0.627, 100]
prediction = display_data.knn(new_data, k=3)
print(f"The new data is classified as: {prediction}")


   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  
The new data is classified as: 1


- __init__(self, data): Initializes the class with the dataset.
- loadData(self): Displays the first few rows of the dataset.
- euclidean_distance(x1, x2, y1, y2): Calculates the Euclidean distance between the 2-D points (x1, y1) and (x2, y2) (static method; not used by knn).
- fit(self, X, y): Stores training data for later use.
- knn(self, new_point, k): Implements the KNN algorithm to classify a new data point.