<a href="https://colab.research.google.com/github/mohanbaskaran7373-jpg/prediction-of-loan-/blob/main/CUSTOMER_DATA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import numpy as np
import pandas as pd
import time
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# ==========================================
# 1. Dataset Generation (Customer Data)
# ==========================================
# Specification: 500 samples, 10 features, Binary Classification
np.random.seed(42)
n_samples = 500
n_features = 10

# Creating synthetic customer features (e.g., Credit Score, Age, Balance, etc.)
X = np.random.randn(n_samples, n_features)
# Target: 0 (Stayed), 1 (Churned)
y = np.random.randint(0, 2, n_samples)

# Data Scaling (KNN algorithm-ku scale panrathu romba mukkiyam)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-Test Split (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# ==========================================
# 2. Custom KNN Implementation (NumPy only)
# ==========================================
class CustomerKNN:
    def __init__(self, k=5):
        self.k = k

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X_test_data):
        predictions = [self._get_single_prediction(x) for x in X_test_data]
        return np.array(predictions)

    def _get_single_prediction(self, x):
        # Step 1: Euclidean Distance calculation
        distances = [np.sqrt(np.sum((x - x_train_point)**2)) for x_train_point in self.X_train]

        # Step 2: K-nearest neighbors indices find panrathu
        k_indices = np.argsort(distances)[:self.k]

        # Step 3: Labels extraction and Majority Voting
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

# ==========================================
# 3. Execution & Reporting Function
# ==========================================
def run_comparison(k_val):
    print(f"\n--- Project Report for K = {k_val} ---")

    # Custom KNN Execution
    start_time = time.time()
    my_model = CustomerKNN(k=k_val)
    my_model.fit(X_train, y_train)
    my_predictions = my_model.predict(X_test)
    custom_duration = time.time() - start_time
    custom_acc = np.mean(my_predictions == y_test) * 100

    # Scikit-Learn KNN Execution (Comparison)
    start_time = time.time()
    sklearn_model = KNeighborsClassifier(n_neighbors=k_val)
    sklearn_model.fit(X_train, y_train)
    sklearn_acc = sklearn_model.score(X_test, y_test) * 100
    sklearn_duration = time.time() - start_time

    # Output Results
    print(f"Custom KNN  | Accuracy: {custom_acc:.2f}% | Time: {custom_duration:.5f}s")
    print(f"Sklearn KNN | Accuracy: {sklearn_acc:.2f}% | Time: {sklearn_duration:.5f}s")

# Running the required K values
run_comparison(5)
run_comparison(10)


--- Project Report for K = 5 ---
Custom KNN  | Accuracy: 45.00% | Time: 0.43684s
Sklearn KNN | Accuracy: 45.00% | Time: 0.00924s

--- Project Report for K = 10 ---
Custom KNN  | Accuracy: 41.00% | Time: 0.44522s
Sklearn KNN | Accuracy: 44.00% | Time: 0.00799s
