In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the whitespace-delimited data file into a DataFrame.
file_path = "pop_failures.dat"
# Raw string: "\s" is not a valid Python string escape, so a plain "\s+"
# literal triggers an invalid-escape warning on current interpreters. The
# regex itself (one or more whitespace characters) is unchanged.
df = pd.read_table(file_path, sep=r"\s+")
print("Dataset Shape:", df.shape)
# Show the first rows as the cell output.
df.head()

In [None]:
# Print a summary of the frame (columns, non-null counts, dtypes) to check
# for missing values and unexpected column types before modelling.
df.info()

In [None]:
# Remove, in place, every row that contains at least one missing value.
# NOTE: this mutates `df`, so cells displayed earlier no longer reflect it.
df.dropna(axis="index", how="any", inplace=True)

In [None]:
# Compare KNN test accuracy on the standardized features against KNN on a
# 2-component PCA projection, for k = 1..20.

# Feature matrix: columns at positions 2..19; target: column at position 20.
# presumably the first two columns are identifiers rather than features -
# TODO confirm against the dataset description.
X = df.iloc[:, 2:20].values
y = df.iloc[:, 20].values

# Split BEFORE any fitting so that the scaler and PCA never see the test rows.
# Fitting them on the full dataset leaks test-set statistics into the
# preprocessing and makes the reported accuracies optimistic.
# NOTE(review): if the target classes are imbalanced, consider `stratify=y`.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize using the training-set mean/variance only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Learn the 2-D projection from the training rows only, then apply it to test.
pca = PCA(n_components=2)
X_pca_train = pca.fit_transform(X_train)
X_pca_test = pca.transform(X_test)

# Full-dataset views, kept under their original names in case later cells use
# them. They are produced with the train-only fits above.
X_scaled = scaler.transform(X)
X_pca = pca.transform(X_scaled)

k_values = range(1, 21)
accuracies_original = []
accuracies_pca = []

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)

    # Accuracy on the standardized original features.
    knn.fit(X_train, y_train)
    y_pred_original = knn.predict(X_test)
    accuracies_original.append(accuracy_score(y_test, y_pred_original))

    # Refit the same estimator on the PCA-reduced features; fit() discards
    # the previous training data, so the two evaluations are independent.
    knn.fit(X_pca_train, y_train)
    y_pred_pca = knn.predict(X_pca_test)
    accuracies_pca.append(accuracy_score(y_test, y_pred_pca))

In [None]:
# Draw test accuracy as a function of k for both feature sets on one axes.
fig, ax = plt.subplots(figsize=(10, 5))

# (scores, legend label, marker) for each curve, in drawing order.
curves = (
    (accuracies_original, 'KNN without PCA', 'o'),
    (accuracies_pca, 'KNN with PCA', 's'),
)
for scores, curve_label, curve_marker in curves:
    ax.plot(k_values, scores, label=curve_label, marker=curve_marker)

ax.set_xlabel("K Value")
ax.set_ylabel("Accuracy")
ax.set_title("KNN Accuracy Comparison with and without PCA")
ax.legend()
ax.grid()
plt.show()