### **K-Nearest Neighbors (KNN)**

K-Nearest Neighbors (KNN) is a widely used machine-learning algorithm for classification and regression tasks. Given a dataset whose points have already been labeled with categories, KNN classifies a new data point based on its nearest neighbors in that dataset.

`Steps to follow:`
<br>
Select the value of K: K represents the number of nearest neighbors that will be considered for classification.<br>
Calculate the distance: Calculate the distance (e.g., the Euclidean distance) between the new data point and each point in the dataset.<br>
Identify the nearest neighbors: Select the K nearest points to the new data point.<br>
Assign a class: Assign the most common class among its K nearest neighbors to the new data point (for regression, predict the average of the neighbors' values instead).<br>

`USAGE EXAMPLES`
<br>
Product Recommendation: An e-commerce website could use KNN to recommend products to a user based on products they have previously purchased or viewed.

In [None]:
!pip3 install numpy pandas matplotlib scikit-learn imbalanced-learn

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Load the Iris dataset and pull out the feature matrix (all four features)
# together with the class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Wrap the features in a DataFrame with named columns, append the label
# column, and preview the first rows.
df = pd.DataFrame(data=X, columns=iris.feature_names)
df['target'] = y
df.head()

In [None]:
# Split the raw data into training and test sets BEFORE any fitting step, so
# the held-out rows cannot influence the preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Normalize the data: the scaler learns its mean/variance from the training
# rows only. Fitting it on the full dataset would leak test-set statistics
# into the features the model is trained and evaluated on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the complete dataset (using the training statistics), kept
# for the cells that operate on all samples at once, e.g. the PCA plot.
X_scaled = scaler.transform(X)

# Show the first rows of scaled data. This is the last expression of the cell
# so that the notebook actually renders it.
pd.DataFrame(X_scaled, columns=iris.feature_names).head()

In [None]:
# Instantiate a k-nearest-neighbours classifier that looks at the 5 closest samples
knn = KNeighborsClassifier(n_neighbors=5)

# Fit the classifier on the training split
knn.fit(X=X_train, y=y_train)

In [None]:
# Run the fitted classifier on the held-out samples
y_pred = knn.predict(X_test)

# Compute every evaluation metric up front ...
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(
    y_test,
    y_pred,
    target_names=iris.target_names,
)

# ... then report them: overall accuracy, confusion matrix, per-class report
print(f'Accuracy: {accuracy:.2f}')
print('Confusion Matrix:\n', conf_matrix)
print('Classification Report:\n', class_report)

In [None]:
# Apply PCA to reduce dimensionality to 2D for visualization
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Train a separate KNN model on the 2D projection. The training labels come
# from this very split, so features and targets stay aligned without relying
# on `y_train` from another cell having been produced with the same seed.
X_train_pca, X_test_pca, y_train_pca, _ = train_test_split(
    X_pca, y, test_size=0.3, random_state=42
)
knn_pca = KNeighborsClassifier(n_neighbors=5)
knn_pca.fit(X_train_pca, y_train_pca)

# Create a mesh grid (step 0.01, padded by 1 on every side) to draw the
# decision boundaries
x_min, x_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
y_min, y_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))

# Predict the class of every mesh point, then restore the grid shape
Z = knn_pca.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the predicted regions as filled contours and overlay all samples,
# colored by their true class
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.RdYlBu)
scatter = plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, s=30, edgecolor='k', cmap=plt.cm.RdYlBu)

# Add labels
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('KNN: Decision Boundary (PCA)')

# Legend with one entry per class. The class names are converted to a plain
# list because passing the NumPy array directly can raise an
# ambiguous-truth-value error inside plt.legend().
legend_handles = scatter.legend_elements()[0]
plt.legend(handles=legend_handles, labels=list(iris.target_names))
plt.show()

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

# Example product catalogue: one entry per product with its price and a
# popularity score
products = dict(
    Product=['Laptop', 'Phone', 'Tablet', 'Smartwatch', 'Headphones', 'TV'],
    Price=[1000, 800, 500, 300, 200, 150],
    Popularity=[10, 8, 6, 4, 2, 1],
)
df_products = pd.DataFrame(data=products)

# Example purchase history of a single user, identified by product name
user_purchases = dict(Product=['Laptop', 'Phone', 'Headphones'])
df_user_purchases = pd.DataFrame(data=user_purchases)

# Define a function to recommend products to the user using KNN
def recommend_products(df_products, df_user_purchases, k=3):
    """Recommend up to ``k`` not-yet-purchased products similar to the user's purchases.

    Every purchased product that exists in the catalogue is used as a query
    point. Its nearest neighbors are searched among the products the user
    does not own yet, and each candidate is ranked by its smallest distance
    to any purchased product. The result is printed and also returned.

    Args:
        df_products: Catalogue DataFrame with the columns ``'Product'``,
            ``'Price'`` and ``'Popularity'``.
        df_user_purchases: DataFrame with a ``'Product'`` column listing what
            the user bought. Names missing from the catalogue are ignored.
        k: Maximum number of products to recommend.

    Returns:
        List of recommended product names, closest first. Empty when none of
        the purchases is in the catalogue or nothing is left to recommend.

    Raises:
        ValueError: Propagated from scikit-learn when ``k`` is not a positive
            integer.
    """
    # NOTE: features are used unscaled, so with catalogue values like the
    # example data 'Price' dominates the distance over 'Popularity'.
    feature_cols = ['Price', 'Popularity']

    # Split the catalogue into products the user owns (the query points) and
    # products that may still be recommended (the search space). Purchases
    # that are not in the catalogue simply match nothing here, instead of
    # being turned into an artificial (0, 0) query point.
    owned = df_products['Product'].isin(df_user_purchases['Product'])
    X_user = df_products.loc[owned, feature_cols]
    candidates = df_products.loc[~owned]

    recommendations = []
    if not X_user.empty and not candidates.empty:
        # Never request more neighbors than there are candidates to search
        knn_model = NearestNeighbors(n_neighbors=min(k, len(candidates)))
        knn_model.fit(candidates[feature_cols])

        # One row of neighbors per purchased product
        distances, indices = knn_model.kneighbors(X_user)

        # Keep, for every candidate, its smallest distance to any purchase
        best_distance = {}
        for row_distances, row_indices in zip(distances, indices):
            for distance, position in zip(row_distances, row_indices):
                name = candidates.iloc[position]['Product']
                if distance < best_distance.get(name, float('inf')):
                    best_distance[name] = distance

        recommendations = sorted(best_distance, key=best_distance.get)[:k]

    # Show recommended products
    print("Recommended Products:")
    for name in recommendations:
        print("- {}".format(name))

    return recommendations

# Run the recommender on the example catalogue and purchase history
recommend_products(
    df_products=df_products,
    df_user_purchases=df_user_purchases,
    k=3,
)