<a href="https://colab.research.google.com/github/priyanshu7466/Python/blob/main/Kernel_PCA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
from PIL import Image

# Folders holding the images; each folder's base name is used as the class label.
folder_paths = ['/content/drive/MyDrive/Data/Cloud', '/content/drive/MyDrive/Data/Desert', '/content/drive/MyDrive/Data/Green Area','/content/drive/MyDrive/Data/Water']

# Every image is resized to this (width, height) so the arrays can be stacked.
common_size = (60,60)

X = []  # image arrays, one per file
Y = []  # labels, parallel to X

for folder_path in folder_paths:
    label = os.path.basename(folder_path)  # label from the folder name
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split downstream) reproducible.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(('.jpg', '.png')):
            continue
        image_path = os.path.join(folder_path, file_name)
        # The context manager closes the underlying file once the pixels are read.
        with Image.open(image_path) as image:
            # convert('RGB') always yields exactly three channels, so no
            # separate alpha-channel stripping is required afterwards.
            image_array = np.array(image.convert('RGB').resize(common_size))
        X.append(image_array)
        Y.append(label)

# Stack the per-image arrays / labels into single numpy arrays
X = np.array(X)
Y = np.array(Y)

print("X shape:", X.shape)
print("Y shape:", Y.shape)

X shape: (80, 60, 60, 3)
Y shape: (80,)


In [None]:
import numpy as np

# Define the RBF (Gaussian) kernel
def rbf_kernel(X, gamma=1.0):
    """Return the RBF kernel matrix between the rows of ``X``.

    ``K[i, j] = exp(-gamma * ||X[i] - X[j]||^2)``

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples, one per row. A 1-D input is treated as ``n_samples``
        single-feature samples.
    gamma : float, default 1.0
        Multiplier applied to the squared Euclidean distance.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_samples)
    """
    # Work in float64: with integer inputs (e.g. uint8 pixels) both the
    # subtraction and the dot product would wrap around silently and give
    # wrong distances.
    X = np.asarray(X, dtype=np.float64)
    if X.ndim == 1:
        X = X[:, None]

    n_samples = X.shape[0]
    K = np.empty((n_samples, n_samples))
    for i in range(n_samples):
        # Squared distance from sample i to every sample in one shot.
        diff = X - X[i]
        K[i] = np.exp(-gamma * np.sum(diff * diff, axis=1))
    return K

# Kernel PCA implementation
def kernel_pca(X, n_components, kernel_func, **kwargs):
    """Compute the leading normalised kernel-PCA coefficient vectors for ``X``.

    Parameters
    ----------
    X : array-like
        Samples; passed unchanged to ``kernel_func``.
    n_components : int
        Number of leading components to keep.
    kernel_func : callable
        Called as ``kernel_func(X, **kwargs)``; must return a square
        (n_samples, n_samples) kernel matrix.
    **kwargs
        Extra keyword arguments forwarded to ``kernel_func``.

    Returns
    -------
    alphas : numpy.ndarray of shape (n_samples, n_components)
        Eigenvectors of the centred kernel matrix for the largest
        eigenvalues, each divided by the square root of its eigenvalue.
        Columns whose eigenvalue is not numerically positive are returned as
        zeros rather than inf/NaN.
    """
    K = np.asarray(kernel_func(X, **kwargs), dtype=np.float64)
    n_samples = K.shape[0]

    # Double-centre the kernel matrix. Subtracting the column means, the row
    # means and adding back the grand mean is the same as
    # K - 1n.K - K.1n + 1n.K.1n with 1n = ones((n, n)) / n.
    K_centered = (
        K
        - K.mean(axis=0, keepdims=True)
        - K.mean(axis=1, keepdims=True)
        + K.mean()
    )

    # Eigen decomposition, largest eigenvalues first.
    eigvals, eigvecs = np.linalg.eigh(K_centered)
    order = np.argsort(eigvals)[::-1][:n_components]
    top_vals = eigvals[order]
    top_vecs = eigvecs[:, order]

    # A centred kernel matrix always has at least one (numerically) zero
    # eigenvalue, and round-off can make it slightly negative. Dividing by its
    # square root would yield inf/NaN, so such components are zeroed instead.
    tol = n_samples * np.finfo(np.float64).eps * eigvals.max(initial=0.0)
    usable = top_vals > tol
    alphas = np.zeros_like(top_vecs)
    alphas[:, usable] = top_vecs[:, usable] / np.sqrt(top_vals[usable])

    return alphas

# Sample classification algorithm (k-nearest neighbors)
def knn_classifier(X_train, y_train, X_test, k=3):
    """Classify each row of ``X_test`` by majority vote of its nearest training rows.

    Parameters
    ----------
    X_train : numpy.ndarray
        Training samples, one per row.
    y_train : numpy.ndarray
        Non-negative integer labels aligned with ``X_train`` (they are
        counted with ``np.bincount``).
    X_test : numpy.ndarray
        Query samples, one per row.
    k : int, default 3
        Number of neighbours taking part in the vote.

    Returns
    -------
    numpy.ndarray of shape (n_test,)
        Predicted labels stored as floats; on a tied vote the smallest label
        wins.
    """
    n_test, n_train = X_test.shape[0], X_train.shape[0]

    # Euclidean distance from every query row to every training row.
    distances = np.zeros((n_test, n_train))
    for row, query in enumerate(X_test):
        for col, reference in enumerate(X_train):
            distances[row, col] = np.linalg.norm(query - reference)

    # Column indices of the k closest training rows for each query.
    neighbour_idx = np.argsort(distances, axis=1)[:, :k]

    y_pred = np.zeros(n_test)
    for row, idx in enumerate(neighbour_idx):
        votes = np.bincount(y_train[idx])
        y_pred[row] = votes.argmax()
    return y_pred

# Example usage: RBF kernel PCA on the loaded images, followed by k-NN
if __name__ == "__main__":
    # Imported inside the cell so it runs on a fresh kernel without relying on
    # an import performed by a later cell.
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder

    # Flatten each image into a 1D feature vector. The cast to float64 keeps
    # the kernel's pixel differences from being evaluated in an integer dtype,
    # where they would wrap around.
    X_flat = X.reshape(X.shape[0], -1).astype(np.float64)

    # Kernel PCA with RBF kernel
    # NOTE(review): if X holds raw 0-255 pixel values the squared distances are
    # very large, so gamma=0.1 may drive the kernel towards the identity
    # matrix -- confirm the intended feature scaling / gamma.
    n_components = 2
    alphas = kernel_pca(X_flat, n_components, rbf_kernel, gamma=0.1)

    # Map the string labels to integers, as the k-NN vote counts integer labels.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(Y)

    # Classification using k-nearest neighbors
    X_train, X_test, y_train, y_test = train_test_split(alphas, y_encoded, test_size=0.2, random_state=42)
    y_pred = knn_classifier(X_train, y_train, X_test)

    # knn_classifier returns floats; cast back to int before decoding.
    y_pred_decoded = label_encoder.inverse_transform(y_pred.astype(int))

    # Evaluation
    accuracy = np.mean(y_pred == y_test)
    print("Accuracy:", accuracy)

Accuracy: 0.3125


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from scipy.io import loadmat
from sklearn.preprocessing import LabelEncoder
def gaussian_kernel(X, sigma=1.0):
    """Return the Gaussian kernel matrix between the rows of ``X``.

    ``K[i, j] = exp(-||X[i] - X[j]||^2 / (2 * sigma^2))``

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples, one per row. A 1-D input is treated as ``n_samples``
        single-feature samples.
    sigma : float, default 1.0
        Kernel bandwidth.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_samples)
    """
    # Work in float64: with unsigned-integer inputs (e.g. uint8 pixels) the
    # subtraction, the dot product and the negation all wrap around, which
    # produces overflow warnings and kernel values far greater than 1.
    X = np.asarray(X, dtype=np.float64)
    if X.ndim == 1:
        X = X[:, None]

    n_samples = X.shape[0]
    two_sigma_sq = 2 * sigma ** 2
    K = np.empty((n_samples, n_samples))
    for i in range(n_samples):
        # Squared distance from sample i to every sample in one shot.
        diff = X - X[i]
        K[i] = np.exp(-np.sum(diff * diff, axis=1) / two_sigma_sq)
    return K

def kernel_pca(X, n_components, kernel_func, **kwargs):
    """Compute the leading normalised kernel-PCA coefficient vectors for ``X``.

    Parameters
    ----------
    X : array-like
        Samples; passed unchanged to ``kernel_func``.
    n_components : int
        Number of leading components to keep.
    kernel_func : callable
        Called as ``kernel_func(X, **kwargs)``; must return a square
        (n_samples, n_samples) kernel matrix.
    **kwargs
        Extra keyword arguments forwarded to ``kernel_func``.

    Returns
    -------
    alphas : numpy.ndarray of shape (n_samples, n_components)
        Eigenvectors of the centred kernel matrix for the largest
        eigenvalues, each divided by the square root of its eigenvalue.
        Columns whose eigenvalue is not numerically positive are returned as
        zeros rather than inf/NaN.
    """
    K = np.asarray(kernel_func(X, **kwargs), dtype=np.float64)
    n_samples = K.shape[0]

    # Double-centring: remove column means and row means, add back the grand
    # mean (equivalent to K - 1n.K - K.1n + 1n.K.1n, 1n = ones((n, n)) / n).
    K_centered = (
        K
        - K.mean(axis=0, keepdims=True)
        - K.mean(axis=1, keepdims=True)
        + K.mean()
    )

    eigvals, eigvecs = np.linalg.eigh(K_centered)
    order = np.argsort(eigvals)[::-1][:n_components]  # largest first
    top_vals = eigvals[order]
    top_vecs = eigvecs[:, order]

    # The centred kernel always has a (numerically) zero eigenvalue which may
    # come out slightly negative; dividing by its square root would give
    # inf/NaN, so those components are zeroed instead.
    tol = n_samples * np.finfo(np.float64).eps * eigvals.max(initial=0.0)
    usable = top_vals > tol
    alphas = np.zeros_like(top_vecs)
    alphas[:, usable] = top_vecs[:, usable] / np.sqrt(top_vals[usable])

    return alphas

def knn_classifier(X_train, y_train, X_test, k=3):
    """Predict a label for each row of ``X_test`` from its ``k`` nearest training rows.

    Distances are Euclidean. ``y_train`` must hold non-negative integers
    because votes are tallied with ``np.bincount``; on a tied vote the
    smallest label wins. The predictions are returned as a float array of
    length ``X_test.shape[0]``.
    """
    n_test, n_train = X_test.shape[0], X_train.shape[0]

    # Fill the (n_test, n_train) distance table one query row at a time.
    distances = np.zeros((n_test, n_train))
    for q, query in enumerate(X_test):
        for t, sample in enumerate(X_train):
            distances[q, t] = np.linalg.norm(query - sample)

    # Keep only the indices of the k closest training rows per query.
    closest = np.argsort(distances, axis=1)[:, :k]

    y_pred = np.zeros(n_test)
    for q, neighbour_rows in enumerate(closest):
        tally = np.bincount(y_train[neighbour_rows])
        y_pred[q] = tally.argmax()
    return y_pred

if __name__ == "__main__":
    from sklearn.model_selection import train_test_split

    # Flatten each image into a 1D feature vector. The cast to float64 keeps
    # the kernel's pixel differences from being evaluated in an integer dtype,
    # where they would wrap around and trigger overflow warnings.
    X_flat = X.reshape(X.shape[0], -1).astype(np.float64)

    # Kernel PCA with the Gaussian kernel
    # NOTE(review): if X holds raw 0-255 pixel values, sigma=1.0 is tiny
    # compared with typical distances and the kernel may be close to the
    # identity matrix -- confirm the intended feature scaling / sigma.
    n_components = 2
    alphas = kernel_pca(X_flat, n_components, gaussian_kernel, sigma=1.0)

    # Map the string labels to integers, as the k-NN vote counts integer labels.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(Y)

    X_train, X_test, y_train, y_test = train_test_split(alphas, y_encoded, test_size=0.1, random_state=42)
    y_pred = knn_classifier(X_train, y_train, X_test)

    # knn_classifier returns floats; cast back to int before decoding.
    y_pred_decoded = label_encoder.inverse_transform(y_pred.astype(int))

    accuracy = np.mean(y_pred == y_test)
    print("Accuracy:", accuracy)

  K[i, j] = np.exp(-np.dot(diff, diff) / (2 * sigma ** 2))


Accuracy: 0.5


In [None]:
# Polynomial kernel

def polynomial_kernel(X, degree=2):
    """Return the polynomial kernel matrix ``(X @ X.T + 1) ** degree``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples, one per row.
    degree : int or float, default 2
        Exponent applied element-wise to the shifted Gram matrix.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_samples)
    """
    # Work in float64: np.dot on integer arrays keeps the integer dtype, so
    # e.g. uint8 pixel data would overflow in both the product and the power.
    X = np.asarray(X, dtype=np.float64)
    return (np.dot(X, X.T) + 1) ** degree

def kernel_pca(X, n_components, kernel_func, **kwargs):
    """Compute the leading normalised kernel-PCA coefficient vectors for ``X``.

    Parameters
    ----------
    X : array-like
        Samples; passed unchanged to ``kernel_func``.
    n_components : int
        Number of leading components to keep.
    kernel_func : callable
        Called as ``kernel_func(X, **kwargs)``; must return a square
        (n_samples, n_samples) kernel matrix.
    **kwargs
        Extra keyword arguments forwarded to ``kernel_func``.

    Returns
    -------
    alphas : numpy.ndarray of shape (n_samples, n_components)
        Eigenvectors of the centred kernel matrix for the largest
        eigenvalues, each divided by the square root of its eigenvalue.
        Columns whose eigenvalue is not numerically positive are returned as
        zeros rather than inf/NaN.
    """
    K = np.asarray(kernel_func(X, **kwargs), dtype=np.float64)

    n_samples = K.shape[0]
    # Double-centring: remove column means and row means, add back the grand
    # mean (equivalent to K - 1n.K - K.1n + 1n.K.1n, 1n = ones((n, n)) / n).
    K_centered = (
        K
        - K.mean(axis=0, keepdims=True)
        - K.mean(axis=1, keepdims=True)
        + K.mean()
    )

    eigvals, eigvecs = np.linalg.eigh(K_centered)
    order = np.argsort(eigvals)[::-1][:n_components]  # largest first
    top_vals = eigvals[order]
    top_vecs = eigvecs[:, order]

    # The centred kernel always has a (numerically) zero eigenvalue which may
    # come out slightly negative; dividing by its square root would give
    # inf/NaN, so those components are zeroed instead.
    tol = n_samples * np.finfo(np.float64).eps * eigvals.max(initial=0.0)
    usable = top_vals > tol
    alphas = np.zeros_like(top_vecs)
    alphas[:, usable] = top_vecs[:, usable] / np.sqrt(top_vals[usable])

    return alphas

def knn_classifier(X_train, y_train, X_test, k=3):
    """Assign each row of ``X_test`` the most common label among its ``k`` nearest training rows.

    Nearness is Euclidean distance. Labels in ``y_train`` are counted with
    ``np.bincount`` and so must be non-negative integers; on a tied vote the
    smallest label wins. Returns a float array with one prediction per test
    row.
    """
    n_test, n_train = X_test.shape[0], X_train.shape[0]

    # distances[a, b] = Euclidean distance between test row a and train row b.
    distances = np.zeros((n_test, n_train))
    for a, test_row in enumerate(X_test):
        for b, train_row in enumerate(X_train):
            distances[a, b] = np.linalg.norm(test_row - train_row)

    # Sort each row by distance and keep the first k training indices.
    top_k = np.argsort(distances, axis=1)[:, :k]

    y_pred = np.zeros(n_test)
    for a, picked in enumerate(top_k):
        counts = np.bincount(y_train[picked])
        y_pred[a] = counts.argmax()
    return y_pred

if __name__ == "__main__":
    # Imported inside the cell so it does not depend on an import performed by
    # an earlier cell.
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder

    # Flatten each image into a 1D feature vector. The cast to float64 keeps
    # the kernel's dot products and powers from being evaluated in an integer
    # dtype, where they would overflow.
    X_flat = X.reshape(X.shape[0], -1).astype(np.float64)

    # Kernel PCA with the polynomial kernel
    n_components = 2
    alphas = kernel_pca(X_flat, n_components, polynomial_kernel, degree=3)

    # Map the string labels to integers, as the k-NN vote counts integer labels.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(Y)

    X_train, X_test, y_train, y_test = train_test_split(alphas, y_encoded, test_size=0.3, random_state=42)
    y_pred = knn_classifier(X_train, y_train, X_test)

    # knn_classifier returns floats; cast back to int before decoding.
    y_pred_decoded = label_encoder.inverse_transform(y_pred.astype(int))

    accuracy = np.mean(y_pred == y_test)
    print("Accuracy:", accuracy)

Accuracy: 0.20833333333333334


In [None]:
import os
import numpy as np
from PIL import Image

# Path of the single image loaded in this cell
image_path = "/content/drive/MyDrive/Data/Green Area/Forest_23.jpg"

# Target (width, height) for the resized image
common_size = (3,3)

# The context manager closes the underlying file once the pixels are read.
with Image.open(image_path) as image:
    # convert('RGB') always yields exactly three channels, so no separate
    # alpha-channel stripping is required afterwards.
    image_array = np.array(image.convert('RGB').resize(common_size))

# z is the image as a (height, width, 3) array
z = image_array
print("Z shape:", z.shape)

Z shape: (3, 3, 3)


In [None]:
# Example of a kernel matrix computed with the Gaussian kernel

import numpy as np
import matplotlib.pyplot as plt

# Function to compute Gaussian kernel
def gaussian_kernel(z, sigma=1.0):
    """Gaussian kernel matrix between the diagonal entries ``z[i][i]`` of ``z``.

    ``K[i, j] = exp(-||z[i][i] - z[j][j]||^2 / (2 * sigma^2))``

    Parameters
    ----------
    z : array-like with at least two equal leading dimensions
        For an (n, n, channels) image, ``z[i][i]`` is the channel vector of
        the i-th diagonal pixel.
    sigma : float, default 1.0
        Kernel bandwidth.

    Returns
    -------
    numpy.ndarray of shape (n, n), where ``n = z.shape[0]``.
    """
    # NOTE(review): only the diagonal entries z[i][i] are compared. If whole
    # rows were meant to be the samples, z should be reshaped to
    # (n_samples, -1) and rows compared instead -- confirm the intent.

    # Work in float64: with uint8 pixel data the subtraction, the dot product
    # and the negation wrap around, which yields overflow warnings and
    # "kernel" values far greater than 1.
    z = np.asarray(z, dtype=np.float64)
    n_samples = z.shape[0]
    K = np.zeros((n_samples, n_samples))
    for i in range(n_samples):
        for j in range(n_samples):
            diff = z[i][i] - z[j][j]
            norm_sq = np.dot(diff, diff)
            K[i, j] = np.exp(-norm_sq / (2 * (sigma ** 2)))
    return K

# Kernel matrix of the image loaded above, with bandwidth sigma = 1
K = gaussian_kernel(z, sigma=1)
print(K)

[[1.00000000e+00 3.19122539e+54 7.12058633e+53]
 [3.19122539e+54 1.00000000e+00 2.35801434e+55]
 [7.12058633e+53 2.35801434e+55 1.00000000e+00]]


  K[i, j] = np.exp(-norm_sq / (2 * (sigma ** 2)))
