In [12]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.stats import multivariate_normal

In [13]:
import os
import cv2  

data_folder = "Dataset"
image_files = os.listdir(data_folder)
images = []
labels = []

for filename in image_files:
    if filename.endswith(".jpg"):
        # Extract class label from filename
        label = int(filename.split("_")[-1].split(".")[0])
        labels.append(label)
        image_path = os.path.join(data_folder, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 
        resized_image = cv2.resize(image, (64, 64))  # Resize image to 64x64 pixels
        image_flat = resized_image.flatten()  # Flatten resized image into 1D array
        images.append(image_flat)

# Convert lists to numpy arrays
X = np.array(images)
y = np.array(labels)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
## PCA of different classes

In [15]:
# Define PCA function
def pca(X, target_var_ratio):
    # Standardize the data
    X_mean = np.mean(X, axis=0)
    X_std = np.std(X, axis=0)
    X_standardized = (X - X_mean) / X_std

    # Compute covariance matrix
    cov_matrix = np.cov(X_standardized, rowvar=False)

    # Compute eigenvalues and eigenvectors of the covariance matrix
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # Sort eigenvalues and eigenvectors in descending order
    idx = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[idx]
    eigenvectors = eigenvectors[:, idx]

    # Compute cumulative explained variance ratio
    explained_var_ratio = np.cumsum(eigenvalues) / np.sum(eigenvalues)

    # Find the number of principal components that explain target_var_ratio of the variation
    num_components = np.argmax(explained_var_ratio >= target_var_ratio) + 1

    # Select the top num_components eigenvectors
    principal_components = eigenvectors[:, :num_components]

    return principal_components

# Step 3: Perform PCA for each class in the training data
target_var_ratio = 0.9
principal_components_per_class = {}

for digit in range(10):
    # Filter the training data for the current class
    X_train_digit = X_train[y_train == digit]

    # Perform PCA
    principal_components = pca(X_train_digit, target_var_ratio)
    principal_components_per_class[digit] = principal_components

# Print the number of principal components for each class
for digit, components in principal_components_per_class.items():
    print(f"Digit {digit}: {components} principal components")


Digit 0: [[ 0.00382001  0.00315794 -0.02943456 ... -0.0125611   0.03058755
   0.00013395]
 [ 0.00232613 -0.00453058 -0.05569537 ... -0.01115975  0.01281268
   0.01054267]
 [ 0.00191541 -0.00534298 -0.05464407 ... -0.00593539  0.00045497
   0.0092326 ]
 ...
 [-0.0085644  -0.00612617  0.0596254  ...  0.00631146 -0.01843602
  -0.01074692]
 [-0.00790281 -0.02184084  0.06018267 ...  0.00843367 -0.026376
  -0.0100369 ]
 [-0.00499384 -0.04077886  0.04751378 ...  0.00488013 -0.02370819
  -0.00482705]] principal components
Digit 1: [[-0.00296788 -0.03448859  0.02623855 ... -0.00938445  0.0120235
  -0.03369139]
 [-0.00187911 -0.02844298  0.05346614 ... -0.00368034  0.00076127
  -0.01582934]
 [-0.00129918 -0.02509976  0.05498393 ... -0.01005978 -0.00579222
  -0.00169468]
 ...
 [ 0.00838189  0.02817118  0.04769521 ... -0.00235915 -0.00123341
   0.00629348]
 [ 0.00773648  0.03529188  0.04984223 ... -0.00216969  0.00411809
   0.00858943]
 [ 0.00708436  0.04674492  0.04260313 ...  0.01180711 -0.00323

In [16]:
# Define PCA function to return first two principal components
def pca_first_two_components(X):
    # Standardize the data
    X_mean = np.mean(X, axis=0)
    X_std = np.std(X, axis=0)
    X_standardized = (X - X_mean) / X_std

    # Compute covariance matrix
    cov_matrix = np.cov(X_standardized, rowvar=False)

    # Compute eigenvalues and eigenvectors of the covariance matrix
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # Sort eigenvalues and eigenvectors in descending order
    idx = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[idx]
    eigenvectors = eigenvectors[:, idx]

    # Select the first two eigenvectors
    first_two_components = eigenvectors[:, :2]

    return first_two_components

# Step 4: Find the first two principal components of the whole training data
first_two_components = pca_first_two_components(X_train)

# Print the shape of the first two components
print("Shape of the first two principal components:", first_two_components.shape)


Shape of the first two principal components: (4096, 2)


In [17]:
class_params = {}

for digit in range(10):
    # Filter the training data for the current class
    X_train_digit = X_train[y_train == digit]

    # Perform PCA on the data for the current class to get the first two principal components
    principal_components = pca_first_two_components(X_train_digit)

    # Project the training data onto the first two principal components
    projected_data = np.dot(X_train_digit, principal_components)

    # Compute mean and covariance matrix of the projected data
    mean = np.mean(projected_data, axis=0)
    covariance_matrix = np.cov(projected_data, rowvar=False)

    # Store mean and covariance matrix in a dictionary
    class_params[digit] = {'mean': mean, 'covariance_matrix': covariance_matrix, 'principal_components': principal_components}

# Print the parameters for each class
for digit, params in class_params.items():
    print(f"Digit {digit}:")
    print("Mean:", params['mean'])
    print("Covariance Matrix:")
    print(params['covariance_matrix'])
    print()


KeyboardInterrupt: 

In [None]:
from scipy.stats import multivariate_normal

# Initialize variables to store predicted labels and correct predictions count
predicted_labels = []
correct_predictions = 0

# Loop through each data point in the test set
for i in range(len(X_test)):
    data_point = X_test[i]
    true_label = y_test[i]
    
    # Initialize variables to store likelihoods for each class
    likelihoods = []
    
    # Compute likelihood for each class
    for digit in range(10):
        # Get parameters of the current class
        mean = class_params[digit]['mean']
        covariance_matrix = class_params[digit]['covariance_matrix']
        principal_components = class_params[digit]['principal_components']
        
        # Project the data point onto the first two principal components of the current class
        projected_data = np.dot(data_point, principal_components)
        
        # Compute likelihood using multivariate normal distribution
        likelihood = multivariate_normal.pdf(projected_data, mean=mean, cov=covariance_matrix)
        likelihoods.append(likelihood)
    
    # Classify the data point based on the class with the highest likelihood
    predicted_label = np.argmax(likelihoods)
    predicted_labels.append(predicted_label)
    
    # Check if the predicted label matches the true label
    if predicted_label == true_label:
        correct_predictions += 1

# Calculate success rate
success_rate = correct_predictions / len(X_test)
print("Success rate of the classification method:", success_rate)
