# Calcualting PCA standard format

In [1]:
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

In [2]:
# sample data frame
data_df = pd.DataFrame({"x1": [2.5, 0.5, 2.2, 1.9,3.1,2.3,2.0, 1.0,1.5, 1.1],
                        "x2":[2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9]})

In [3]:
# calculating mean of the x1 and x2 columns
x1_mean = data_df["x1"].mean()
x2_mean = data_df["x2"].mean()

In [4]:
x1_mean,x2_mean

(1.81, 1.9100000000000001)

#### step 1 : standardization of data

In [5]:
# Standardizing the values using x - x_mean
data_df['x1-mean'] = data_df['x1'] - x1_mean
data_df['x2-mean'] = data_df['x2'] - x2_mean

In [6]:
data_df

Unnamed: 0,x1,x2,x1-mean,x2-mean
0,2.5,2.4,0.69,0.49
1,0.5,0.7,-1.31,-1.21
2,2.2,2.9,0.39,0.99
3,1.9,2.2,0.09,0.29
4,3.1,3.0,1.29,1.09
5,2.3,2.7,0.49,0.79
6,2.0,1.6,0.19,-0.31
7,1.0,1.1,-0.81,-0.81
8,1.5,1.6,-0.31,-0.31
9,1.1,0.9,-0.71,-1.01


In [7]:
df = data_df[['x1-mean', 'x2-mean']]

In [8]:
# converting dataframe to array
X= df.to_numpy()

In [9]:
X

array([[ 0.69,  0.49],
       [-1.31, -1.21],
       [ 0.39,  0.99],
       [ 0.09,  0.29],
       [ 1.29,  1.09],
       [ 0.49,  0.79],
       [ 0.19, -0.31],
       [-0.81, -0.81],
       [-0.31, -0.31],
       [-0.71, -1.01]])

In [10]:
C = (X.T@X)/9

In [11]:
C

array([[0.61655556, 0.61544444],
       [0.61544444, 0.71655556]])

#### Step 2 : calcualte eigen values and eigen vectors

In [12]:
eig_vlaues, eig_vectors = np.linalg.eig(C)

In [13]:
eig_vlaues

array([0.0490834 , 1.28402771])

In [14]:
eig_vectors

array([[-0.73517866, -0.6778734 ],
       [ 0.6778734 , -0.73517866]])

In [15]:
eig_vectors

array([[-0.73517866, -0.6778734 ],
       [ 0.6778734 , -0.73517866]])

#### Step 3 : Transform Original Dataset

In [16]:
## Z = X * eigen vectors
Z = X@ eig_vectors

In [17]:
Z

array([[-0.17511531, -0.82797019],
       [ 0.14285723,  1.77758033],
       [ 0.38437499, -0.99219749],
       [ 0.13041721, -0.27421042],
       [-0.20949846, -1.67580142],
       [ 0.17528244, -0.9129491 ],
       [-0.3498247 ,  0.09910944],
       [ 0.04641726,  1.14457216],
       [ 0.01776463,  0.43804614],
       [-0.16267529,  1.22382056]])

#### Step 4 : Reconstructing data

In [18]:
# reconstruction original data X = Z* transpose of eigne vectors
x = Z @ eig_vectors.T

In [19]:
x

array([[ 0.69,  0.49],
       [-1.31, -1.21],
       [ 0.39,  0.99],
       [ 0.09,  0.29],
       [ 1.29,  1.09],
       [ 0.49,  0.79],
       [ 0.19, -0.31],
       [-0.81, -0.81],
       [-0.31, -0.31],
       [-0.71, -1.01]])

#### step 5 to reconstruct original data 

In [20]:
x+np.array([x2_mean, x1_mean])

array([[2.6, 2.3],
       [0.6, 0.6],
       [2.3, 2.8],
       [2. , 2.1],
       [3.2, 2.9],
       [2.4, 2.6],
       [2.1, 1.5],
       [1.1, 1. ],
       [1.6, 1.5],
       [1.2, 0.8]])

# Calcualting PCA with co-varience Matrix

In [21]:
# sample data frame
data_df = pd.DataFrame({"x1": [4, 8, 13, 7],
                        "x2":[11, 4, 5, 14]})

#### Step 1: calcuating Mean

In [22]:
# calculating mean of the x1 and x2 columns
x1_mean = data_df["x1"].mean()
x2_mean = data_df["x2"].mean()

#### Step 2: Stadardizing values

In [23]:
# Standardizing the values using x - x_mean
data_df['x1-mean'] = data_df['x1'] - x1_mean
data_df['x2-mean'] = data_df['x2'] - x2_mean

In [24]:
C = data_df[["x1", "x2"]].to_numpy()

In [25]:
C

array([[ 4, 11],
       [ 8,  4],
       [13,  5],
       [ 7, 14]], dtype=int64)

#### Step 3: Calcualte covarience Matrix

In [26]:
cov_matrix = np.cov(C.T)

In [27]:
cov_matrix

array([[ 14., -11.],
       [-11.,  23.]])

#### Step 4 : calcualte eigen values and eigen vectors

In [28]:
# Calcualte eigen values and eigen vectores
eig_vlaues, eig_vectors = np.linalg.eig(cov_matrix)

In [29]:
eig_vlaues

array([ 6.61513568, 30.38486432])

In [30]:
eig_vectors

array([[-0.83025082,  0.55738997],
       [-0.55738997, -0.83025082]])

In [31]:
std_arr = data_df[["x1-mean", "x2-mean"]].to_numpy()

#### step 5: new dataset

In [32]:
eig_vectors.T.shape

(2, 2)

In [33]:
std_arr.shape

(4, 2)

In [34]:
PC= eig_vectors.T @ std_arr.T

In [35]:
PC[1]

array([-4.30518692,  3.73612869,  5.69282771, -5.12376947])

In [36]:
PC[0]

array([ 1.92752836,  2.50825486, -2.20038921, -2.23539401])

# Image processing

In [37]:
import os

In [38]:
directory_path = 'input_images/'

In [41]:
smallest_image = None
smallest_size = None
smallest_dims = None
for filename in os.listdir(directory_path):
    filepath = os.path.join(directory_path, filename)
    if os.path.isfile(filepath):
        img = Image.open(filepath)
        width, height = img.size
        area = width * height
        if smallest_size is None or area < smallest_size:
            smallest_size = area
            smallest_image = filepath
            smallest_dims = (width, height)
print("Smallest image:", smallest_image)
print("Size (width, height):", smallest_dims)
print("Total pixels:", smallest_size)

Smallest image: input_images/19.JPG
Size (width, height): (236, 268)
Total pixels: 63248


In [44]:
for filename in os.listdir(directory_path):
    filepath = os.path.join(directory_path, filename)
    if os.path.isfile(filepath):
        filename = os.path.basename(filepath)
        img = Image.open(filepath)
        img_array = np.array(img)
        #output_img = np.zeros(img_array.shape, dtype=np.int64)
        output_array = np.zeros_like(img_array)
        height, width, channels = img_array.shape
        for i in range(height):
            for j in range(width):
                avg_value = int(np.mean(img_array[i, j]))
                for c in range(channels):
                    output_array[i, j, c] = avg_value
        output_img = Image.fromarray(output_array)
        resized_img = output_img.resize(smallest_dims, Image.LANCZOS)
        resized_img.save("output/"+filename)
        #img = Image.fromarray(output_img)
        #img.save("output//"+filename)