# Wczytywanie zbioru danych

In [1]:
from sklearn import datasets
import numpy as np

iris = datasets.load_iris()

In [2]:
import pandas as pd
df = pd.DataFrame(iris.data, columns = iris.feature_names)
df['target'] = iris.target
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


# Podział zbioru danych ze względu na gatunek kwiatu

In [3]:
df0 = df[df['target'] == 0]
df0 = df0.drop(columns=['target'])
df0

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,4.9,3.1,1.5,0.1


In [4]:
df1 = df[df['target'] == 1]
df1 = df1.drop(columns=['target'])
df1

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
50,7.0,3.2,4.7,1.4
51,6.4,3.2,4.5,1.5
52,6.9,3.1,4.9,1.5
53,5.5,2.3,4.0,1.3
54,6.5,2.8,4.6,1.5
55,5.7,2.8,4.5,1.3
56,6.3,3.3,4.7,1.6
57,4.9,2.4,3.3,1.0
58,6.6,2.9,4.6,1.3
59,5.2,2.7,3.9,1.4


In [5]:
df2 = df[df['target'] == 2]
df2 = df2.drop(columns=['target'])
df2

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
100,6.3,3.3,6.0,2.5
101,5.8,2.7,5.1,1.9
102,7.1,3.0,5.9,2.1
103,6.3,2.9,5.6,1.8
104,6.5,3.0,5.8,2.2
105,7.6,3.0,6.6,2.1
106,4.9,2.5,4.5,1.7
107,7.3,2.9,6.3,1.8
108,6.7,2.5,5.8,1.8
109,7.2,3.6,6.1,2.5


# Obliczenie macierzy kowariancji

In [6]:
def covariance_matrix(data):
    n, m = data.shape
    
    covariance_matrix = np.zeros((m, m))
    means = np.mean(data, axis = 0)
    
    for i in range(m):
        for j in range(m):
            covariance_matrix[i][j] = np.sum((data[:, i] - means[i])*(data[:, j] - means[j])) / (n-1)
    return covariance_matrix


In [7]:
cov0 = covariance_matrix(np.array(df0))
cov0

array([[0.12424898, 0.09921633, 0.0163551 , 0.01033061],
       [0.09921633, 0.1436898 , 0.01169796, 0.00929796],
       [0.0163551 , 0.01169796, 0.03015918, 0.00606939],
       [0.01033061, 0.00929796, 0.00606939, 0.01110612]])

In [8]:
cov1 = covariance_matrix(np.array(df1))
cov1

array([[0.26643265, 0.08518367, 0.18289796, 0.05577959],
       [0.08518367, 0.09846939, 0.08265306, 0.04120408],
       [0.18289796, 0.08265306, 0.22081633, 0.07310204],
       [0.05577959, 0.04120408, 0.07310204, 0.03910612]])

In [9]:
cov2 = covariance_matrix(np.array(df2))
cov2

array([[0.40434286, 0.09376327, 0.3032898 , 0.04909388],
       [0.09376327, 0.10400408, 0.07137959, 0.04762857],
       [0.3032898 , 0.07137959, 0.30458776, 0.04882449],
       [0.04909388, 0.04762857, 0.04882449, 0.07543265]])

# Obliczenie wartości własnych i wektorów własnych

In [10]:
def eigen_iteration_method(matrix, max_iterations=1000, tolerance=1e-6):
    n = matrix.shape[0]
    eigen_values = []
    eigen_vectors = []

    for _ in range(n):
        eigen_vector = np.random.rand(n)
        eigen_vector = eigen_vector / np.linalg.norm(eigen_vector)

        for _ in range(max_iterations):
            new_eigen_vector = np.dot(matrix, eigen_vector)
            eigen_value = np.dot(new_eigen_vector, eigen_vector)
            new_eigen_vector = new_eigen_vector / np.linalg.norm(new_eigen_vector)

            if np.linalg.norm(new_eigen_vector - eigen_vector) < tolerance:
                eigen_values.append(eigen_value)
                eigen_vectors.append(eigen_vector)
                matrix -= eigen_value * np.outer(eigen_vector, eigen_vector)
                break

            eigen_vector = new_eigen_vector

    return eigen_values, eigen_vectors    

In [11]:
eigen_values, eigen_vectors = eigen_iteration_method(cov0)
print(f"Wartości własne: {eigen_values}")
print(f"Wektory własne: {eigen_vectors}")

Wartości własne: [0.23645569007426337, 0.036918732379557936, 0.0267963986271811, 0.00903326055254155]
Wektory własne: [array([0.66907894, 0.73414727, 0.09654432, 0.06356371]), array([ 0.59787891, -0.62067678,  0.4900575 ,  0.13093812]), array([-0.43996514,  0.27461008,  0.83244739,  0.19506748]), array([-0.03607646, -0.01955069, -0.23990255,  0.96992939])]


# Wizualizacja na płaszczyźnie rozpiętej przez wektory własne

In [12]:
top2_eigenvectors = np.array(eigen_vectors[:2])
top2_eigenvectors

array([[ 0.66907894,  0.73414727,  0.09654432,  0.06356371],
       [ 0.59787891, -0.62067678,  0.4900575 ,  0.13093812]])

In [18]:
eigen_vectors
#top_vectors = eigen_vectors[:, :2]

[array([0.66907894, 0.73414727, 0.09654432, 0.06356371]),
 array([ 0.59787891, -0.62067678,  0.4900575 ,  0.13093812]),
 array([-0.43996514,  0.27461008,  0.83244739,  0.19506748]),
 array([-0.03607646, -0.01955069, -0.23990255,  0.96992939])]

In [22]:
_, vec = np.linalg.eig(cov0)
vec

array([[-0.5363304 , -0.38426188,  0.60645977, -0.44373309],
       [-0.58664722, -0.45674263, -0.61356737,  0.26601936],
       [-0.22731553,  0.12315926,  0.48729138,  0.83409026],
       [ 0.56260323, -0.79281825,  0.13523585,  0.19138468]])

In [None]:
transposed = top2_eigenvectors.transpose()
transposed

In [None]:
projected_data = np.array(df0.dot(transposed))
projected_data

In [None]:
import matplotlib.pyplot as plt

plt.scatter(projected_data[:, 0], projected_data[:, 1])
plt.quiver(0, 0, top2_eigenvectors[0, 0], top2_eigenvectors[1, 0], angles='xy', scale_units='xy', scale=1, color='r', label='Wektor własny 1')
plt.quiver(0, 0, top2_eigenvectors[0, 1], top2_eigenvectors[1, 1], angles='xy', scale_units='xy', scale=1, color='g', label='Wektor własny 2')
plt.xlabel('PCA 1')
plt.ylabel('PCA 2')
plt.legend()
plt.grid()
plt.ylim(-1, 3)
plt.show()

In [None]:
def normalize_data(matrix):
    mean = np.mean(matrix, axis=0)
    std = np.std(matrix, axis=0)
    normalized_matrix = (matrix - mean) / std
    return normalized_matrix

normalized_data = normalize_data(projected_data)
normalized_data

In [None]:
plt.scatter(normalized_data[:, 0], normalized_data[:, 1])
plt.quiver(0, 0, top2_eigenvectors[0, 0], top2_eigenvectors[1, 0], angles='xy', scale_units='xy', scale=1, color='r', label='Wektor własny 1')
plt.quiver(0, 0, top2_eigenvectors[0, 1], top2_eigenvectors[1, 1], angles='xy', scale_units='xy', scale=1, color='g', label='Wektor własny 2')
plt.xlabel('PCA 1')
plt.ylabel('PCA 2')
plt.legend()
plt.grid()
plt.show()