In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [2]:
def standard_scale(x):
    """Standardize each column of ``x`` to zero mean and unit variance.

    The mean and standard deviation are taken along axis 0 (per feature).
    A feature with zero spread (constant column) is only centered, so it
    becomes all zeros instead of NaN/inf from a division by zero.

    Parameters
    ----------
    x : array-like supporting ``.mean(axis=0)`` and ``.std(axis=0)``
        Samples in rows, features in columns.

    Returns
    -------
    Array of the same shape as ``x`` holding the standardized values.
    """
    centered = x - x.mean(axis=0)
    std = x.std(axis=0)
    # Constant features have std == 0; divide those by 1 so they stay at 0.
    safe_std = np.where(std == 0, 1.0, std)
    return centered / safe_std

In [3]:
def classification(X, y):
    """Train a logistic regression on a hold-out split and print micro-F1.

    The data is split 70/30 (``test_size=0.3``) with a fixed seed, the model
    is fitted on the training part, and the micro-averaged F1 score is
    printed first for the training part and then for the test part.

    Parameters
    ----------
    X : feature matrix passed to ``train_test_split``.
    y : target labels aligned with the rows of ``X``.

    Returns
    -------
    None. The scores are only printed.
    """
    parts = train_test_split(X, y, test_size=0.3, random_state=50)
    features_train, features_test, labels_train, labels_test = parts

    clf = LogisticRegression(random_state=10).fit(features_train, labels_train)

    # Same report for both partitions, train first.
    for tag, features, labels in (
        ("train", features_train, labels_train),
        ("test", features_test, labels_test),
    ):
        score = f1_score(labels, clf.predict(features), average='micro')
        print(f"{tag} {score}")

In [4]:
# Load the iris dataset, cast the features to float and standardize them
# column-wise; the labels are kept untouched.
iris = load_iris()
y = iris.target
X = standard_scale(iris.data.astype(float))

In [5]:
# Baseline: fit and score the classifier on the full standardized feature matrix.
classification(X, y)

train 0.9714285714285714
test 0.9777777777777777


In [6]:
# PCA by eigendecomposition: project X onto its leading principal directions.

# Number of principal components to keep.
n_components = 2

# Gram matrix of the features. For column-centered X this is proportional to
# the covariance matrix, so it has the same eigenvectors.
covariance_matrix = X.T @ X

# The matrix is symmetric, so use eigh: it guarantees real eigenvalues and
# orthonormal eigenvectors (eig gives no such guarantee and may return
# complex output). Eigenvector signs are arbitrary either way.
eig_values, eig_vectors = np.linalg.eigh(covariance_matrix)

# Pair each eigenvalue magnitude with its eigenvector (a column of eig_vectors).
eig_pairs = [(np.abs(eig_values[i]), eig_vectors[:, i]) for i in range(len(eig_values))]

# Largest eigenvalue first: those directions carry the most variance.
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)

# Stack the leading eigenvectors as columns; reshape(-1, 1) adapts to any
# number of features instead of assuming exactly four.
W = np.hstack([eig_pairs[i][1].reshape(-1, 1) for i in range(n_components)])

# Coordinates of every sample in the reduced space.
Z = X.dot(W)

In [7]:
# Same evaluation on Z, the projection of X onto the two leading eigenvectors.
classification(Z, y)

train 0.9142857142857143
test 0.8888888888888888
