In [78]:
import numpy as np
import numpy.linalg as la
import csv

def read_file (filename):
    mat = []
    with open(filename, newline = '') as file:
        csv_reader = csv.reader(file, delimiter=',', quotechar='|')
        next(csv_reader)
        for line in csv_reader:
            row = []
            for entry in line:
                row.append(float(entry))
            mat.append(row)
    return np.matrix(mat)

iris_0 = read_file('iris.csv')
iris_1 = read_file('dataI.csv')
iris_2 = read_file('dataII.csv')
iris_3 = read_file('dataIII.csv')
iris_4 = read_file('dataIV.csv')
iris_5 = read_file('dataV.csv')
irises = [iris_1, iris_2, iris_3, iris_4, iris_5]

In [79]:
def reconstruct_self(data, n_comp = 4):
    mean = np.mean(data, axis = 0)
    data_cntr = data - mean
    cov = np.cov(data_cntr.T)
    eig_val, eig_vec = la.eig(cov)
    pc = eig_vec[:, 0:n_comp]
    result = (pc@(pc.T@data_cntr.T)).T + mean
    return result

In [106]:
def reconstruct_orig(data_0, data_1, n_comp = 4):
    mean_0 = np.mean(data_0, axis = 0)
    data_0_cntr = data_0 - mean_0
    mean_1 = np.mean(data_1, axis = 0)
    data_1_cntr = data_1 - mean_1
    
    cov_0 = np.cov(data_0_cntr.T)
    eig_val_0, eig_vec_0 = la.eig(cov_0)
    pc = eig_vec_0[:, 0:n_comp]
    result = (pc@(pc.T@data_1_cntr.T)).T + mean_1
    return result

In [117]:
# np.set_printoptions(precision=6)
mse_table = np.zeros(shape = (5, 10))

for n in range(5):
    data = irises[n]
    mse = np.square(np.subtract(np.mean(data), data)).mean()
    mse_table[n][0] = mse
    for i in range(1, 5):
        rec = reconstruct_self(data, i)
        mse = np.square(np.subtract(rec, data)).mean()
        mse_table[n][i] = mse
    mse = np.square(np.subtract(np.mean(iris_0), data)).mean()
    mse_table[n][5] = mse
    for i in range(1, 5):
        rec = reconstruct_orig(iris_0, data, i)
        mse = np.square(np.subtract(rec, data)).mean()
        mse_table[n][i + 5] = mse
print(mse_table)

iris_2_recon = np.array(reconstruct_self(iris_2))
print(iris_2_recon)

[[  3.943052e+00   1.241564e-01   4.899975e-02   1.644791e-02
    4.516878e-31   3.943111e+00   1.243032e-01   4.940558e-02
    1.687804e-02   8.024818e-31]
 [  4.118235e+00   2.857413e-01   1.422063e-01   6.284167e-02
    4.284624e-31   4.118235e+00   2.865788e-01   1.479550e-01
    6.666349e-02   1.443526e-30]
 [  4.969214e+00   7.589825e-01   5.779471e-01   2.622694e-01
    2.135274e-30   4.970098e+00   7.668718e-01   4.600133e-01
    2.682419e-01   2.851185e-30]
 [  4.043716e+00   2.595793e-01   8.316367e-02   3.461244e-02
    1.459445e-30   4.047941e+00   2.647613e-01   1.399598e-01
    6.224286e-02   1.621039e-30]
 [  4.348384e+00   8.788269e-01   2.302940e-01   7.477077e-02
    2.424377e-30   4.416244e+00   9.122069e-01   5.115373e-01
    1.677268e-01   4.681947e-30]]
[[  5.207350e+00   2.974641e+00   4.518116e-01   4.852577e-01]
 [  5.508277e+00   3.576869e+00   1.822978e+00   1.693179e-01]
 [  3.456529e+00   2.388334e+00   1.196019e+00  -2.785187e-01]
 [  5.150533e+00   2.3473

In [118]:
import csv
res = [['0N', '1N', '2N', '3N', '4N', '0c', '1c', '2c', '3c', '4c']]
for row in mse_table:
    res.append(row)
    
with open("jpan22-numbers.csv",'w', newline='') as resultFile:
    wr = csv.writer(resultFile)
    wr.writerows(res)

res = [['Sepal.Length','Sepal.Width','Petal.Length','Petal.Width']]
for row in iris_2_recon:
    res.append(row)

with open("jpan22-recon.csv",'w', newline='') as resultFile:
    wr = csv.writer(resultFile)
    wr.writerows(res)

In [91]:
Dataset 1
5.865102 3.055011 3.756222 1.212214
4.8403366 3.2857853 1.4101974 0.2175673 
5.056522 3.544493 1.360780 0.171419 
4.9974172 3.6059949 1.3664187 0.2332827 
4.9524314 3.6399886 1.4241958 0.1512418 
Dataset 2
5.858884 3.112452 3.696876 1.192108
4.60259078 3.38588838 0.93633898 0.07039917 
4.62456632 3.42605349 0.92866838 0.07445531 
5.25215186  3.03882313  0.64568299 -0.02639324 
5.2073501 2.9746407 0.4518116 0.4852577
Dataset 3
5.880815 2.949890 3.754187 1.154181
4.3952141 3.2045005 0.6909603 0.1006563 
4.0023493  2.7448856  0.9164127 -0.1119578 
4.0064782 2.0033990 0.4677494 1.0075537 
4.9114087 1.3422653 0.1671120 0.4458417 
Dataset 4
5.735333 3.057333 3.666000 1.139333
3.7758046  3.4386976 -0.2814564 -0.3329735 
0.1223950  2.7420260  1.3912258 -0.1357079 
0.05412632 2.91312568 1.25406793 0.36721076 
1.179612e-16 3.500000e+00 1.400000e+00 2.000000e-01 
Dataset 5
5.310667 2.893333 3.525333 1.086667
4.3171478 3.0391033 2.1386955 0.6356332 
5.167191 3.068787 1.587304 0.467959 
5.099379645 -0.002734424  1.380795998  0.259522708 
5.100000e+00 -1.301043e-16  1.400000e+00  2.000000e-01 

SyntaxError: invalid syntax (<ipython-input-91-2e45c163b84a>, line 1)

In [42]:
mean_0 = np.mean(iris_0, axis = 0)
mean_1 = np.mean(iris_1, axis = 0)
print(mean_1)
iris_0_m = iris_0 - mean_0
iris_1_m = iris_1 - mean_1
# print(iris_0 - mean_0)

[[ 5.86510233  3.05501118  3.75622226  1.21221435]]


In [43]:
cov_0 = np.cov(iris_0_m.T)
cov_1 = np.cov(iris_1_m.T)
print(cov_1)

[[ 0.75361172 -0.03851283  1.28982591  0.51548592]
 [-0.03851283  0.25117413 -0.31925519 -0.12708641]
 [ 1.28982591 -0.31925519  3.07748174  1.28112553]
 [ 0.51548592 -0.12708641  1.28112553  0.63314597]]


In [101]:
eig_val, eig_vec = la.eig(cov_1)
print(eig_val)
print(eig_vec)
data_re = (eig_vec[:, 0:2]@(eig_vec[:, 0:2].T@iris_1_m.T)).T + mean_1
print(data_re[0])
print(data_re.shape)

[ 4.215455  0.302644  0.131081  0.066233]
[[ 0.371806 -0.628707 -0.560118  0.390839]
 [-0.08373  -0.75237   0.582841 -0.295339]
 [ 0.851185  0.143716  0.053438 -0.50197 ]
 [ 0.360878  0.134208  0.586266  0.712774]]
[[ 5.056522  3.544493  1.36078   0.171419]]
(150, 4)


In [38]:
mean_0 = np.mean(iris_0, axis = 0)
mean_1 = np.mean(iris_1, axis = 0)
mean_2 = np.mean(iris_2, axis = 0)
mean_3 = np.mean(iris_3, axis = 0)
mean_4 = np.mean(iris_4, axis = 0)
cov_0 = np.cov(iris_0.T)
cov_1 = np.cov(iris_1.T)
cov_2 = np.cov(iris_2.T)
cov_3 = np.cov(iris_3.T)
cov_4 = np.cov(iris_4.T)
u_0, s_0, v_0 = la.svd(iris_0, full_matrices=False)
u_1, s_1, v_1 = la.svd(iris_1, full_matrices=False)
u_2, s_2, v_2 = la.svd(iris_2, full_matrices=False)
u_3, s_3, v_3 = la.svd(iris_3, full_matrices=False)
u_4, s_4, v_4 = la.svd(iris_4, full_matrices=False)
# np.allclose(iris_1, (u_1@np.diag(s_1)@v_1))
# np.square(np.subtract(A, B)).mean()
# u_1@np.diag(s_1)@v_1


In [39]:
print(s_1)
print(v_1)

[ 96.20228289  17.73518629   4.45390782   3.16566773]
[[-0.75261193 -0.37922179 -0.51109429 -0.16896375]
 [ 0.27637467  0.55419694 -0.70175345 -0.35217168]
 [ 0.49496094 -0.66533082 -0.05120789 -0.55653062]
 [ 0.33497164 -0.32617709 -0.49366231  0.73328032]]


In [2]:
mean_0 = np.mean(iris_0, axis = 0)
mean_1 = np.mean(iris_1, axis = 0)
mean_2 = np.mean(iris_2, axis = 0)
mean_3 = np.mean(iris_3, axis = 0)
mean_4 = np.mean(iris_4, axis = 0)
cov_0 = np.cov(iris_0.T)
cov_1 = np.cov(iris_1.T)
cov_2 = np.cov(iris_2.T)
cov_3 = np.cov(iris_3.T)
cov_4 = np.cov(iris_4.T)
u_0, s_0, v_0 = la.svd(iris_0, full_matrices=False)
u_1, s_1, v_1 = la.svd(iris_1, full_matrices=False)
u_2, s_2, v_2 = la.svd(iris_2, full_matrices=False)
u_3, s_3, v_3 = la.svd(iris_3, full_matrices=False)
u_4, s_4, v_4 = la.svd(iris_4, full_matrices=False)
# np.allclose(iris_1, (u_1@np.diag(s_1)@v_1))
# np.square(np.subtract(A, B)).mean()
# u_1@np.diag(s_1)@v_1

[[ 5.84333333  3.05733333  3.758       1.19933333]]
[[ 4.87332632  3.28420238  1.45858847  0.23764012]]
[[ 5.08303897  3.51741393  1.40321372  0.21353169]]
[[ 5.09928623  3.50072335  1.40108561  0.1982949 ]]
[[ 5.1  3.5  1.4  0.2]]


In [None]:
Dataset 1
5.865102 3.055011 3.756222 1.212214
4.8403366 3.2857853 1.4101974 0.2175673 
5.056522 3.544493 1.360780 0.171419 
4.9974172 3.6059949 1.3664187 0.2332827 
4.9524314 3.6399886 1.4241958 0.1512418 
Dataset 2
5.858884 3.112452 3.696876 1.192108
4.60259078 3.38588838 0.93633898 0.07039917 
4.62456632 3.42605349 0.92866838 0.07445531 
5.25215186  3.03882313  0.64568299 -0.02639324 
5.2073501 2.9746407 0.4518116 0.4852577
Dataset 3
5.880815 2.949890 3.754187 1.154181
4.3952141 3.2045005 0.6909603 0.1006563 
4.0023493  2.7448856  0.9164127 -0.1119578 
4.0064782 2.0033990 0.4677494 1.0075537 
4.9114087 1.3422653 0.1671120 0.4458417 
Dataset 4
5.735333 3.057333 3.666000 1.139333
3.7758046  3.4386976 -0.2814564 -0.3329735 
0.1223950  2.7420260  1.3912258 -0.1357079 
0.05412632 2.91312568 1.25406793 0.36721076 
1.179612e-16 3.500000e+00 1.400000e+00 2.000000e-01 
Dataset 5
5.310667 2.893333 3.525333 1.086667
4.3171478 3.0391033 2.1386955 0.6356332 
5.167191 3.068787 1.587304 0.467959 
5.099379645 -0.002734424  1.380795998  0.259522708 
5.100000e+00 -1.301043e-16  1.400000e+00  2.000000e-01 