In [29]:
import pandas as pd
import numpy as np

df = pd.read_csv(
    filepath_or_buffer='https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', 
    header=None, 
    sep=',')

df.columns=['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class']
df.dropna(how="all", inplace=True) # drops the empty line at file-end

df.tail()

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


In [30]:
X = df[['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid']].values
y = df['class'].values
Z = df[['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid']]

In [31]:
mean_p = df["sepal_len"].mean()
mean_q = df["sepal_wid"].mean()
mean_r = df["petal_len"].mean()
mean_s = df["petal_wid"].mean()

Z.mean()

sepal_len    5.843333
sepal_wid    3.054000
petal_len    3.758667
petal_wid    1.198667
dtype: float64

In [32]:
y = Z - Z.mean()
t = y.values

In [33]:
t

array([[ -7.43333333e-01,   4.46000000e-01,  -2.35866667e+00,
         -9.98666667e-01],
       [ -9.43333333e-01,  -5.40000000e-02,  -2.35866667e+00,
         -9.98666667e-01],
       [ -1.14333333e+00,   1.46000000e-01,  -2.45866667e+00,
         -9.98666667e-01],
       [ -1.24333333e+00,   4.60000000e-02,  -2.25866667e+00,
         -9.98666667e-01],
       [ -8.43333333e-01,   5.46000000e-01,  -2.35866667e+00,
         -9.98666667e-01],
       [ -4.43333333e-01,   8.46000000e-01,  -2.05866667e+00,
         -7.98666667e-01],
       [ -1.24333333e+00,   3.46000000e-01,  -2.35866667e+00,
         -8.98666667e-01],
       [ -8.43333333e-01,   3.46000000e-01,  -2.25866667e+00,
         -9.98666667e-01],
       [ -1.44333333e+00,  -1.54000000e-01,  -2.35866667e+00,
         -9.98666667e-01],
       [ -9.43333333e-01,   4.60000000e-02,  -2.25866667e+00,
         -1.09866667e+00],
       [ -4.43333333e-01,   6.46000000e-01,  -2.25866667e+00,
         -9.98666667e-01],
       [ -1.04333333e

In [34]:
cov_mat = np.cov([t[0,:],t[1,:],t[2,:]],t[3,:])
print('Covariance Matrix:\n', cov_mat)

Covariance Matrix:
 [[ 1.32431896  1.0863523   1.21380785  1.06647452]
 [ 1.0863523   0.90421896  1.00584119  0.88184119]
 [ 1.21380785  1.00584119  1.13663007  1.00596341]
 [ 1.06647452  0.88184119  1.00596341  0.89529674]]


In [35]:
eig_val_cov, eig_vec_cov = np.linalg.eig(cov_mat)
print( "eig_val_cov=" , eig_val_cov , "\neig_vec_cov=" , eig_vec_cov , "\n")

eig_val_cov= [  4.22474231e+00   2.77122370e-02   8.01019141e-03   5.93497003e-17] 
eig_vec_cov= [[ 0.55734905  0.57146322 -0.60207345  0.01730293]
 [ 0.46062537  0.34082253  0.75880197  0.30965116]
 [ 0.51798307 -0.32501392  0.14868119 -0.77714439]
 [ 0.45703375 -0.6720385  -0.19905033  0.54759782]] 



In [37]:
# Make a list of (eigenvalue, eigenvector) tuples
eig_pairs = [(np.abs(eig_val_cov[i]), eig_vec_cov[:,i]) for i in range(len(eig_val_cov))]

# Sort the (eigenvalue, eigenvector) tuples from high to low
eig_pairs.sort(key=lambda x: x[0], reverse=True)

# Visually confirm that the list is correctly sorted by decreasing eigenvalues
for i in eig_pairs:
    print(i[0])


4.22474231229
0.027712237046
0.0080101914076
5.93497003004e-17


In [38]:
matrix_w = np.hstack((eig_pairs[0][1].reshape(4,1), eig_pairs[1][1].reshape(4,1)))
print('Matrix W:\n', matrix_w)

Matrix W:
 [[ 0.55734905  0.57146322]
 [ 0.46062537  0.34082253]
 [ 0.51798307 -0.32501392]
 [ 0.45703375 -0.6720385 ]]


In [41]:
transformed = t.dot(matrix_w)
transformed

array([[ -1.88703097e+00,   1.16496113e+00],
       [ -2.22881347e+00,   8.80257228e-01],
       [ -2.29995651e+00,   8.66630481e-01],
       [ -2.29815734e+00,   7.10399123e-01],
       [ -1.89670334e+00,   1.14189706e+00],
       [ -1.28877444e+00,   1.24081723e+00],
       [ -2.16606466e+00,   7.77943422e-01],
       [ -1.93703011e+00,   1.04123117e+00],
       [ -2.55355053e+00,   5.60443366e-01],
       [ -2.17665600e+00,   9.49041939e-01],
       [ -1.57590288e+00,   1.37206321e+00],
       [ -1.99670161e+00,   8.94437133e-01],
       [ -2.33025175e+00,   8.90314756e-01],
       [ -2.76432119e+00,   7.02087323e-01],
       [ -1.37017057e+00,   1.80039943e+00],
       [ -9.94853654e-01,   1.64767024e+00],
       [ -1.49596767e+00,   1.37082280e+00],
       [ -1.84132760e+00,   1.09775728e+00],
       [ -1.21333564e+00,   1.44537780e+00],
       [ -1.65134168e+00,   1.16750265e+00],
       [ -1.61049388e+00,   1.20481367e+00],
       [ -1.65170084e+00,   1.06621655e+00],
       [ -