The General Aptitude Test Battery (GATB) is an occupationally oriented multi-aptitude test that measures distinct aptitudes using 12 separate tests (8 paper-and-pencil tests and 4 performance tests). The tests are intended to capture people’s abilities across the cognitive, perceptual, and psychomotor domains. In one study, the 12 GATB tests were administered to 255 individuals; the sample covariance matrix of the 12 test variables is given below.

In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

In [2]:
# Read the whole covariance file as one string. The context manager closes the
# file handle as soon as the read finishes instead of leaving it to the
# garbage collector.
with open('GATB.cov', 'r') as cov_file:
    data = cov_file.read()

In [3]:
# Keep the first 12 lines of the file, cut each one into fields on runs of
# exactly four spaces, and convert every field to float. The result is a list
# of rows (one list of floats per line).
rows = data.split('\n')[:12]
data = [[float(field) for field in row.split('    ')] for row in rows]

In [4]:
# Build a DataFrame from the parsed rows, then replace every missing cell
# with 0 so the table is fully numeric (the output below shows zeros above
# the diagonal).
data = pd.DataFrame(data)
data = data.fillna(0)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,1.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.3,1.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.66,0.37,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.34,0.58,0.53,1.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.79,0.47,0.55,0.43,1.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.25,0.76,0.47,0.62,0.37,1.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.7,0.41,0.58,0.44,0.53,0.4,1.03,0.0,0.0,0.0,0.0,0.0
7,0.26,0.2,0.25,0.17,0.24,0.21,0.39,1.04,0.0,0.0,0.0,0.0
8,0.34,0.3,0.28,0.21,0.29,0.3,0.32,0.49,1.01,0.0,0.0,0.0
9,0.35,0.25,0.28,0.21,0.34,0.23,0.4,0.54,0.77,1.04,0.0,0.0


In [5]:
# Mirror the filled-in lower triangle onto the upper triangle. Adding the
# transpose counts the diagonal twice, so it is subtracted once.
diagonal = np.diag(np.diag(data))
data = data + data.T - diagonal
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,1.02,0.3,0.66,0.34,0.79,0.25,0.7,0.26,0.34,0.35,0.39,0.35
1,0.3,1.01,0.37,0.58,0.47,0.76,0.41,0.2,0.3,0.25,0.32,0.33
2,0.66,0.37,1.0,0.53,0.55,0.47,0.58,0.25,0.28,0.28,0.36,0.23
3,0.34,0.58,0.53,1.03,0.43,0.62,0.44,0.17,0.21,0.21,0.27,0.28
4,0.79,0.47,0.55,0.43,1.02,0.37,0.53,0.24,0.29,0.34,0.36,0.27
5,0.25,0.76,0.47,0.62,0.37,1.0,0.4,0.21,0.3,0.23,0.21,0.31
6,0.7,0.41,0.58,0.44,0.53,0.4,1.03,0.39,0.32,0.4,0.44,0.43
7,0.26,0.2,0.25,0.17,0.24,0.21,0.39,1.04,0.49,0.54,0.44,0.42
8,0.34,0.3,0.28,0.21,0.29,0.3,0.32,0.49,1.01,0.77,0.47,0.45
9,0.35,0.25,0.28,0.21,0.34,0.23,0.4,0.54,0.77,1.04,0.48,0.48


In [6]:
# Fit a PCA that keeps every component, passing the 12x12 covariance matrix
# as the input.
# NOTE(review): PCA.fit expects an (n_samples, n_features) data matrix, so the
# rows of the covariance matrix are treated as 12 observations and re-centred
# rather than the matrix itself being decomposed. That would explain why the
# recorded explained variances sum to about 0.59 instead of the trace of the
# matrix (about 12) and why the last one is ~0. An eigen-decomposition of
# `data` (e.g. np.linalg.eigh) is probably what was intended -- confirm.
pca = PCA() # n_components left unset, so all components are kept
pca.fit(data)

# Cumulative proportion of variance explained by the first k components
pca.explained_variance_ratio_.cumsum()

array([ 0.47265015,  0.71366304,  0.80975114,  0.87060454,  0.91170453,
        0.94501651,  0.96792555,  0.98437379,  0.9930635 ,  0.9978176 ,
        1.        ,  1.        ])

In [7]:
# Explained variance: the variance attributed to each fitted component,
# listed from largest to smallest
pca.explained_variance_

array([  2.79147180e-01,   1.42342212e-01,   5.67496303e-02,
         3.59400207e-02,   2.42736505e-02,   1.96740555e-02,
         1.35300798e-02,   9.71433469e-03,   5.13214129e-03,
         2.80776992e-03,   1.28892565e-03,   6.31198886e-34])

In [8]:
# Principal axes of the fitted PCA, one component per row.
# NOTE(review): these rows are unit-length direction vectors; "loadings"
# usually refers to them scaled by sqrt(explained_variance_) -- confirm which
# quantity is wanted here.
pca.components_

array([[-0.15434553, -0.2996763 , -0.27416897, -0.36385468, -0.23496045,
        -0.32830091, -0.11574407,  0.34154383,  0.34249658,  0.37753058,
         0.24985416,  0.25202301],
       [ 0.58061742, -0.34269224,  0.28847085, -0.19876284,  0.3905689 ,
        -0.38904285,  0.32326431, -0.00678401, -0.05848668,  0.02587764,
         0.08532594, -0.05392813],
       [-0.01648628,  0.04377236, -0.08400364,  0.04706398, -0.09644721,
        -0.06574469,  0.12526136, -0.1568624 , -0.37329348, -0.35477856,
         0.5631442 ,  0.59352562],
       [-0.15626917, -0.26926025,  0.16695971,  0.09652468, -0.31342353,
        -0.05939849,  0.19206032,  0.66533711, -0.38487314, -0.29382499,
        -0.14890179, -0.16712562],
       [-0.09529599, -0.37622855,  0.50640246,  0.44262408, -0.42377534,
        -0.04689821,  0.03107644, -0.34212104,  0.2135363 ,  0.18376888,
         0.13332781,  0.0082855 ],
       [ 0.0827818 ,  0.18171768, -0.06849964, -0.17353552, -0.29532442,
         0.27998617,  

In [9]:
# Refit keeping only the first three components and show their axes (one
# component per row).
# NOTE(review): `data` is a covariance matrix, while PCA.fit expects raw
# observations of shape (n_samples, n_features) -- confirm this is intended.
pca = PCA(n_components=3)
pca.fit(data)
pca.components_

array([[-0.15434553, -0.2996763 , -0.27416897, -0.36385468, -0.23496045,
        -0.32830091, -0.11574407,  0.34154383,  0.34249658,  0.37753058,
         0.24985416,  0.25202301],
       [ 0.58061742, -0.34269224,  0.28847085, -0.19876284,  0.3905689 ,
        -0.38904285,  0.32326431, -0.00678401, -0.05848668,  0.02587764,
         0.08532594, -0.05392813],
       [-0.01648628,  0.04377236, -0.08400364,  0.04706398, -0.09644721,
        -0.06574469,  0.12526136, -0.1568624 , -0.37329348, -0.35477856,
         0.5631442 ,  0.59352562]])

Dataset Transformation<br>
pca.fit_transform(Dataset)