## Principal Component Analysis

In [1]:
# Import the standard-library module used to fetch resources by URL.

import urllib.request

In [2]:
# Import libraries to draw figures and graphs.

%matplotlib inline
import matplotlib.pyplot as plt 
from pandas import plotting

In [3]:
# Location of the data set on the web (a CSV file published as a GitHub gist,
# pinned to a specific revision).

url = "https://gist.githubusercontent.com/ereyester/5bf6d48fe966238632eca537756a06b0/raw/805c2eea83c7608d4d85ec15e56761133dc5ff4d/high_male2.csv"

# Fetch the file and store it in the working directory under a descriptive name.
# The call is left as the last expression so the notebook shows its return value.
urllib.request.urlretrieve(
    url,
    filename='physical_fitness_measurement_male.csv',
)

('physical_fitness_measurement_male.csv',
 <http.client.HTTPMessage at 0x178193040>)

In [5]:
import numpy as np 
import pandas as pd
import sklearn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from IPython.display import display

# Read the downloaded CSV into a DataFrame.
high_male_2 = pd.read_csv('physical_fitness_measurement_male.csv', delimiter=',')

# Keep only the eight measurement columns that go into the PCA:
# grip strength, sit-ups, sitting forward bend, repeated side steps,
# shuttle run, 50 m run, standing long jump, handball throw.
fitness_columns = [
    '握力',
    '上体起こし',
    '長座体前屈',
    '反復横跳び',
    'シャトルラン',
    'X50m走',
    '立ち幅跳び',
    'ハンドボール投げ',
]
high_male_3 = high_male_2[fitness_columns]
display(high_male_3)

Unnamed: 0,握力,上体起こし,長座体前屈,反復横跳び,シャトルラン,X50m走,立ち幅跳び,ハンドボール投げ
0,35,33,55,49,112,7.0,235,31
1,33,29,48,57,70,7.4,205,29
2,34,31,45,54,76,8.0,237,22
3,40,31,55,52,76,7.5,225,23
4,37,32,62,56,87,7.8,240,26
...,...,...,...,...,...,...,...,...
131,31,33,41,53,67,8.2,195,21
132,35,38,68,64,100,7.7,220,26
133,35,29,63,53,86,7.7,217,27
134,32,40,59,52,94,7.5,211,26


In [11]:
# Step 1 of the scikit-learn PCA workflow: standardize the measurements.
# StandardScaler learns each column's mean and standard deviation and rescales
# the column with them, so every variable enters the PCA on a comparable scale.

std_sc = StandardScaler()
std_data = std_sc.fit_transform(high_male_3)  # fit, then transform the same data

# Wrap the resulting array in a DataFrame so the original column names are kept.
std_data_df = pd.DataFrame(data=std_data, columns=high_male_3.columns)
display(std_data_df)

Unnamed: 0,握力,上体起こし,長座体前屈,反復横跳び,シャトルラン,X50m走,立ち幅跳び,ハンドボール投げ
0,-0.429875,0.004405,-0.026291,-1.470359,1.351292,-0.518153,0.349357,0.590782
1,-0.739204,-0.794312,-0.920169,0.129392,-1.270508,0.320762,-1.078876,0.248883
2,-0.584539,-0.394954,-1.303259,-0.470515,-0.895965,1.579134,0.444572,-0.947766
3,0.343445,-0.394954,-0.026291,-0.870453,-0.895965,0.530490,-0.126721,-0.776816
4,-0.120547,-0.195275,0.867588,-0.070577,-0.209303,1.159677,0.587396,-0.263967
...,...,...,...,...,...,...,...,...
131,-1.048532,0.004405,-1.814047,-0.670484,-1.457779,1.998592,-1.554953,-1.118716
132,-0.429875,1.002801,1.633769,1.529174,0.602206,0.949948,-0.364759,-0.263967
133,-0.429875,-0.794312,0.995285,-0.670484,-0.271727,0.949948,-0.507583,-0.093017
134,-0.893868,1.402159,0.484497,-0.870453,0.227663,0.530490,-0.793229,-0.263967


In [18]:
# Run PCA on the standardized data, keeping every component.
pca = PCA()
pca.fit(std_data_df)
# Project the observations into principal-component space (component scores).
pca_cor = pca.transform(std_data_df)

# Labels PC1..PCk, built once and shared by every table below. The count comes
# from the fitted model rather than from the number of input columns: PCA()
# keeps min(n_samples, n_features) components, so the two only coincide when
# the data has at least as many rows as columns.
pc_labels = ["PC{}".format(k + 1) for k in range(pca.n_components_)]

# Eigenvector matrix: one row per original variable, one column per component.
eig_vec = pd.DataFrame(pca.components_.T, index=high_male_3.columns, columns=pc_labels)
display(eig_vec)

# Eigenvalues (variance along each principal component).
# Note: StandardScaler scales by the population standard deviation (ddof=0)
# whereas explained_variance_ divides by n - 1, so these values sum to
# slightly more than the number of variables.
eig = pd.DataFrame(pca.explained_variance_, index=pc_labels, columns=['固有値']).T
display(eig)

# Standard deviation of each principal component (square root of its eigenvalue).
dv = np.sqrt(eig)
dv = dv.rename(index={"固有値":"主成分の標準偏差"})
display(dv)

# Contribution rate (share of total variance explained by each component).
ev = pd.DataFrame(pca.explained_variance_ratio_, index=pc_labels, columns=['寄与率']).T
display(ev)

# Cumulative contribution rate.
t_ev = pd.DataFrame(pca.explained_variance_ratio_.cumsum(), index=pc_labels, columns=['累積寄与率']).T
display(t_ev)

# Principal component scores: one row per observation.
print('主成分得点')
cor = pd.DataFrame(pca_cor, columns=pc_labels)
display(cor)

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8
握力,-0.325202,0.268218,0.532974,-0.399934,-0.365366,0.314417,0.342095,0.170043
上体起こし,-0.314119,0.435167,-0.422258,-0.408344,0.403225,0.333213,-0.294312,-0.081685
長座体前屈,-0.307786,0.374578,-0.015031,0.759876,-0.241145,0.287767,-0.102389,-0.189412
反復横跳び,-0.393395,0.120362,-0.051835,0.204047,0.496749,-0.356385,0.611981,0.195297
シャトルラン,-0.313262,-0.444422,-0.597602,-0.017037,-0.390053,0.217597,0.175419,0.341579
X50m走,0.405718,0.462051,-0.117292,0.106365,-0.070993,-0.042159,-0.08598,0.763296
立ち幅跳び,-0.368104,-0.366939,0.400185,0.139333,0.305585,0.100496,-0.505946,0.436842
ハンドボール投げ,-0.3845,0.195568,-0.06075,-0.15246,-0.385284,-0.721849,-0.342347,-0.016367


Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8
固有値,3.900707,0.913902,0.833576,0.737092,0.540838,0.473753,0.412597,0.246793


Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8
主成分の標準偏差,1.975021,0.955982,0.913004,0.858541,0.735417,0.688297,0.642337,0.496783


Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8
寄与率,0.484003,0.113398,0.103431,0.091459,0.067108,0.058784,0.051195,0.030622


Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8
累積寄与率,0.484003,0.597401,0.700832,0.792291,0.859399,0.918182,0.969378,1.0


主成分得点


Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8
0,-0.264351,-1.152818,-0.797203,-0.269405,-1.176372,0.307293,-1.142910,-0.146622
1,1.551796,0.284378,0.223317,-0.185332,0.283178,-1.386061,0.364496,-0.525765
2,2.022502,-0.094115,0.484906,-0.301600,0.873243,-0.055456,-0.431644,1.196852
3,1.204130,0.343509,0.865023,-0.001203,-0.138412,0.609716,-0.169004,-0.018119
4,0.282616,0.560862,0.248988,1.021810,0.001542,0.326985,-0.459037,0.891940
...,...,...,...,...,...,...,...,...
131,3.431767,0.883587,-0.416232,-0.906398,0.871839,-0.361471,0.157571,0.400616
132,-0.847096,1.370626,-1.357610,1.396121,0.614961,0.369032,0.625231,0.609947
133,1.039876,0.558900,-0.020427,1.164885,-0.817018,0.042483,-0.266201,0.084388
134,0.580950,0.830513,-1.530320,-0.042263,0.075586,0.773810,-0.814743,-0.387807
