# Principal Component Analysis
Jie Guo, Harsh Shah

In [65]:
#loading library
library(psych)

In [82]:
#' Principal component analysis via eigen-decomposition of the covariance matrix
#'
#' @param df Data set with observations in rows and features in columns; it is
#'   converted with `as.matrix()` and must yield numeric values.
#' @param reqd_per_var Percentage of the total variance (on a 0-100 scale) that
#'   the leading components have to exceed.
#' @return A list with three elements, in this order:
#'   * `"Minimum number of components required: "` - the smallest number of
#'     leading components whose cumulative percentage variance is strictly
#'     greater than `reqd_per_var` (all components if the threshold is never
#'     exceeded).
#'   * `"Principal Components are: "` - a matrix with one column per retained
#'     component (always a matrix, even when a single component is retained).
#'   * `"Transformed dataset is: "` - the centred data projected onto ALL
#'     components (one row per component, one column per observation), not
#'     only onto the retained ones.
myPCA <- function(df, reqd_per_var) {
    # Work with features in rows and observations in columns.
    X <- t(as.matrix(df))
    if (ncol(X) < 2) {
        stop("`df` must contain at least two observations", call. = FALSE)
    }

    # Centre every feature so that its mean is zero.
    feature_means <- rowMeans(X)
    B <- sweep(X, 1, feature_means)

    # Sample covariance matrix of the features.
    Sx <- (1 / (ncol(B) - 1)) * (B %*% t(B))

    # Decompose once and reuse both the vectors and the values.
    decomposition <- eigen(Sx)
    P <- decomposition$vectors

    # The trace of the covariance matrix is the total variance.
    total_variance <- sum(diag(Sx))
    if (total_variance <= 0) {
        stop("`df` has zero total variance; PCA is undefined", call. = FALSE)
    }
    per_var <- decomposition$values / total_variance * 100

    # First position at which the cumulative percentage variance exceeds the
    # requested threshold; fall back to every component if it never does.
    exceeds <- which(cumsum(per_var) > reqd_per_var)
    number <- if (length(exceeds) > 0) exceeds[1] else length(per_var)

    # drop = FALSE keeps a one-column matrix when a single component suffices.
    PCA <- P[, seq_len(number), drop = FALSE]

    # Projection of the centred data onto the full set of components.
    transformed <- t(P) %*% B

    list("Minimum number of components required: " = number,
         "Principal Components are: " = PCA,
         "Transformed dataset is: " = transformed)
}

In [83]:
# Preview the first rows of the longley data set (output shown below)
head(longley)

Unnamed: 0,GNP.deflator,GNP,Unemployed,Armed.Forces,Population,Year,Employed
1947,83.0,234.289,235.6,159.0,107.608,1947,60.323
1948,88.5,259.426,232.5,145.6,108.632,1948,61.122
1949,88.2,258.054,368.2,161.6,109.773,1949,60.171
1950,89.5,284.599,335.1,165.0,110.929,1950,61.187
1951,96.2,328.975,209.9,309.9,112.075,1951,63.221
1952,98.1,346.999,193.2,359.4,113.27,1952,63.639


The dataset above contains 7 economic features. We only need the first five for PCA.

In [84]:
# Run myPCA on longley with columns 6 and 7 (Year, Employed) dropped, i.e. on
# the first 5 features, asking for components that explain more than 98% of the variance
myPCA(longley[, -c(6,7)], 98)

0,1,2
-0.08251944,0.03443816,0.04192389
-0.75660491,0.31998587,0.55803783
-0.62690628,-0.57976739,-0.5202748
-0.1573731,0.74840481,-0.6441842
-0.05442221,0.01353004,0.03430775

1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962
186.6376809,171.1615269,84.57309752,84.5343621,106.0293187,94.84981683,85.3918178,-17.0075059,3.8413269,-4.792683126,-29.265157,-137.751035,-111.0940254,-133.18865403,-200.9965329,-182.9233547
-77.4097447,-77.3943407,-144.52821333,-114.2389009,81.2377681,133.81494804,139.8309367,25.3854226,52.9108168,50.452891974,47.130117,-65.416981,-9.1634444,-12.3996908,-53.7205039,23.508919
22.3284369,46.8664665,-34.78083102,-4.8426865,-7.9628785,-20.98268027,-4.3689185,-81.7293255,-7.9455211,20.928929182,32.1800489,-47.092654,24.9604141,32.50811238,-7.9143297,37.8474175
-1.6664798,1.5956618,-0.09210406,-0.9803671,0.8871038,0.52148273,-0.415697,-0.60432,-1.5206114,0.008137067,1.3706287,2.037913,1.2141527,0.64052566,-0.5058873,-2.4901388
0.1633091,0.4265661,0.07790293,-0.4769869,-0.163369,0.06592743,0.4791093,0.1679446,-0.7261931,-0.364580764,0.0730891,0.104452,-0.5185938,-0.07597503,0.3614267,0.4059714
