In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV

In [1]:
# Load the scikit-learn digits dataset as a plain feature matrix
# (the recorded run printed a shape of (1797, 64)).
digits = load_digits()
X = digits.data
print(X.shape)  # call-form print: same text on Python 2, valid on Python 3

# Standardize every feature with StandardScaler, then wrap the result in a
# DataFrame whose columns are named "0", "1", ... (one per feature).
feature_names = [str(i) for i in range(X.shape[1])]
X = pd.DataFrame(StandardScaler().fit_transform(X), columns=feature_names)

# np.cov treats each row as one variable, so passing the transpose yields the
# feature-by-feature covariance matrix.
cov_x = np.cov(X.T)

# SVD of the covariance matrix; later cells slice the leading columns of U
# to build the projection basis.
U, S, V = np.linalg.svd(cov_x)

(1797, 64)


In [2]:
def reduce_dim(no_of_components, U, X):
    """Project the rows of ``X`` onto the first ``no_of_components`` columns of ``U``.

    Parameters
    ----------
    no_of_components : int
        Number of leading columns of ``U`` to keep.
    U : numpy.ndarray
        2-D array whose columns form the basis; it is sliced as
        ``U[:, :no_of_components]``.
    X : array-like
        Data with one sample per row and as many columns as ``U`` has rows.

    Returns
    -------
    pandas.DataFrame
        One row per sample and one column per kept component, with columns
        named ``pc0``, ``pc1``, ...
    """
    U_red = U[:, :no_of_components]
    # (U_red^T . X^T)^T -- same operand order as before, so the numbers do not move.
    Z = np.matmul(U_red.T, np.array(X).T).T
    # range() replaces the Python 2-only xrange(), which is a NameError on Python 3.
    column_names = ["pc" + str(i) for i in range(Z.shape[1])]
    return pd.DataFrame(Z, columns=column_names)

In [3]:
# Build three projections of the standardized data, keeping 17, 26 and 38
# leading components. Fresh copies of U and X are handed to every call so the
# originals are never passed to reduce_dim directly.
reduced = []
for n_components in (17, 26, 38):
    U_copy, X_copy = U.copy(), X.copy()
    reduced.append(reduce_dim(n_components, U_copy, X_copy))
Z_1, Z_2, Z_3 = reduced

In [4]:
# Sanity check: every projection keeps all samples and has one column per
# retained component. Call-form print runs on both Python 2 and Python 3.
print(Z_1.shape)
print(Z_2.shape)
print(Z_3.shape)

(1797, 17)
(1797, 26)
(1797, 38)


In [5]:
def print_score(data):
    """Fit a cross-validated kernel density estimate to ``data`` and print its scores.

    A 5-fold ``GridSearchCV`` over 20 log-spaced bandwidths between 0.1 and 10
    selects the ``KernelDensity`` bandwidth. The function then prints the
    selected bandwidth, the shape of the array returned by
    ``score_samples(data)``, and every value of that array on its own line.

    Parameters
    ----------
    data : array-like
        Samples to fit and score, one sample per row.

    Returns
    -------
    None
        Results are only printed.
    """
    params = {'bandwidth': np.logspace(-1, 1, 20)}
    grid = GridSearchCV(KernelDensity(), params, cv=5)
    grid.fit(data)

    # The estimator GridSearchCV selected; reused below for scoring.
    kde = grid.best_estimator_
    print("best bandwidth: {0}".format(kde.bandwidth))

    score = kde.score_samples(data)
    # Single-argument call-form print emits the same text on Python 2 and is
    # valid syntax on Python 3 (the old `print x` statements were not).
    print(score.shape)
    for sample_score in score:
        print(sample_score)

In [6]:
# Bandwidth search and per-sample KDE scores for Z_1 (the 17-component projection).
print_score(Z_1)



best bandwidth: 0.88586679041
(1797,)
-19.938361560980532
-20.805547635206672
-20.924139675415425
-20.554492548493613
-20.81389109749327
-20.855744502539714
-20.60467948914603
-21.05285463257176
-20.9449235508846
-20.87607185535047
-20.909581325043334
-20.54433296994671
-21.03404553949374
-20.99907742026493
-20.864772026776336
-21.00293052247496
-20.97597610068911
-20.964493491463656
-20.836636219563015
-20.997383577987176
-20.428314450887324
-20.490792006471665
-20.859791078467477
-21.003879929884572
-20.626986642016846
-21.010518510193652
-20.725530326765682
-21.0259776717188
-20.875412819093583
-20.997306190870788
-20.837013964320107
-21.033471986108864
-21.001771457236202
-21.054062010127637
-20.342959624831472
-21.02240430975759
-20.77438237926394
-21.050634252640585
-21.04363824870695
-20.782486096278774
-20.878155859094694
-20.37073382675871
-20.574836333624603
-21.03165935875312
-20.7109378542879
-20.862380519814344
-21.04513901884475
-20.39280795204769
-20.23190833781593
-20.0

In [7]:
# Bandwidth search and per-sample KDE scores for Z_2 (the 26-component projection).
print_score(Z_2)



best bandwidth: 0.695192796178
(1797,)
-21.83139138597349
-21.929012116056445
-21.93334024936354
-21.860123624496183
-21.931358766938985
-21.931607829684005
-21.91726723043802
-21.933557669759075
-21.933060980130904
-21.933347375731778
-21.932876549043435
-21.89564836807919
-21.933473391419906
-21.93325235082962
-21.930998989526717
-21.933505538667013
-21.932280664283926
-21.932904552750625
-21.93314934337654
-21.933368317464645
-21.917338373225363
-21.868014389447715
-21.928312673569625
-21.933027836040264
-21.91969275384818
-21.933500437665344
-21.926004427268786
-21.93348464136404
-21.933537533058654
-21.93349657925601
-21.932645171715876
-21.933425277929345
-21.933462329651334
-21.933557996003444
-21.8210997076479
-21.933554019568966
-21.932808421398075
-21.933556694960753
-21.933535841059054
-21.92822965341456
-21.93207028405923
-21.848618531673324
-21.892257265008435
-21.932849593306074
-21.929928019955405
-21.930506031168235
-21.933557879380636
-21.86836092247406
-21.89473446127

In [8]:
# Bandwidth search and per-sample KDE scores for Z_3 (the 38-component projection).
print_score(Z_3)



best bandwidth: 0.695192796178
(1797,)
-28.568820465608468
-28.59748297623458
-28.598002390696365
-28.58655749269255
-28.5980045426952
-28.598026752850124
-28.59587009706408
-28.5980275904505
-28.598020066736105
-28.59802565057499
-28.597981497553704
-28.591274769720343
-28.598027182463916
-28.597959309243457
-28.59788080759904
-28.598027324319744
-28.59799090056122
-28.598004494294877
-28.598019414405307
-28.5979790653333
-28.596315418856534
-28.58330444121073
-28.597921721247303
-28.59797039839001
-28.597872127613535
-28.598027335067286
-28.597033638158777
-28.598027562142022
-28.59802744695316
-28.598014585015093
-28.597978580684867
-28.59799199839164
-28.598019558197578
-28.598027590414475
-28.57150343343057
-28.598027589639685
-28.597986587166922
-28.598027577410072
-28.598027552959152
-28.59783950716561
-28.59800659921956
-28.565763774561326
-28.58838779945225
-28.598024069774155
-28.597886379760098
-28.597825735562736
-28.59802759028009
-28.592756230739703
-28.591398148418804
-2