In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Reducing Features Using Principal Components**

Q1. Given a set of features, you want to reduce the number of features while retaining most of the variance in the data.

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import datasets

In [4]:
# Fetch the handwritten-digits dataset bundled with scikit-learn.
digits = datasets.load_digits()

# Standardize the feature matrix so it is ready for PCA.
scaler = StandardScaler()
features = scaler.fit_transform(digits.data)


In [5]:
# A float n_components between 0 and 1 asks PCA to keep as many components
# as are needed to retain that fraction of the variance (99% here).
pca = PCA(whiten=True, n_components=0.99)

# Fit the PCA on the feature matrix and project it in a single step.
features_pca = pca.fit_transform(features)


In [6]:
# Report the column count before and after the PCA projection.
n_original = features.shape[1]
print("Original number of features:", n_original)
n_reduced = features_pca.shape[1]
print("Reduced number of features:", n_reduced)


Original number of features: 64
Reduced number of features: 54


**Reducing Features When Data Is Linearly Inseparable**

Q2. You suspect you have linearly inseparable data and want to reduce its dimensionality.

In [14]:
from sklearn.decomposition import PCA,KernelPCA
from sklearn.datasets import make_circles

In [16]:
# Create linearly inseparable data; only the feature matrix is kept and the
# second return value is discarded.
features, _ = make_circles(
    n_samples=1000,
    factor=0.1,
    noise=0.1,
    random_state=1,
)


In [17]:
# Apply kernel PCA with a radial basis function (RBF) kernel, keeping a single component.
kpca = KernelPCA(n_components=1, kernel="rbf", gamma=15)

# Fit on the features and project them in one call.
features_kpca = kpca.fit_transform(features)

In [18]:
# Report the column count before and after the kernel PCA projection.
original_width = features.shape[1]
print("Original number of features:", original_width)
reduced_width = features_kpca.shape[1]
print("Reduced number of features:", reduced_width)


Original number of features: 2
Reduced number of features: 1


>  Reducing Features by Maximizing Class Separability 

Q3. You want to reduce the number of features used by a classifier while maximizing the separation between classes.

In [19]:
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


Load the data

In [20]:
# Load the iris dataset and pull out the feature matrix and the class labels.
iris = datasets.load_iris()
features, target = iris.data, iris.target

Create and run an LDA, then use it to transform the features 

In [26]:
# Fit an LDA on the labelled data, keeping two discriminant components.
lda = LinearDiscriminantAnalysis(n_components=2)
lda.fit(features, target)

# Project the same features with the fitted LDA.
features_lda = lda.transform(features)


In [24]:
# Report the column count before and after the LDA projection.
width_before = features.shape[1]
print("Original number of features:", width_before)
width_after = features_lda.shape[1]
print("Reduced number of features:", width_after)


Original number of features: 4
Reduced number of features: 2


**Reducing Features Using Matrix Factorization**

Q4. You have a feature matrix of nonnegative values and want to reduce its dimensionality.

In [29]:
from sklearn.decomposition import NMF
from sklearn import datasets

In [31]:
# Load the digits dataset again for this section.
digits = datasets.load_digits()

# Use the raw (unscaled) feature matrix, since NMF expects nonnegative input.
features = digits["data"]


In [32]:
# Configure NMF to produce 10 components; the fixed random_state keeps the
# factorization reproducible between runs.
nmf = NMF(
    n_components=10,
    random_state=1,
)

# Fit the factorization and obtain the reduced feature matrix in one call.
features_nmf = nmf.fit_transform(features)



In [None]:
# Show results with the same labels used by the other result cells in this
# notebook, so the two numbers are self-explanatory in the output.
print("Original number of features:", features.shape[1])
print("Reduced number of features:", features_nmf.shape[1])