**One problem with the preceding implementations of PCA is that they require the whole training set to fit in memory in order for the algorithm to run. Fortunately, Incremental PCA (IPCA) algorithms have been developed. They allow you to split the training set into mini-batches and feed an IPCA algorithm one mini-batch at a time. This is useful for large training sets and for applying PCA online (i.e., on the fly, as new instances arrive).**

In [1]:
import pandas as pd
import numpy as np
%matplotlib inline

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the Iris data from 'iris.csv' (path is relative to the working directory).
df = pd.read_csv('iris.csv')

In [4]:
# Drop the 'Id' column so it is not picked up as a feature later on.
# Rebinding `df` with errors='ignore' (instead of inplace=True) keeps this cell
# safe to re-run: a second execution no longer raises KeyError because 'Id'
# has already been removed.
df = df.drop(columns='Id', errors='ignore')

In [5]:
# Preview the first rows to confirm the remaining columns after dropping 'Id'.
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [6]:
# Separate the target labels from the feature columns:
# y holds the 'Species' column, X holds every other column.
y = df['Species']
X = df.drop(columns=['Species'])

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=333,
)

In [8]:
## Split the training set into n_batches mini-batches and fit the
## IncrementalPCA model on them one after another with partial_fit.

from sklearn.decomposition import IncrementalPCA
n_batches = 10

inc_pca = IncrementalPCA(n_components=2)

# Split the row positions rather than the DataFrame itself: calling
# np.array_split directly on a DataFrame relies on DataFrame.swapaxes, which
# recent pandas versions deprecate. Slicing with .iloc yields the same batches
# and still hands DataFrames (with their column names) to partial_fit.
for batch_rows in np.array_split(np.arange(len(X_train)), n_batches):
    inc_pca.partial_fit(X_train.iloc[batch_rows])

In [9]:
## Project the training set onto the principal components fitted by inc_pca
## (n_components=2, so each sample is reduced to two values).
X_reduced = inc_pca.transform(X_train)

In [10]:
# Display the reduced training data: one row per sample, one column per component.
X_reduced

array([[ 1.65215756, -0.55094144],
       [-3.14362109,  0.18251239],
       [ 2.38852815, -0.20483744],
       [ 3.31352675,  1.38598206],
       [-2.92957136, -0.28830509],
       [ 1.51300739, -0.10963533],
       [-2.51893746, -0.13785188],
       [ 0.58124775, -0.07766773],
       [ 3.57321229,  0.50112415],
       [-2.82116682, -0.51422765],
       [-2.56406758,  1.2492356 ],
       [ 0.20341035, -0.26499019],
       [-2.46732484,  0.64029962],
       [ 1.11561704,  0.28236335],
       [-2.51764096,  0.29519592],
       [ 1.00855641, -0.47930343],
       [-2.54559075,  0.86380456],
       [ 0.99019845, -0.14007698],
       [ 1.26474574, -0.60851029],
       [ 1.97653287,  0.11220879],
       [ 2.91421535,  0.41809047],
       [-2.13416604,  0.47425851],
       [-2.65500653,  0.38306999],
       [-2.60373321, -0.0392303 ],
       [-0.64377391, -0.95858206],
       [ 3.56870488,  1.16983656],
       [ 2.68968139,  0.36542843],
       [ 1.99580064,  0.42621547],
       [-2.54175536,