These figures help illustrate how a point cloud can be very flat in one direction — which is where PCA comes in: it chooses directions in which the data is not flat.

#### New to Plotly?
Plotly's Python library is free and open source! [Get started](https://plot.ly/python/getting-started/) by downloading the client and [reading the primer](https://plot.ly/python/getting-started/).
<br>You can set up Plotly to work in [online](https://plot.ly/python/getting-started/#initialization-for-online-plotting) or [offline](https://plot.ly/python/getting-started/#initialization-for-offline-plotting) mode, or in [Jupyter notebooks](https://plot.ly/python/getting-started/#start-plotting-online).
<br>We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started!

### Version

In [1]:
# Show which scikit-learn version this notebook was run with.
import sklearn
sklearn.__version__

'0.18'

### Imports

This tutorial imports [PCA](http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html#sklearn.decomposition.PCA).

In [2]:
# Echo the module docstring of the running environment.
print(__doc__)

# py supplies iplot for rendering; go supplies the trace/layout/figure classes.
import plotly.plotly as py
import plotly.graph_objs as go

# PCA estimator fitted to the point cloud in plot_figs.
from sklearn.decomposition import PCA

import numpy as np
# NOTE(review): plt is not referenced in the visible cells -- confirm whether
# this import is still needed.
import matplotlib.pyplot as plt
from scipy import stats

Automatically created module for IPython interactive environment


### Calculations

Create the data

In [3]:
# Euler's number; it scales the widths of the two Gaussians in pdf().
e = np.exp(1.0)
# Fixed seed so the sampled point cloud is reproducible.
np.random.seed(4)


def pdf(x):
    """Evaluate an equal-weight mixture of two zero-mean normal densities.

    The narrow component has scale 0.25 / e and the wide one 4 / e; the
    result is the average of the two densities at x.
    """
    narrow = stats.norm(scale=0.25 / e)
    wide = stats.norm(scale=4 / e)
    return 0.5 * (narrow.pdf(x) + wide.pdf(x))


# The draw order (y, then x, then z) determines the seeded values.
y = np.random.normal(scale=0.5, size=30000)
x = np.random.normal(scale=0.5, size=30000)
z = np.random.normal(scale=0.1, size=x.shape[0])

# Product of the mixture density over x, y and the 5x-stretched z.
pdf_z = pdf(5 * z)
density = pdf(x) * pdf(y) * pdf_z

# Combine the independent draws into the three plotted coordinates.
# c is computed from a and b before they are rescaled below.
a = x + y
b = 2 * y
c = a - b + z

# Rescale a and b in place so that their variances sum to one.
norm = np.sqrt(np.var(a) + np.var(b))
a /= norm
b /= norm

### Plot Figures

In [4]:
def _camera_eye(elev, azim, radius=1.25 * np.sqrt(3)):
    """Convert matplotlib-style view angles (degrees) to a Plotly camera eye.

    The default radius equals the distance of Plotly's default eye position
    (1.25, 1.25, 1.25) from the scene centre, so the zoom level is unchanged.
    """
    elev_rad = np.radians(elev)
    azim_rad = np.radians(azim)
    horizontal = radius * np.cos(elev_rad)
    return dict(x=float(horizontal * np.cos(azim_rad)),
                y=float(horizontal * np.sin(azim_rad)),
                z=float(radius * np.sin(elev_rad)))


def plot_figs(elev, azim):
    """Build a 3D figure of the point cloud and its main principal plane.

    Reads the module-level arrays ``a``, ``b`` and ``c``, fits a
    three-component PCA to them, and overlays a surface spanned by the first
    two principal components on a scatter of every tenth point.

    Parameters
    ----------
    elev : float
        Elevation of the viewpoint above the x-y plane, in degrees.
    azim : float
        Azimuth of the viewpoint around the z axis, in degrees.

    Returns
    -------
    plotly.graph_objs.Figure
        Figure holding the scatter and the plane, viewed from (elev, azim).
    """
    # Plot every tenth sample only, to keep the figure light.
    scatter = go.Scatter3d(x=a[::10],
                           y=b[::10],
                           z=c[::10],
                           mode='markers',
                           opacity=0.5,
                           marker=dict(color='pink'))

    # The PCA itself is fitted on the full data set, not the thinned one.
    pca = PCA(n_components=3)
    pca.fit(np.c_[a, b, c])

    # components_ has one component per row; after transposing, each row holds
    # one spatial coordinate of all three components, scaled up for display.
    x_pca_axis, y_pca_axis, z_pca_axis = 3 * pca.components_.T

    # Corners of the plane: +v1, +v2, -v2, -v1 arranged as a 2x2 grid, where
    # v1 and v2 are the first two (scaled) principal components.
    x_pca_plane = np.r_[x_pca_axis[:2], -x_pca_axis[1::-1]].reshape(2, 2)
    y_pca_plane = np.r_[y_pca_axis[:2], -y_pca_axis[1::-1]].reshape(2, 2)
    z_pca_plane = np.r_[z_pca_axis[:2], -z_pca_axis[1::-1]].reshape(2, 2)

    surface = go.Surface(x=x_pca_plane,
                         y=y_pca_plane,
                         z=z_pca_plane,
                         showscale=False,
                         colorscale=[[0, 'white'], [1, 'cyan']])

    # All three axes are hidden the same way; each gets its own copy.
    hidden_axis = dict(showgrid=False, ticks='',
                       showticklabels=False, zeroline=False)
    # elev/azim were previously ignored, so every call produced the same
    # view; they now position the scene camera.
    # NOTE(review): Plotly's eye lives in normalised scene coordinates, so
    # this approximates (not exactly reproduces) matplotlib's view_init.
    layout = go.Layout(scene=dict(xaxis=dict(hidden_axis),
                                  yaxis=dict(hidden_axis),
                                  zaxis=dict(hidden_axis),
                                  camera=dict(eye=_camera_eye(elev, azim))))
    return go.Figure(data=[scatter, surface], layout=layout)

In [5]:
# Render the figure for the first (elev, azim) pair.
elev, azim = -40, -80
py.iplot(plot_figs(elev=elev, azim=azim))

In [6]:
# Render the figure for the second (elev, azim) pair.
elev, azim = 30, 20
py.iplot(plot_figs(elev=elev, azim=azim))

### License

Authors:  
    
          Gael Varoquaux
          
          Jaques Grobler
            
          Kevin Hughes

License: 
    
          BSD 3 clause

In [None]:

from IPython.display import display, HTML

# Inject the Google Fonts link and Plotly's custom notebook stylesheet into
# the rendered output.
display(HTML('<link href="//fonts.googleapis.com/css?family=Open+Sans:600,400,300,200|Inconsolata|Ubuntu+Mono:400,700" rel="stylesheet" type="text/css" />'))
display(HTML('<link rel="stylesheet" type="text/css" href="http://help.plot.ly/documentation/all_static/css/ipython-notebook-custom.css">'))

# IPython shell escape (not plain Python): install or upgrade the publisher
# package straight from its GitHub repository.
! pip install git+https://github.com/plotly/publisher.git --upgrade
import publisher
# Publish this notebook with its page metadata.
# NOTE(review): the title string is passed both as the third positional
# argument and as title= -- confirm against publisher.publish's signature.
publisher.publish(
    'principal-components-analysis.ipynb', 'scikit-learn/plot-pca-3d/', 'Principal Components Analysis (PCA) | plotly',
    'pca',
    title = 'Principal Components Analysis (PCA) | plotly',
    name = 'Principal Components Analysis (PCA)',
    has_thumbnail='true', thumbnail='thumbnail/pca.jpg', 
    language='scikit-learn', page_type='example_index',
    display_as='decomposition', order=6,
    ipynb= '~Diksha_Gabha/2932')

Collecting git+https://github.com/plotly/publisher.git
  Cloning https://github.com/plotly/publisher.git to /tmp/pip-zqIqrE-build
