Plot several randomly generated 2D classification datasets. This example illustrates the **datasets.make_classification**, **datasets.make_blobs** and **datasets.make_gaussian_quantiles** functions.

For `make_classification`, three binary and one multi-class classification datasets are generated, with different numbers of informative features and clusters per class. The remaining two panels are produced with `make_blobs` and `make_gaussian_quantiles`.

#### New to Plotly?
Plotly's Python library is free and open source! [Get started](https://plot.ly/python/getting-started/) by downloading the client and [reading the primer](https://plot.ly/python/getting-started/).
<br>You can set up Plotly to work in [online](https://plot.ly/python/getting-started/#initialization-for-online-plotting) or [offline](https://plot.ly/python/getting-started/#initialization-for-offline-plotting) mode, or in [jupyter notebooks](https://plot.ly/python/getting-started/#start-plotting-online).
<br>We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started!

### Version

In [1]:
import sklearn
# Bare last expression: the notebook displays the installed scikit-learn
# version string as this cell's output.
sklearn.__version__

'0.18'

### Imports

This tutorial imports [make_classification](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html#sklearn.datasets.make_classification), [make_blobs](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_blobs.html#sklearn.datasets.make_blobs) and [make_gaussian_quantiles](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_gaussian_quantiles.html#sklearn.datasets.make_gaussian_quantiles).

In [2]:
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools

import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.datasets import make_blobs
from sklearn.datasets import make_gaussian_quantiles

### Plot Dataset

In [3]:
# 3x2 grid of subplots, one per generated dataset. The titles are listed in
# the order the panels are filled by the plotting cell: (1,1), (1,2), (2,1),
# (2,2), (3,1), (3,2).
fig = tools.make_subplots(
    rows=3,
    cols=2,
    print_grid=False,
    subplot_titles=(
        "One informative feature, one cluster per class",
        "Two informative features, one cluster per class",
        "Two informative features, two clusters per class",
        "Multi-class, two informative features, one cluster",
        "Three blobs",
        "Gaussian divided into three quantiles",
    )
)

In [4]:
def _class_scatter(X, y):
    """Build a marker-only scatter trace of a 2D dataset coloured by class label.

    Parameters
    ----------
    X : array indexable as ``X[:, 0]`` and ``X[:, 1]``
        Sample matrix; column 0 is drawn on the x axis, column 1 on the y axis.
    y : sequence
        Per-sample labels, passed unchanged as the marker ``color``.

    Returns
    -------
    go.Scatter
        Trace with black 1px marker outlines and no legend entry.
    """
    return go.Scatter(x=X[:, 0], y=X[:, 1],
                      mode='markers',
                      showlegend=False,
                      marker=dict(color=y,
                                  line=dict(color='black', width=1)))


# (1, 1) One informative feature, one cluster per class.
X1, Y1 = make_classification(n_features=2, n_redundant=0, n_informative=1,
                             n_clusters_per_class=1)
one_informative = _class_scatter(X1, Y1)
fig.append_trace(one_informative, 1, 1)

# (1, 2) Two informative features, one cluster per class.
X1, Y1 = make_classification(n_features=2, n_redundant=0, n_informative=2,
                             n_clusters_per_class=1)
two_informative1 = _class_scatter(X1, Y1)
fig.append_trace(two_informative1, 1, 2)

# (2, 1) Two informative features; n_clusters_per_class is left at the
# library default. The points must be coloured with this dataset's own
# labels (Y2), not the labels of the previous dataset.
X2, Y2 = make_classification(n_features=2, n_redundant=0, n_informative=2)
two_informative2 = _class_scatter(X2, Y2)
fig.append_trace(two_informative2, 2, 1)

# (2, 2) Three classes, two informative features, one cluster per class.
X1, Y1 = make_classification(n_features=2, n_redundant=0, n_informative=2,
                             n_clusters_per_class=1, n_classes=3)
multiclass = _class_scatter(X1, Y1)
# Append the multi-class trace itself; previously the (2, 1) trace was
# appended here a second time and this dataset was never shown.
fig.append_trace(multiclass, 2, 2)

# (3, 1) Three blobs.
X1, Y1 = make_blobs(n_features=2, centers=3)
three_blobs = _class_scatter(X1, Y1)
fig.append_trace(three_blobs, 3, 1)

# (3, 2) Gaussian divided into three quantiles.
X1, Y1 = make_gaussian_quantiles(n_features=2, n_classes=3)
gaussian = _class_scatter(X1, Y1)
fig.append_trace(gaussian, 3, 2)


In [5]:
# Make the figure tall enough for three rows of subplots.
fig['layout'].update(height=900)

# Hide the zero line and the grid on the x and y axis of each of the six
# subplots (layout keys 'xaxis1'..'xaxis6' and 'yaxis1'..'yaxis6').
for axis_number in range(1, 7):
    for axis_prefix in ('xaxis', 'yaxis'):
        fig['layout'][axis_prefix + str(axis_number)].update(zeroline=False,
                                                             showgrid=False)

# Last expression of the cell: render the figure in the notebook.
py.iplot(fig)

In [2]:
from IPython.display import display, HTML

display(HTML('<link href="//fonts.googleapis.com/css?family=Open+Sans:600,400,300,200|Inconsolata|Ubuntu+Mono:400,700" rel="stylesheet" type="text/css" />'))
display(HTML('<link rel="stylesheet" type="text/css" href="http://help.plot.ly/documentation/all_static/css/ipython-notebook-custom.css">'))

! pip install git+https://github.com/plotly/publisher.git --upgrade
import publisher
publisher.publish(
    'Plot-randomly-generated-classification-dataset.ipynb', 'scikit-learn/plot-random-dataset/', 'Randomly Generated Classification Dataset| plotly',
    ' ',
    title = 'Randomly Generated Classification Dataset | plotly',
    name = 'Randomly Generated Classification Dataset',
    has_thumbnail='true', thumbnail='thumbnail/random_dataset.jpg', 
    language='scikit-learn', page_type='example_index',
    display_as='dataset', order=3,
    ipynb= '~Diksha_Gabha/2904')

Collecting git+https://github.com/plotly/publisher.git
  Cloning https://github.com/plotly/publisher.git to /tmp/pip-xLPYgu-build
Installing collected packages: publisher
  Running setup.py install for publisher ... [?25l- error
    Complete output from command /usr/bin/python -u -c "import setuptools, tokenize;__file__='/tmp/pip-xLPYgu-build/setup.py';exec(compile(getattr(tokenize, 'open', open)(__file__).read().replace('\r\n', '\n'), __file__, 'exec'))" install --record /tmp/pip-JbFcRy-record/install-record.txt --single-version-externally-managed --compile:
    running install
    running build
    running build_py
    creating build
    creating build/lib.linux-x86_64-2.7
    creating build/lib.linux-x86_64-2.7/publisher
    copying publisher/publisher.py -> build/lib.linux-x86_64-2.7/publisher
    copying publisher/__init__.py -> build/lib.linux-x86_64-2.7/publisher
    running install_lib
    creating /usr/local/lib/python2.7/dist-packages/publisher
    error: could not create 