# Making Our Own Data

In [None]:
import random
from random import gauss as gs

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs, make_biclusters
from sklearn.datasets import make_checkerboard
from sklearn.datasets import make_regression

%matplotlib inline

## Gaussian Blobs

In [None]:
# Sample 100 two-feature points grouped around 3 cluster centers.
# X holds the coordinates, y the integer cluster label of each point.
# random_state pins the draw so a fresh "Restart & Run All" reproduces
# the same blobs (and therefore the same figure below).
X, y = make_blobs(n_samples=100, centers=3, n_features=2, random_state=42)

In [None]:
# Collect the blob coordinates and their cluster labels in one frame, then
# draw every cluster as its own scatter series on a shared set of axes.
df = pd.DataFrame({'x': X[:, 0], 'y': X[:, 1], 'label': y})
colors = {0: 'red', 1: 'blue', 2: 'green'}  # one color per cluster label
fig, ax = plt.subplots()
grouped = df.groupby('label')
for key, group in grouped:
    # `key` is the cluster label; it doubles as the legend entry.
    group.plot.scatter(x='x', y='y', ax=ax, label=key, color=colors[key])

## Biclusters

In [None]:
# Build a 10x10 matrix containing 2 biclusters. `rows` and `columns` are
# the cluster-membership indicators returned alongside the data.
# random_state fixes the draw so the matrix shown below is reproducible
# on every fresh run.
data, rows, columns = make_biclusters(
    shape=(10, 10), n_clusters=2, random_state=42)

plt.matshow(data);

## Checkerboard

In [None]:
# Build a 300x300 checkerboard-structured matrix with 8 row clusters and
# 8 column clusters. Noise is switched off and shuffling disabled, and the
# generator is seeded, so the same matrix is produced on every run.
n_clusters = (8, 8)
data, rows, columns = make_checkerboard(
    (300, 300),
    n_clusters,
    noise=0,
    shuffle=False,
    random_state=42,
)

# Render the matrix as an image using the "Blues" colormap.
plt.matshow(data, cmap=plt.cm.Blues);

In [None]:
# Generate a synthetic regression problem with 2 input features.
# x holds the features and y the target; note that this rebinds `y`,
# which previously held the blob cluster labels.
# random_state pins the draw so the plot below is reproducible.
x, y = make_regression(n_features=2, random_state=42)

In [None]:
# Plot the regression features against the target as unconnected circle
# markers (no joining line). NOTE(review): x has two feature columns, so
# matplotlib presumably draws one marker series per column - confirm.
plt.plot(x, y, marker='o', linestyle='none');

## White Noise

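The idea behind white noise is that it is truly random: every value is drawn independently from the same distribution, with zero mean and constant variance, so knowing past values tells us nothing about the next one.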

We don't want white noise to describe our model per se, but we *do* want it to describe our model *error*.

Can you explain these truisms?

In [None]:
# Seed the stdlib generator that backs `gs` (random.gauss) so the noise
# series is identical on every "Restart & Run All".
random.seed(42)

# 1000 independent draws from a normal distribution with mean 0 and
# standard deviation 1, kept both as a plain list and as a pandas Series.
rands = [gs(0, 1) for _ in range(1000)]
series = pd.Series(rands)

In [None]:
# Evenly spaced x positions on [-10, 10], one per noise sample. The count
# is taken from `series` so the two can never fall out of sync if the
# sample size is changed. Note: this rebinds `X`, which earlier held the
# blob coordinates.
X = np.linspace(-10, 10, len(series))
plt.figure(figsize=(10, 7))
plt.plot(X, series)
# Label the figure so it stands alone when the notebook is skimmed.
plt.title('White noise')
plt.xlabel('x')
plt.ylabel('sampled value');