# 数据分析常用图表的绘制

In [1]:
%matplotlib notebook

import matplotlib.pyplot as plt
import numpy as np

## 1. [Subplots](https://matplotlib.org/api/pyplot_api.html?highlight=matplotlib%20pyplot%20subplots#matplotlib.pyplot.subplots)

In [1]:
# Integers 1 through 8: the linear series reused by the following cells.
linear_data = np.arange(1, 9)

# New figure laid out as 1 row x 2 columns; draw into the first (left) cell
# using a solid line with circle markers.
plt.figure()
left_ax = plt.subplot(1, 2, 1)
left_ax.plot(linear_data, '-o')

NameError: name 'plt' is not defined

In [5]:
# Square every element of linear_data to obtain the second series.
exponential_data = np.square(linear_data)

# Select the second cell of the 1x2 grid and plot the squared series there
# as a solid line with x markers.
right_ax = plt.subplot(1, 2, 2)
right_ax.plot(exponential_data, '-x')

[<matplotlib.lines.Line2D at 0x1f258edf0f0>]

In [6]:
# Select the first cell of the 1x2 grid again and draw the squared series
# into it as well (solid line, x markers).
first_ax = plt.subplot(1, 2, 1)
first_ax.plot(exponential_data, '-x')

[<matplotlib.lines.Line2D at 0x1f259205f28>]

In [7]:
# Keep the y-axis range identical across both subplots by sharing it.
plt.figure()

ax1 = plt.subplot(1, 2, 1)
ax1.plot(linear_data, '-o')

# sharey=ax1 ties the second subplot's y-axis to the first one.
ax2 = plt.subplot(1, 2, 2, sharey=ax1)
ax2.plot(exponential_data, '-x')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1f25924d710>]

In [9]:
# 2x3 grid of subplots in which every panel shares both its x- and y-axis.
fig, axes_grid = plt.subplots(nrows=2, ncols=3, sharex=True, sharey=True)
(ax1, ax2, ax3), (ax4, ax5, ax6) = axes_grid

# Only the middle panel of the bottom row receives data.
ax5.plot(exponential_data, '-')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1f25a74a978>]

## 2. [直方图](https://matplotlib.org/api/pyplot_api.html?highlight=matplotlib%20pyplot%20hist#matplotlib.pyplot.hist)

In [11]:
# 2x2 grid with a shared x-axis; collect the four axes in a flat list.
fig, axes_grid = plt.subplots(nrows=2, ncols=2, sharex=True)
(ax1, ax2), (ax3, ax4) = axes_grid
axs = [ax1, ax2, ax3, ax4]

# One histogram per panel, with the sample size growing tenfold each time
# (10, 100, 1000, 10000 draws from a standard normal distribution).
for n, ax in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(loc=0., scale=1., size=sample_size)
    # The default number of bins is 10.
    ax.hist(sample)
    ax.set_title('n={}'.format(sample_size))

<IPython.core.display.Javascript object>

In [12]:
# 2x2 grid with a shared x-axis; collect the four axes in a flat list.
fig, axes_grid = plt.subplots(nrows=2, ncols=2, sharex=True)
(ax1, ax2), (ax3, ax4) = axes_grid
axs = [ax1, ax2, ax3, ax4]

# Same experiment as with the default bin count, but with a finer
# resolution: sample sizes 10, 100, 1000, 10000 from a standard normal.
for n, ax in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(loc=0., scale=1., size=sample_size)
    # Use 100 bins.
    ax.hist(sample, bins=100)
    ax.set_title('n={}'.format(sample_size))

<IPython.core.display.Javascript object>

In [14]:
# Combine GridSpec with histograms to build a composite analysis figure:
# a scatter plot with the distribution of each variable drawn alongside it.
import matplotlib.gridspec as gridspec

x = np.random.random(size=10000)
y = np.random.normal(loc=0., scale=1., size=10000)

plt.figure()
gspec = gridspec.GridSpec(3, 3)

# Carve the 3x3 grid into three regions.
top_hist = plt.subplot(gspec[0, 1:])      # top row, right two columns
side_hist = plt.subplot(gspec[1:, 0])     # lower two rows, left column
lower_right = plt.subplot(gspec[1:, 1:])  # lower-right 2x2 area

lower_right.scatter(x, y)
# `normed` was removed from hist() in Matplotlib 3.1; `density=True` is its
# replacement and normalizes the histogram so that it integrates to 1.
top_hist.hist(x, bins=100, density=True)
side_hist.hist(y, bins=100, orientation='horizontal', density=True)
# Mirror the side histogram so its bars grow leftwards, away from the scatter.
side_hist.invert_xaxis()

<IPython.core.display.Javascript object>

## 3. [盒形图](https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.boxplot.html)

In [2]:
import pandas as pd

# Number of draws taken from each distribution.
sample_count = 10000

# Standard normal distribution (mean 0, standard deviation 1).
normal_sample = np.random.normal(0., 1., sample_count)
# Uniform draws from the half-open interval [0, 1).
random_sample = np.random.random(sample_count)
# Gamma distribution with shape parameter 2.
gamma_sample = np.random.gamma(shape=2, size=sample_count)

# One column per distribution.
df = pd.DataFrame(dict(normal=normal_sample,
                       random=random_sample,
                       gamma=gamma_sample))

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()

Unnamed: 0,gamma,normal,random
count,10000.0,10000.0,10000.0
mean,2.004713,-0.003639,0.50606
std,1.398316,0.999177,0.28962
min,0.014152,-4.245805,0.000165
25%,0.983928,-0.673015,0.253994
50%,1.697079,-0.006095,0.508406
75%,2.680493,0.664267,0.755801
max,12.017532,4.193779,0.999936


In [5]:
# Box plot of the normal sample with whiskers spanning the full data range.
plt.figure()
# whis='range' (whiskers at the data min/max) was deprecated in Matplotlib 3.2
# and later removed; the percentile pair (0, 100) places the whiskers at the
# same positions.
plt.boxplot(df['normal'], whis=(0, 100))

<IPython.core.display.Javascript object>

{'boxes': [<matplotlib.lines.Line2D at 0x9b6b5f8>],
 'caps': [<matplotlib.lines.Line2D at 0x9b77f60>,
  <matplotlib.lines.Line2D at 0x9b83898>],
 'fliers': [<matplotlib.lines.Line2D at 0x9b89908>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0x9b83a20>],
 'whiskers': [<matplotlib.lines.Line2D at 0x9b6bfd0>,
  <matplotlib.lines.Line2D at 0x9b77828>]}

In [6]:
# Side-by-side box plots of all three samples, whiskers spanning the full
# data range of each.
plt.figure()
# whis='range' (whiskers at the data min/max) was deprecated in Matplotlib 3.2
# and later removed; the percentile pair (0, 100) places the whiskers at the
# same positions.
plt.boxplot([df['normal'], df['random'], df['gamma']], whis=(0, 100))

<IPython.core.display.Javascript object>

{'boxes': [<matplotlib.lines.Line2D at 0xb174198>,
  <matplotlib.lines.Line2D at 0xb1865f8>,
  <matplotlib.lines.Line2D at 0xb194390>],
 'caps': [<matplotlib.lines.Line2D at 0xb179c18>,
  <matplotlib.lines.Line2D at 0xb179da0>,
  <matplotlib.lines.Line2D at 0xb140e48>,
  <matplotlib.lines.Line2D at 0xb1402e8>,
  <matplotlib.lines.Line2D at 0xb197c50>,
  <matplotlib.lines.Line2D at 0xb19db38>],
 'fliers': [<matplotlib.lines.Line2D at 0xb180e10>,
  <matplotlib.lines.Line2D at 0x9babac8>,
  <matplotlib.lines.Line2D at 0xb1a5ba8>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0xb1805f8>,
  <matplotlib.lines.Line2D at 0xb135470>,
  <matplotlib.lines.Line2D at 0xb19dcc0>],
 'whiskers': [<matplotlib.lines.Line2D at 0xb174ba8>,
  <matplotlib.lines.Line2D at 0xb174d30>,
  <matplotlib.lines.Line2D at 0xb186e48>,
  <matplotlib.lines.Line2D at 0xb18cd30>,
  <matplotlib.lines.Line2D at 0xb194be0>,
  <matplotlib.lines.Line2D at 0xb197ac8>]}

In [7]:
# Side-by-side box plots of the three samples, this time leaving the whisker
# setting at matplotlib's default.
plt.figure()
plt.boxplot([df[column] for column in ('normal', 'random', 'gamma')])

<IPython.core.display.Javascript object>

{'boxes': [<matplotlib.lines.Line2D at 0xb488d68>,
  <matplotlib.lines.Line2D at 0xb491b70>,
  <matplotlib.lines.Line2D at 0x9a0bc18>],
 'caps': [<matplotlib.lines.Line2D at 0xb459c18>,
  <matplotlib.lines.Line2D at 0xb434b70>,
  <matplotlib.lines.Line2D at 0xb140d30>,
  <matplotlib.lines.Line2D at 0x9ad5a90>,
  <matplotlib.lines.Line2D at 0x99ffba8>,
  <matplotlib.lines.Line2D at 0x99ffd30>],
 'fliers': [<matplotlib.lines.Line2D at 0xb1b6278>,
  <matplotlib.lines.Line2D at 0x9a0bb00>,
  <matplotlib.lines.Line2D at 0xb4a4da0>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0xb447c88>,
  <matplotlib.lines.Line2D at 0x9ad5c18>,
  <matplotlib.lines.Line2D at 0xb4a4588>],
 'whiskers': [<matplotlib.lines.Line2D at 0xb488f28>,
  <matplotlib.lines.Line2D at 0xb449208>,
  <matplotlib.lines.Line2D at 0xb497a90>,
  <matplotlib.lines.Line2D at 0xb14e550>,
  <matplotlib.lines.Line2D at 0x9a05b38>,
  <matplotlib.lines.Line2D at 0x9a05cc0>]}

## 4. [热图](https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.hist2d.html?highlight=matplotlib%20axes%20axes%20hist2d#matplotlib.axes.Axes.hist2d)

In [9]:
# 2-D histogram of uniform x against standard-normal y, 25 bins per axis.
plt.figure()
sample_count = 10000
y = np.random.normal(0., 1., sample_count)
x = np.random.random(sample_count)
plt.hist2d(x, y, bins=25)

<IPython.core.display.Javascript object>

(array([[  0.,   0.,   1.,   0.,   4.,   8.,  13.,  19.,  28.,  35.,  50.,
          51.,  47.,  42.,  39.,  24.,  15.,  17.,   4.,   1.,   0.,   1.,
           0.,   0.,   0.],
        [  0.,   0.,   1.,   1.,   6.,   3.,  15.,  20.,  32.,  27.,  38.,
          58.,  43.,  51.,  35.,  31.,  23.,  10.,   5.,   1.,   1.,   2.,
           0.,   0.,   0.],
        [  0.,   0.,   0.,   3.,   4.,   5.,  12.,  30.,  30.,  46.,  53.,
          53.,  49.,  42.,  52.,  25.,  12.,  15.,   5.,   1.,   0.,   1.,
           0.,   0.,   0.],
        [  0.,   0.,   2.,   7.,   5.,  10.,  13.,  19.,  35.,  41.,  59.,
          48.,  35.,  37.,  27.,  25.,  16.,   9.,   7.,   5.,   1.,   0.,
           0.,   0.,   0.],
        [  0.,   0.,   0.,   3.,   3.,   7.,  16.,  26.,  35.,  39.,  43.,
          51.,  53.,  42.,  30.,  15.,  16.,  10.,   6.,   4.,   2.,   1.,
           0.,   0.,   0.],
        [  0.,   0.,   0.,   3.,   1.,  10.,  13.,  20.,  30.,  36.,  41.,
          48.,  59.,  42.,  45.,  2

In [10]:
# Same 2-D histogram on fresh samples, with a finer grid of 100 bins per axis.
plt.figure()
sample_count = 10000
y = np.random.normal(0., 1., sample_count)
x = np.random.random(sample_count)
plt.hist2d(x, y, bins=100)

<IPython.core.display.Javascript object>

(array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]]),
 array([  1.73330697e-04,   1.01693953e-02,   2.01654598e-02,
          3.01615244e-02,   4.01575889e-02,   5.01536535e-02,
          6.01497181e-02,   7.01457826e-02,   8.01418472e-02,
          9.01379118e-02,   1.00133976e-01,   1.10130041e-01,
          1.20126105e-01,   1.30122170e-01,   1.40118235e-01,
          1.50114299e-01,   1.60110364e-01,   1.70106428e-01,
          1.80102493e-01,   1.90098557e-01,   2.00094622e-01,
          2.10090687e-01,   2.20086751e-01,   2.30082816e-01,
          2.40078880e-01,   2.50074945e-01,   2.60071009e-01,
          2.70067074e-01,   2.80063138e-01,   2.90059203e-01,
          3.00055268e-01,   3.10051332e-01,   3.20047397e-01,
          3.30043461e-01,   3.40

In [11]:
# Add a colorbar to the current figure.
# NOTE(review): called without arguments, so this relies on pyplot's implicit
# current-image state - presumably the hist2d drawn in the preceding cell;
# confirm that cell has been run first.
plt.colorbar()

<matplotlib.colorbar.Colorbar at 0xc1682e8>