In [1]:
%matplotlib notebook

In [2]:
import matplotlib.pyplot as plt

In [3]:
import numpy as np
data = np.arange(10)

In [4]:
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [5]:
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x21a554535e0>]

matplotlib所绘制的图位于图片（Figure）对象中。你可以使用plt.figure生成一个新的图片

In [6]:
# 生成一个空白图片
fig = plt.figure()
# 图片应该是2×2的（最多四个图形），并且我们选择了四个图形中的第一个（序号从1开始）
ax1 = fig.add_subplot(2,2,1)
ax2 = fig.add_subplot(2,2,2)
ax3 = fig.add_subplot(2,2,3)
# 'k--'是用于绘制黑色分段线的style选项
plt.plot(np.random.randn(50).cumsum(), 'k--')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x21a57529450>]

你不能使用空白的图片进行绘图。你需要使用add_subplot创建一个或多个子图（subplot）

In [7]:
ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)

(array([ 2.,  3.,  1., 14., 10.,  7., 10.,  6.,  9., 11.,  2., 10.,  4.,
         6.,  0.,  1.,  1.,  1.,  0.,  2.]),
 array([-2.19400106, -1.92057002, -1.64713898, -1.37370794, -1.10027691,
        -0.82684587, -0.55341483, -0.27998379, -0.00655276,  0.26687828,
         0.54030932,  0.81374035,  1.08717139,  1.36060243,  1.63403347,
         1.9074645 ,  2.18089554,  2.45432658,  2.72775761,  3.00118865,
         3.27461969]),
 <BarContainer object of 20 artists>)

In [8]:
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

<matplotlib.collections.PathCollection at 0x21a574d7b20>

In [9]:
plt.plot(np.random.randn(30).cumsum(), 'ko--')

[<matplotlib.lines.Line2D at 0x21a57578310>]

In [10]:
ax4 = fig.add_subplot(2,2,4)

In [11]:
plt.plot(1,8,'bo')

[<matplotlib.lines.Line2D at 0x21a575a2500>]

add_subplot不仅可以增加子图，还可以重定义所有子图的排列方式

In [12]:
ax5 = fig.add_subplot(2,3,5)

新增子图之后，子图之间产生重叠，通过如下方法调整子图之间的间距

In [13]:
plt.subplots_adjust(wspace = 1.5, hspace = 1.5)

#### 折线图
matplotlib.plot方法，默认是画折线图
折线图需要的关键参数：
- 数据集：表示x轴y轴的点
- 线类型（linestyle）
- 标记类型（marker）
- 颜色（color）

后面三个参数 既可以用三个key参数分别指定，也可以用一个拼接而成的字符串直接指定

In [14]:
fig, axes = plt.subplots(1,1)
data = np.random.randn(30).cumsum()
plt.plot(data, 'ro')
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'b-', drawstyle='steps-post', label='step-post')
plt.legend(loc='best')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x21a57617ee0>

In [15]:
plt.xlim()

(-1.4500000000000002, 30.45)

In [16]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(np.random.randn(1000).cumsum(), 'k', label = 'one')
ax.plot(np.random.randn(1000).cumsum(), 'r', label = 'two')
ax.plot(np.random.randn(1000).cumsum(), 'y', label = 'three')
ax.legend(loc='best')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x21a5764b610>

In [17]:
ticks = ax.set_xticks([0,100,200,300,400,500,600,700,800,900,1000])
labels = ax.set_xticklabels(['zero','one','two','three','four','five','six','seven','eight','nine','ten'], rotation=30, fontsize='small')
ax.set_xlabel('stages')
ax.set_title('My first figure')

Text(0.5, 1.0, 'My first figure')

### 使用pandas与seaborn进行绘图

In [18]:
import pandas as pd

In [19]:
fig,axes = plt.subplots(1,2,figsize=(9,5))
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0,100,10))
s.plot()
df = pd.DataFrame(np.random.randn(10,4).cumsum(0), columns=['A','B','C','D'], index=np.arange(0,100,10))
df.plot(ax = axes[0])

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [20]:
fig, axes = plt.subplots(2,1)
data = pd.Series(np.random.randn(16), index=list('abcdefghijklmnop'))
data.plot.bar(ax = axes[0], color='k', alpha=0.7)
data.plot.barh(ax = axes[1], color='k', alpha=0.7)

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [21]:
df = pd.DataFrame(np.random.rand(6,4), index = ['one','two','three','four','five','six'], columns=pd.Index(['A','B','C','D'],name = 'Genus'))
df1 = pd.DataFrame(np.random.rand(6,4), index = ['one','two','three','four','five','six'], columns=pd.Index(['A','B','C','D'],name = 'Genus'))
df.plot.bar()
df1.plot.barh(stacked = True, alpha = 0.5)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [22]:
tips = pd.read_csv('pydata-book-2nd-edition/examples/tips.csv')
tips

Unnamed: 0,total_bill,tip,smoker,day,time,size
0,16.99,1.01,No,Sun,Dinner,2
1,10.34,1.66,No,Sun,Dinner,3
2,21.01,3.50,No,Sun,Dinner,3
3,23.68,3.31,No,Sun,Dinner,2
4,24.59,3.61,No,Sun,Dinner,4
...,...,...,...,...,...,...
239,29.03,5.92,No,Sat,Dinner,3
240,27.18,2.00,Yes,Sat,Dinner,2
241,22.67,2.00,Yes,Sat,Dinner,2
242,17.82,1.75,No,Sat,Dinner,2


In [23]:
# 利用crosstab，统计特定day+size维度下，数据出现的【频率】
party_counts = pd.crosstab(tips['day'], tips['size'])
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [24]:
party_counts = party_counts.loc[:,2:5]
party_pcts = party_counts.div(party_counts.sum(1), axis=0)
party_pcts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,0.888889,0.055556,0.055556,0.0
Sat,0.623529,0.211765,0.152941,0.011765
Sun,0.52,0.2,0.24,0.04
Thur,0.827586,0.068966,0.086207,0.017241


In [25]:
party_pcts.plot.bar()

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='day'>

In [26]:
import seaborn as sns

In [27]:
tips['tip_pct'] = tips['tip']/(tips['total_bill'] - tips['tip'])

In [28]:
tips

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.50,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069
...,...,...,...,...,...,...,...
239,29.03,5.92,No,Sat,Dinner,3,0.256166
240,27.18,2.00,Yes,Sat,Dinner,2,0.079428
241,22.67,2.00,Yes,Sat,Dinner,2,0.096759
242,17.82,1.75,No,Sat,Dinner,2,0.108899


In [29]:
fig,axes = plt.subplots(1,1)
# sns.set(style='whitegrid')
# seaborn.barplot拥有一个hue选项，允许我们通过一个额外的分类值将数据分离
# sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='tip_pct', ylabel='day'>

每个柱子的值是tip_pct的平均值，柱子上的和弦代表的是95%的置信区间（置信区间可以通过可选参数进行设置）

In [33]:
fig,axes = plt.subplots(1,1)
tips['tip_pct'].plot.hist(bins=50)

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Frequency'>

In [34]:
fig,axes = plt.subplots(1,1)
tips['tip_pct'].plot.density()

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Density'>

In [51]:
fig,axes = plt.subplots(1,1)
comp1 = np.random.normal(0, 1, size=200)
print(len(comp1))
comp2 = np.random.normal(10, 2, size=200)
print(len(comp2))
values = pd.Series(np.concatenate([comp1,comp2]))
print(len(values))
# distplot方法可以绘制直方图和连续密度估计，通过distplot方法seaborn使直方图和密度图的绘制更为简单
# bins=整数，即代表间隔的个数，整个分布范围会按照bins个数平均分配
sns.distplot(values, bins=100, color='k')

<IPython.core.display.Javascript object>

200
200
400




<AxesSubplot:ylabel='Density'>

### 散点图

In [52]:
macro = pd.read_csv('pydata-book-2nd-edition/examples/macrodata.csv')
macro

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.980,139.7,2.82,5.8,177.146,0.00,0.00
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.150,141.7,3.08,5.1,177.830,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.260,1916.4,29.350,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.370,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.540,139.6,3.50,5.2,180.007,2.31,1.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,2008.0,3.0,13324.600,9267.7,1990.693,991.551,9838.3,216.889,1474.7,1.17,6.0,305.270,-3.16,4.33
199,2008.0,4.0,13141.920,9195.3,1857.661,1007.273,9920.4,212.174,1576.5,0.12,6.9,305.952,-8.79,8.91
200,2009.0,1.0,12925.410,9209.2,1558.494,996.287,9926.4,212.671,1592.8,0.22,8.1,306.547,0.94,-0.71
201,2009.0,2.0,12901.504,9189.0,1456.678,1023.528,10077.5,214.469,1653.6,0.18,9.2,307.226,3.37,-3.19


In [53]:
data = macro[['cpi','m1','tbilrate','unemp']]
data

Unnamed: 0,cpi,m1,tbilrate,unemp
0,28.980,139.7,2.82,5.8
1,29.150,141.7,3.08,5.1
2,29.350,140.5,3.82,5.3
3,29.370,140.0,4.33,5.6
4,29.540,139.6,3.50,5.2
...,...,...,...,...
198,216.889,1474.7,1.17,6.0
199,212.174,1576.5,0.12,6.9
200,212.671,1592.8,0.22,8.1
201,214.469,1653.6,0.18,9.2


In [63]:
# np.log求解自然对数
# diff 默认将DataFrame的后一行数据减去前一行数据
trans_data = np.log(data).diff().dropna()
trans_data

Unnamed: 0,cpi,m1,tbilrate,unemp
1,0.005849,0.014215,0.088193,-0.128617
2,0.006838,-0.008505,0.215321,0.038466
3,0.000681,-0.003565,0.125317,0.055060
4,0.005772,-0.002861,-0.212805,-0.074108
5,0.000338,0.004289,-0.266946,0.000000
...,...,...,...,...
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.002340,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339


In [64]:
trans_data[-5:]

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339
202,0.008894,0.012202,-0.405465,0.04256


In [73]:
fig,axes = plt.subplots(1,1)
sns.regplot('m1','unemp', data=trans_data)

<IPython.core.display.Javascript object>



<AxesSubplot:xlabel='m1', ylabel='unemp'>

In [74]:
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha':0.2})

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x21a02b0f130>