# 앤스콤 데이터 집합 불러오기

In [40]:
import seaborn as sns

# Load Anscombe's quartet from seaborn's example datasets, then print the
# full table followed by the type of the returned object.
anscombe = sns.load_dataset("anscombe")
print(anscombe, type(anscombe), sep="\n")

   dataset     x      y
0        I  10.0   8.04
1        I   8.0   6.95
2        I  13.0   7.58
3        I   9.0   8.81
4        I  11.0   8.33
5        I  14.0   9.96
6        I   6.0   7.24
7        I   4.0   4.26
8        I  12.0  10.84
9        I   7.0   4.82
10       I   5.0   5.68
11      II  10.0   9.14
12      II   8.0   8.14
13      II  13.0   8.74
14      II   9.0   8.77
15      II  11.0   9.26
16      II  14.0   8.10
17      II   6.0   6.13
18      II   4.0   3.10
19      II  12.0   9.13
20      II   7.0   7.26
21      II   5.0   4.74
22     III  10.0   7.46
23     III   8.0   6.77
24     III  13.0  12.74
25     III   9.0   7.11
26     III  11.0   7.81
27     III  14.0   8.84
28     III   6.0   6.08
29     III   4.0   5.39
30     III  12.0   8.15
31     III   7.0   6.42
32     III   5.0   5.73
33      IV   8.0   6.58
34      IV   8.0   5.76
35      IV   8.0   7.71
36      IV   8.0   8.84
37      IV   8.0   8.47
38      IV   8.0   7.04
39      IV   8.0   5.25
40      IV  19.0  12.50
41      IV   8.0   5.56
42      IV   8.0   7.91
43      IV   8.0   6.89
<class 'pandas.core.frame.DataFrame'>

# matplotlib 라이브러리로 간단한 그래프 그리기

In [44]:
%matplotlib notebook
import matplotlib.pyplot as plt

# Keep only the rows that belong to the first Anscombe group ('I').
dataset_1 = anscombe.loc[anscombe['dataset'] == 'I']

In [42]:
# Plot y against x for the first group. No format string is given, so
# matplotlib's default line style joins the points in row order.
plt.plot(dataset_1['x'], dataset_1['y'])

[<matplotlib.lines.Line2D at 0x209b23fc4c0>]

In [43]:
# Same data, but the 'o' format string draws circle markers with no
# connecting line.
plt.plot(dataset_1['x'], dataset_1['y'], 'o')

[<matplotlib.lines.Line2D at 0x209b23fcc40>]

# 한 번에 4개의 그래프 그리기

In [45]:
# Split out the remaining three Anscombe groups with the same boolean-mask
# filter that produced dataset_1.
dataset_2, dataset_3, dataset_4 = (
    anscombe[anscombe['dataset'] == group] for group in ('II', 'III', 'IV')
)

In [46]:
# One figure holding a 2x2 grid of subplots; positions 1-4 are added in order.
fig = plt.figure()
axes1, axes2, axes3, axes4 = (fig.add_subplot(2, 2, pos) for pos in range(1, 5))

<IPython.core.display.Javascript object>

In [47]:
# Draw each Anscombe group as circle markers on its own subplot.
for panel, subset in zip((axes1, axes2, axes3, axes4),
                         (dataset_1, dataset_2, dataset_3, dataset_4)):
    panel.plot(subset['x'], subset['y'], 'o')

# Evaluate the figure last so the notebook displays it.
fig

<IPython.core.display.Javascript object>

In [48]:
# Give every subplot a title naming the group it shows.
for panel, title in zip((axes1, axes2, axes3, axes4),
                        ("dataset_1", "dataset_2", "dataset_3", "dataset_4")):
    panel.set_title(title)

# Evaluate the figure last so the notebook displays it.
fig

<IPython.core.display.Javascript object>

In [49]:
# Add a single figure-level title for the four subplots.
fig.suptitle("Anscombe Data")

# Evaluate the figure so the notebook displays the updated version.
fig

<IPython.core.display.Javascript object>

In [50]:
# Let matplotlib adjust the subplot spacing to fit titles and tick labels.
fig.tight_layout()

# Evaluate the figure so the notebook displays the updated version.
fig

<IPython.core.display.Javascript object>

# 기초 그래프 그리기 - 히스토그램, 산점도, 박스 그래프

In [86]:
# Load the tips example dataset, then print its first rows followed by the
# type of the returned object.
tips = sns.load_dataset("tips")
print(tips.head(), type(tips), sep="\n")

   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4
<class 'pandas.core.frame.DataFrame'>


In [52]:
# New figure with a single subplot (111 is shorthand for a 1x1 grid, slot 1).
fig = plt.figure()
axes1 = fig.add_subplot(111)

<IPython.core.display.Javascript object>

In [53]:
# Histogram of the bill totals using 10 bins.
axes1.hist(tips['total_bill'], bins=10)
axes1.set_title('Histogram of Total Bill')
# x-axis: the binned variable; y-axis: the number of observations per bin.
axes1.set_xlabel('Total Bill')
axes1.set_ylabel('Frequency')

# Evaluate the figure last so the notebook displays it.
fig

<IPython.core.display.Javascript object>

In [54]:
# Scatter plot of tip against total bill on a single-subplot figure.
scatter_plot = plt.figure()
axes1 = scatter_plot.add_subplot(111)
axes1.scatter(x=tips['total_bill'], y=tips['tip'])
axes1.set_title('Scatterplot of Total Bill vs Tip')
axes1.set_xlabel('Total Bill')
axes1.set_ylabel('Tip')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Tip')

In [55]:
# One series of tip amounts per sex, in the same order as the tick labels.
sex_labels = ['Female', 'Male']
tips_by_sex = [tips.loc[tips['sex'] == label, 'tip'] for label in sex_labels]

# Side-by-side box plots of the two tip distributions.
boxplot = plt.figure()
axes1 = boxplot.add_subplot(111)
axes1.boxplot(tips_by_sex, labels=sex_labels)
axes1.set_title('Boxplot of Tips by Sex')
axes1.set_xlabel('Sex')
axes1.set_ylabel('Tip')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Tip')

In [56]:
def recode_sex(sex):
    """Encode a sex label as an integer.

    Returns 0 when ``sex`` equals 'Female' and 1 for every other value.
    """
    return 0 if sex == 'Female' else 1
    
# Add an integer code per row, derived from the 'sex' column via recode_sex
# (0 for 'Female', 1 otherwise); used below as the scatter colour value.
tips['sex_color'] = tips['sex'].apply(recode_sex)

In [57]:
# Marker area scales with party size.
marker_sizes = tips['size'] * 10

# Scatter of tip vs. total bill, coloured by the sex code and drawn
# half-transparent so overlapping points stay visible.
scatter_plot = plt.figure()
axes1 = scatter_plot.add_subplot(111)
axes1.scatter(x=tips['total_bill'], y=tips['tip'],
              s=marker_sizes, c=tips['sex_color'], alpha=0.5)
axes1.set_title('Total Bill vs Tip Colored by Sex and Sized by Size')
axes1.set_xlabel('Total Bill')
axes1.set_ylabel('Tip')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Tip')

# 단변량 그래프 그리기 - 히스토그램

In [58]:
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# new axes explicitly instead of relying on the implicit current axes.
fig, ax = plt.subplots()
# histplot is the replacement for the deprecated distplot; stat='density'
# puts the bars on the same scale as the overlaid KDE curve.
sns.histplot(x=tips['total_bill'], kde=True, stat='density', ax=ax)
ax.set_title('Total Bill Histogram with Density Plot')

<IPython.core.display.Javascript object>



Text(0.5, 1.0, 'Total Bill Histogram with Density Plot')

In [72]:
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# new axes explicitly.
fig, ax = plt.subplots()
# Plain count histogram (no KDE); histplot replaces the deprecated
# distplot(..., kde=False).
sns.histplot(x=tips['total_bill'], ax=ax)
ax.set_title('Total Bill Histogram')
ax.set_xlabel('Total Bill')
ax.set_ylabel('Frequency')

<IPython.core.display.Javascript object>



Text(0, 0.5, 'Frequency')

In [73]:
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# new axes explicitly.
fig, ax = plt.subplots()
# Density curve only; kdeplot replaces the deprecated distplot(..., hist=False).
sns.kdeplot(x=tips['total_bill'], ax=ax)
ax.set_title('Total Bill Density')
ax.set_xlabel('Total Bill')
ax.set_ylabel('Unit Probability')

<IPython.core.display.Javascript object>



Text(0, 0.5, 'Unit Probability')

In [101]:
hist_den_rug, ax = plt.subplots()
# The deprecated distplot(..., rug=True) is replaced by two layers drawn on
# the same axes: a density-scaled histogram with KDE, plus a rug of ticks
# marking each observation.
sns.histplot(x=tips['total_bill'], kde=True, stat='density', ax=ax)
sns.rugplot(x=tips['total_bill'], ax=ax)
ax.set_title('Total Bill Histogram with Density and Rug Plot')
ax.set_xlabel('Total Bill')

<IPython.core.display.Javascript object>



Text(0.5, 0, 'Total Bill')

In [75]:
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# new axes explicitly.
fig, ax = plt.subplots()
# Pass the column as the x= keyword: newer seaborn releases take `data` as the
# first positional parameter, so a positional 'day' would clash with data=tips.
sns.countplot(x='day', data=tips, ax=ax)
ax.set_title('Count of days')
ax.set_xlabel('Day of the Week')
ax.set_ylabel('Frequency')

<IPython.core.display.Javascript object>



Text(0, 0.5, 'Frequency')

In [76]:
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# new axes explicitly.
fig, ax = plt.subplots()
# Scatter plot of tip vs. total bill with a fitted regression line.
sns.regplot(x='total_bill', y='tip', data=tips, ax=ax)
ax.set_title('Scatterplot of Total Bill and Tip')
ax.set_xlabel('Total Bill')
ax.set_ylabel('Tip')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Tip')

In [78]:
# Joint plot: the bivariate plot in the centre, with the marginal
# distribution of each variable along the edges.
joint = sns.jointplot(data=tips, x='total_bill', y='tip')
joint.set_axis_labels('Total Bill', 'Tip')
# y > 1 lifts the title above the top marginal plot.
joint.fig.suptitle('Joint Plot of Total Bill and Tip', y=1.03, fontsize=10)

<IPython.core.display.Javascript object>

Text(0.5, 1.03, 'Joint Plot of Total Bill and Tip')

In [62]:
# NOTE(review): this cell repeats the joint-plot cell directly above it
# verbatim (same data, labels and title) and rebinds `joint` to a new grid.
joint = sns.jointplot(x='total_bill', y='tip', data=tips)
joint.set_axis_labels(xlabel='Total Bill', ylabel='Tip')
# y=1.03 places the title slightly above the top of the figure.
joint.fig.suptitle('Joint Plot of Total Bill and Tip', fontsize=10, y=1.03)

<IPython.core.display.Javascript object>

Text(0.5, 1.03, 'Joint Plot of Total Bill and Tip')

In [79]:
# Same joint plot, but kind="hex" bins the points into hexagons.
hexbin = sns.jointplot(data=tips, x="total_bill", y="tip", kind="hex")
hexbin.set_axis_labels('Total Bill', 'Tip')
# y > 1 lifts the title above the top marginal plot.
hexbin.fig.suptitle('Hexbin Joint Plot of Total Bill and Tip', y=1.03, fontsize=10)

<IPython.core.display.Javascript object>

Text(0.5, 1.03, 'Hexbin Joint Plot of Total Bill and Tip')

In [80]:
kde, ax = plt.subplots()
# Two-dimensional kernel density estimate of (total_bill, tip).
# x=/y= replace the removed data=/data2= pair, and fill= replaces the
# deprecated shade= flag; the axes created above is passed explicitly.
sns.kdeplot(x=tips['total_bill'], y=tips['tip'], fill=True, ax=ax)
ax.set_title('Kernel Density Plot of Total Bill and Tip')
ax.set_xlabel('Total Bill')
ax.set_ylabel('Tip')

<IPython.core.display.Javascript object>



Text(0, 0.5, 'Tip')

In [81]:
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# new axes explicitly.
fig, ax = plt.subplots()
# One bar per value of 'time', summarising total_bill within that group.
sns.barplot(x='time', y='total_bill', data=tips, ax=ax)
ax.set_title('Bar plot of average total bill for time of day')
ax.set_xlabel('Time of day')
ax.set_ylabel('Average total bill')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Average total bill')

In [82]:
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# new axes explicitly.
fig, ax = plt.subplots()
# One box per value of 'time', showing the spread of total_bill.
sns.boxplot(x='time', y='total_bill', data=tips, ax=ax)
ax.set_title('Boxplot of total bill by time of day')
ax.set_xlabel('Time of day')
ax.set_ylabel('Total Bill')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Total Bill')

In [83]:
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# new axes explicitly.
fig, ax = plt.subplots()
# One violin per value of 'time', showing the distribution of total_bill.
sns.violinplot(x='time', y='total_bill', data=tips, ax=ax)
ax.set_title('Violin plot of total bill by time of day')
ax.set_xlabel('Time of day')
ax.set_ylabel('Total Bill')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Total Bill')

In [85]:
# Display the DataFrame; at this point it also carries the derived
# 'sex_color' column.
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,sex_color
0,16.99,1.01,Female,No,Sun,Dinner,2,0
1,10.34,1.66,Male,No,Sun,Dinner,3,1
2,21.01,3.50,Male,No,Sun,Dinner,3,1
3,23.68,3.31,Male,No,Sun,Dinner,2,1
4,24.59,3.61,Female,No,Sun,Dinner,4,0
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,1
240,27.18,2.00,Female,Yes,Sat,Dinner,2,0
241,22.67,2.00,Male,Yes,Sat,Dinner,2,1
242,17.82,1.75,Male,No,Sat,Dinner,2,1


In [87]:
# Pairwise relationship plots for the columns of tips.
# NOTE(review): pairplot returns a seaborn PairGrid rather than a matplotlib
# Figure, so the name `fig` is slightly misleading — confirm before reusing it.
fig = sns.pairplot(tips)

<IPython.core.display.Javascript object>

In [102]:
# Build the pair grid in one chained expression: regression plots above the
# diagonal, KDE plots below it, and histograms with a KDE overlay on the
# diagonal itself.
pair_grid = (
    sns.PairGrid(tips)
    .map_upper(sns.regplot)
    .map_lower(sns.kdeplot)
    .map_diag(sns.histplot, kde=True)
)
plt.show()

<IPython.core.display.Javascript object>

# 다변량 그래프 그리기

In [104]:
# plt.subplots() returns a (figure, axes) pair, so unpack it and draw on the
# new axes explicitly.
fig, ax = plt.subplots()
# hue='sex' with split=True draws the two sexes as the two halves of each violin.
ax = sns.violinplot(x='time', y='total_bill', hue='sex', data=tips, split=True, ax=ax)

<IPython.core.display.Javascript object>

In [107]:
# Scatter of tip vs. total bill coloured by sex; fit_reg=False turns off the
# regression line so only the points are drawn.
scatter = sns.lmplot(x='total_bill', y='tip', data=tips, hue='sex', fit_reg=False)

<IPython.core.display.Javascript object>

In [109]:
# Pairwise relationship plots again, this time coloured by the 'sex' column.
fig = sns.pairplot(tips, hue='sex')

<IPython.core.display.Javascript object>

In [128]:
# Points only (no regression line), coloured by sex; scatter_kws is passed
# through to the underlying scatter call, so 's' sets the marker size.
scatter = sns.lmplot(
    data=tips,
    x='total_bill',
    y='tip',
    hue='sex',
    fit_reg=False,
    scatter_kws={'s': 100},
)
plt.show()

<IPython.core.display.Javascript object>

In [129]:
# As above, but each hue level also gets its own marker shape.
scatter = sns.lmplot(
    data=tips,
    x='total_bill',
    y='tip',
    hue='sex',
    markers=['o', 'x'],
    fit_reg=False,
    scatter_kws={'s': 50},
)
plt.show()

<IPython.core.display.Javascript object>

In [131]:
# One panel per value of the 'dataset' column, wrapped after two columns;
# fit_reg=False shows the points without a regression line.
anscombe_plot = sns.lmplot(x='x', y='y', data=anscombe, fit_reg=False, col='dataset', col_wrap=2)

<IPython.core.display.Javascript object>

In [132]:
# One panel per value of 'time'.
facet = sns.FacetGrid(tips, col='time')
# The deprecated distplot(..., rug=True) is replaced by two mapped layers:
# a density-scaled histogram with KDE, then a rug of per-observation ticks.
facet.map(sns.histplot, 'total_bill', kde=True, stat='density')
facet.map(sns.rugplot, 'total_bill')

<IPython.core.display.Javascript object>



<seaborn.axisgrid.FacetGrid at 0x209bd8c8400>

In [133]:
# Grid of panels: rows split by smoker, columns by time, points coloured by sex.
facet = sns.FacetGrid(data=tips, row='smoker', col='time', hue='sex')
# Draw tip against total bill in every panel.
facet.map(plt.scatter, 'total_bill', 'tip')

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x209bd7c97f0>

# 데이터프레임과 시리즈로 그래프 그리기

In [134]:
# plt.subplots() returns a (figure, axes) pair, so unpack it and hand the
# axes to pandas explicitly.
fig, ax = plt.subplots()
# Histogram of a single column via the Series plotting accessor.
ax = tips['total_bill'].plot.hist(ax=ax)

<IPython.core.display.Javascript object>

In [135]:
fig, ax = plt.subplots()
# Overlaid histograms of both columns; alpha keeps the overlap readable.
ax = tips[['total_bill', 'tip']].plot(kind='hist', alpha=0.5, bins=20, ax=ax)

<IPython.core.display.Javascript object>

In [136]:
fig, ax = plt.subplots()
# Kernel density estimate of the tip column, drawn on the axes created above
# (passed explicitly, matching the other pandas plotting cells).
ax = tips['tip'].plot.kde(ax=ax)

<IPython.core.display.Javascript object>

In [137]:
fig, ax = plt.subplots()
# Scatter of tip against total bill through the DataFrame plotting API.
ax = tips.plot(kind='scatter', x='total_bill', y='tip', ax=ax)

<IPython.core.display.Javascript object>

In [139]:
fig, ax = plt.subplots()
# Hexagonal-bin plot of tip against total bill; gridsize sets how many
# hexagons span the x direction.
ax = tips.plot(kind='hexbin', x='total_bill', y='tip', gridsize=10, ax=ax)

<IPython.core.display.Javascript object>

In [140]:
fig, ax = plt.subplots()
# Box plots of the DataFrame's columns on the shared axes.
ax = tips.plot(kind='box', ax=ax)

<IPython.core.display.Javascript object>

In [144]:
# Switch seaborn to the 'whitegrid' style before creating the figure so the
# new axes picks it up.
sns.set_style('whitegrid')
fig, ax = plt.subplots()
# Split violins by sex, drawn on the axes created above (passed explicitly
# rather than relying on the implicit current axes).
ax = sns.violinplot(x='time', y='total_bill', hue='sex', data=tips, split=True, ax=ax)

<IPython.core.display.Javascript object>

# 알아두면 좋아요