# Data Analysis with Python

### `Numpy`
`NumPy`, short for Numerical Python, has long been a cornerstone of numerical computing in Python. It provides the data structures, algorithms, and library glue needed for most scientific applications involving numerical data in Python.

### `pandas`

`pandas` provides high-level data structures and functions designed to make working with structured or tabular data fast, easy, and expressive. The primary object in `pandas` is the `DataFrame`, a tabular, column-oriented data structure with both row and column labels.
`pandas` blends the high-performance, array-computing ideas of `NumPy` with the flexible data manipulation capabilities of spreadsheets and relational databases.

### `matplotlib`

`matplotlib` is the most popular Python library for producing plots and other two-dimensional data visualizations. 

In [None]:
!pip install tables==3.5.1

# NumPy

In [None]:
import numpy as np

my_arr = np.arange(1000000)
my_list = list(range(1000000))

In [None]:
%time for _ in range(10): my_arr2 = my_arr * 2

In [None]:
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

The basic object in NumPy is an N-dimensional array object, or `ndarray`. An `ndarray` is a generic multidimensional container for homogeneous data.

In [None]:
data = np.random.randn(2, 3)
data

In [None]:
data * 10

In [None]:
data + data

In [None]:
data.shape

In [None]:
data.dtype

In [None]:
# create an ndarray from a sequence-like object (here, a Python list)
a =  np.array([1,2,3,4])
a

In [None]:
type(a)

In [None]:
# build a 2-D array (2 rows x 4 columns) directly from a nested list
a2 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
a2


In [None]:
# create arrays full of zeros
np.zeros(10)

In [None]:
np.zeros((5, 2))

In [None]:
# arange is the array version of range in Python
np.arange(15)

In [None]:
# arithmetic
a1 = np.random.randn(2, 3)
a2 = np.random.randn(2, 3)

print(a1)
print('****************')
print(a2)

In [None]:
a1 + a2

In [None]:
a1 - a2

In [None]:
a1 ** 0.5

In [None]:
a1 > a2

In [None]:
# indexing and slicing
a1 = np.arange(15, 25)

a1[5]

In [None]:
a1[:3]

In [None]:
a1 = np.arange(12).reshape(3,4)
a1

In [None]:
a1[1]

In [None]:
a1[1,2]

In [None]:
a1[:,1]

In [None]:
a1[1:,1:-1]

In [None]:
a2 = np.arange(12).reshape(2, 2, 3)

In [None]:
a2.ndim == len(a2.shape)

In [None]:
a2

In [None]:
a2[:,:,0]

In [None]:
# boolean indexing
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)

data

In [None]:
names == 'Bob'

In [None]:
data[names == 'Bob']

In [None]:
data[names != 'Bob']

In [None]:
data[~(names == 'Bob')]

In [None]:
data[names == 'Bob', :2]

In [None]:
d = data[(names == 'Bob') | (names == 'Will')]
d

In [None]:
d[d < 0]

In [None]:
d[d < 0] = 0
d

# Exercises

In [None]:
# fancy indexing
a1 = np.random.rand(12).reshape(3,4)
a1

In [None]:
a1[[0,1], [2,3]]

In [None]:
# Transpose an array
a1.T

In [None]:
np.dot(a1, a1.T)

In [None]:
# basic statistics
a1 = np.random.rand(12)
a1

In [None]:
a1.mean(), a1.std()

In [None]:
a1.cumsum()

In [None]:
a1.sum()

In [None]:
a2 = a1.reshape(4, 3)
a2

In [None]:
a2.mean(axis=0), a2.mean(axis=1)

In [None]:
# random number generators
samples = np.random.normal(size=(4, 4))

samples

In [None]:
samples = np.random.beta(2, 5, size=(4, 4))
samples

In [None]:
# i/o
a1 = np.arange(15).reshape(3, 5)
a2 = np.arange(15).reshape(3, 5)

In [None]:
# np.save appends the '.npy' extension when the name does not already end with it,
# so the array is written to 'a1.np.npy'
np.save('a1.np', a1)

In [None]:
a3 = np.load('a1.np.npy')

In [None]:
a3

In [None]:
a1

In [None]:
np.savez('a.npz', a1=a1, a2=a2, a3=a3)

In [None]:
a = np.load('a.npz')

In [None]:
a['a1']

# Pandas

In [None]:
# let's start with Series

import pandas as pd

s = pd.Series([5,6,7,8])
s

In [None]:
s = pd.Series([5,6,7,8], index=['a', 'b', 'c', 'd'])
s

In [None]:
s['a']

In [None]:
d = {'a': 5, 'b':6, 'c':7, 'd':8}
d

In [None]:
s = pd.Series(d)
s

In [None]:
s[['a', 'd']]

In [None]:
s[s > 6]

In [None]:
# A DataFrame can be thought of as a dictionary of Series sharing an index

data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
'year': [2000, 2001, 2002, 2001, 2002, 2003],
'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}

data


In [None]:
df = pd.DataFrame(data)
df

In [None]:
# let's specify a different index
# Index objects are immutable and can hold duplicate labels; they also behave like a list
df = pd.DataFrame(data, index=['first', 'second', 'third', 'fourth', 'fifth', 'sixth'])
df

In [None]:
df['year']

In [None]:
df.year

In [None]:
df.loc['third']

In [None]:
df.head()

In [None]:
df['debt'] = np.nan

In [None]:
df

In [None]:
val = pd.Series([-1.2, -1.5, -1.7], index=['second', 'fourth', 'fifth'])
df['debt'] = val
df

In [None]:
df['eastern'] = df.state == 'Ohio'
df

In [None]:
del df['eastern']
df

In [None]:
df.values

In [None]:
df.T

In [None]:
df

In [None]:
df1 = df.set_index('year')
df1

In [None]:
idx = list(df.index)
np.random.shuffle(idx)
print(list(df.index))
print(idx)

In [None]:
df.reindex(idx)

In [None]:
idx.append('seventh')
df.reindex(idx)

In [None]:
df.reindex(idx)

In [None]:
# drop columns or rows
df.drop(['state'], axis=1)

In [None]:
df.drop(['first', 'sixth'], axis=0)

In [None]:
df.drop(df.index[[0, 1]], axis=0)

In [None]:
# selection and slicing
df[:2] # for convenience it works on rows

In [None]:
df.iloc[:2]

In [None]:
df[['state', 'year']]

In [None]:
df[df['state'] == 'Ohio']

In [None]:
# row labels 'sample_1' ... 'sample_5' and column labels 'a' ... 'e'
samples = ['sample_{}'.format(i) for i in range(1, 6)]
variables = list('abcde')

# 5x5 frame of uniform random numbers labelled with the names above
df2 = pd.DataFrame(data=np.random.rand(5, 5), index=samples, columns=variables)

In [None]:
df2

In [None]:
df2 > 0.5

In [None]:
df2[df2 > 0.5]

In [None]:
df2[df2 > 0.5] = 1
df2

In [None]:
df

In [None]:
df.loc[['second', 'fourth'], ['year', 'debt']]

In [None]:
df.iloc[[1,3], [1,3]]

# Exercises

In [None]:
# create 2 numpy arrays, the first one composed of the numbers from 1 to 15 and the second of 15 random numbers

# reshape the 2 arrays so that the first one has 3 rows and 5 columns and the second one has 5 rows and 3 columns

# calculate the dot product between them

# what is the value in the middle of the matrix?

# calculate the dot product of the random matrix with its own transpose matrix

# what is the difference between the rowwise mean and the columnwise mean?

# create a dataframe with 5 rows and 5 columns of random numbers sampled from a normal distribution with mean 0 and std 1

# change the column names and set them to be ['a', 'b', 'c', 'd', 'e']

# change every negative number into 0

# add an extra column with the mean of each row and call it mean

# select the elements corresponding to index 1, 2 and columns a, c and d

# change the column order of this last selection to d, c, a


In [None]:
import numpy as np

a = np.arange(1, 16)
b = np.random.random(15)

In [None]:
a

In [None]:
b

In [None]:
a = a.reshape(3, 5)
b = b.reshape(5, 3)

c = np.dot(a, b)
c[1, 1]

In [None]:
# dot product of the random matrix with its own transpose (always a symmetric matrix)
d = np.dot(b, b.T)
# difference between the row-wise and the column-wise means, as the exercise asks;
# because d is symmetric the two agree up to floating-point rounding
d.mean(axis=1) - d.mean(axis=0)

In [None]:
import pandas as pd
import numpy as np

# 5x5 frame of draws from a normal distribution with mean 0 and standard deviation 1
df = pd.DataFrame(np.random.normal(loc=0, scale=1, size=(5, 5)))

In [None]:
df.columns = ['a', 'b', 'c', 'd', 'e']

In [None]:
df[df < 0] = 0

In [None]:
df['mean'] = np.mean(df, axis=1)

In [None]:
df1 = df.loc[[2,3], ['d', 'c', 'a']]

In [None]:
df1.columns = ['a', 'b', 'c']

In [None]:
df1

In [None]:
# arithmetic between frames whose shapes and column sets differ
df1 = pd.DataFrame(np.arange(12, dtype=float).reshape(3, 4), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.arange(20, dtype=float).reshape(4, 5), columns=['a', 'b', 'c', 'd', 'e'])

# introduce a single missing value in df1
df1.at[1, 'b'] = np.nan

df1

In [None]:
df2

In [None]:
df1 + df2

In [None]:
df1.add(df2, fill_value=0)

In [None]:
1 / df1

In [None]:
df1.rdiv(1)

In [None]:
df1

In [None]:
# fillna and dropna
df1.fillna(df1.mean())

In [None]:
df1.dropna(axis=1)

In [None]:
# apply functions
# NumPy functions such as np.mean also accept pandas objects; here the mean is taken along axis=1
np.mean(df1, axis=1)

In [None]:
# you can define your own functions to be applied along one axis
df1.apply(lambda x : x.max() - x.min(), axis=1)

In [None]:
df1.apply(lambda x : pd.Series([x.max(), x.min()], index=['max', 'min']), axis=1)

In [None]:
# apply a function element-wise to every cell of the frame
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in favour of
# DataFrame.map — confirm against the installed pandas version
df1.applymap(lambda x : x**2 - x)

In [None]:
# sorting
np.random.shuffle(samples)
np.random.shuffle(variables)

df2 = pd.DataFrame(np.random.rand(5,5), index=samples, columns=variables)
df2


In [None]:
df2.sort_index(axis=0)

In [None]:
df2.sort_index(axis=1)

In [None]:
df2.sort_values(by='a')

In [None]:
df2.loc['sample_2','a'] = df2.loc['sample_4','a'] 
df2.sort_values(by=['a', 'b'])

In [None]:
# summarizing and descriptive statistics
df2

In [None]:
df2.sum(axis=1)

In [None]:
df2.describe()

In [None]:
# pandas i/o
# tab-separated file: skip the first 4 lines and supply our own column names
iris = pd.read_csv(
    '/content/drive/MyDrive/Projects/Physalia-courses/Python/Notebook - Colab/iris.csv',
    sep='\t',
    skiprows=4,
    names=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Species'],
)

In [None]:
iris.head()

In [None]:
df2.to_csv('/content/drive/MyDrive/Projects/Physalia-courses/Python/Notebook - Colab/df2.csv', sep=',')

In [None]:
# read / write excel files
iris = pd.read_excel('/content/drive/MyDrive/Projects/Physalia-courses/Python/Notebook - Colab/iris.xlsx')

In [None]:
iris.head()

In [None]:
df2.to_excel('/content/drive/MyDrive/Projects/Physalia-courses/Python/Notebook - Colab/df2.xlsx')

In [None]:
# read / write hdf5 files
# HDF5 (hierarchical data format) is a well-regarded file format intended for storing large quantities of scientific
# array data. It is available as a C library, and it has interfaces available in many other
# languages, including Java, Julia, MATLAB, and Python.

# the context manager closes the store even if the write raises
with pd.HDFStore('/content/drive/MyDrive/Projects/Physalia-courses/Python/Notebook - Colab/iris.h5') as store:
    store['iris'] = iris

In [None]:
# reopen the store read-only and release the file handle as soon as the frame is loaded
with pd.HDFStore('/content/drive/MyDrive/Projects/Physalia-courses/Python/Notebook - Colab/iris.h5', mode='r') as store:
    new_iris = store['iris']
new_iris.head()

In [None]:
# You can also read data from SQL database directly, but this story is for another time

In [None]:
# grouping
iris.groupby('species').mean()

In [None]:
iris.groupby('species').max()

In [None]:
iris.groupby('species').transform('mean')

In [None]:
# multiindex, reshape and pivot

# Hierarchical (multi) indexing lets an axis carry two or more index levels.
# Somewhat abstractly, it is a way to work with higher dimensional data
# in a lower dimensional form.

# 9 random values labelled by an outer letter level and an inner integer level
data = pd.Series(
    np.random.randn(9),
    index=pd.MultiIndex.from_arrays([
        ['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 3, 1, 2, 2, 3],
    ]),
)

data

In [None]:
data.index

In [None]:
data.loc[:, 2]

In [None]:
data.unstack()

In [None]:
data.unstack().stack()

In [None]:
# what about DataFrames
frame = pd.DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']])
frame

In [None]:
new_columns = pd.MultiIndex.from_arrays([['California', 'California', 'Utah'], ['Blue', 'Yellow', 'Blue']])
frame.columns = new_columns
frame

In [None]:
# sum the columns that share the same top-level (level 0) column label.
# The `level` argument of DataFrame.sum was removed in pandas 2.0, so group the
# transposed frame by that level instead; sort=False keeps the labels in their original order
frame.T.groupby(level=0, sort=False).sum().T

In [None]:
data = pd.read_csv('/content/drive/MyDrive/Projects/Physalia-courses/Python/Notebook - Colab/macrodata_long.csv', index_col=0)
data.head()

In [None]:
# long -> wide: 'date' becomes the index, each 'item' a column, filled from 'value'
# (passed by keyword: pandas >= 2.0 no longer accepts these arguments positionally)
data.pivot(index='date', columns='item', values='value').head()

In [None]:
data.set_index(['date', 'item']).unstack('item').head()

In [None]:
# melt
df = pd.DataFrame({'key': ['foo', 'bar', 'baz'], 'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
df

In [None]:
df.melt('key') # pd.melt(df, ['key'])

In [None]:
# merge
df1 = pd.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'], 'data': range(7)})
df2 = pd.DataFrame({'key': ['a', 'b', 'd'], 'data': range(3)})

df1

In [None]:
df2

In [None]:
pd.merge(df1, df2, on='key')

In [None]:
pd.merge(df1, df2, on='key', how='outer', suffixes=['_left', '_right'])

In [None]:
# concat
s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series([5, 6], index=['f', 'g'])

In [None]:
pd.concat([s1, s2, s3])

In [None]:
pd.concat([s1, s2, s3], axis=1)

In [None]:
s4 = pd.concat([s1, s3])
s4

In [None]:
pd.concat([s1, s4], axis=1)

In [None]:
pd.concat([s1, s4], axis=1, join='inner')

In [None]:
df1 = pd.DataFrame(np.arange(6).reshape(3, 2), index=['a', 'b', 'c'], columns=['one', 'two'])
df2 = pd.DataFrame(5 + np.arange(4).reshape(2, 2), index=['a', 'c'], columns=['three', 'four'])

In [None]:
df1

In [None]:
df2

In [None]:
pd.concat([df1, df2], axis=1, keys=['first', 'second'])

In [None]:
pd.concat([df1, df2], axis=0, keys=['first', 'second'])

# Exercises

In [None]:
# How to fill NaN with the rowwise mean?

# How to normalize all columns in a dataframe? Subtract the mean and divide by the standard deviation

# Concat 2 dataframes sharing the same index

# Create a MultiIndex on columns

# Get the mean value for two levels

# Consider these dataframes

# 3 fruits x 3 countries in long format, with a random integer price per row
df_fruit_1 = pd.DataFrame(dict(
    fruit=['apple', 'watermelon', 'strawberry'] * 3,
    country=['IT'] * 3 + ['SP'] * 3 + ['FR'] * 3,
    price=np.random.randint(0, 15, size=9),
))

# a second price list that only shares 'apple' with the first one
df_fruit_2 = pd.DataFrame(dict(
    fruit=['apple', 'banana', 'peach'] * 3,
    price=np.random.randint(0, 15, size=9),
))

# How to merge these two dataframes by 1 column so they have only the common rows?

# How to change the LONG format of df_fruit_1 in order to have fruits as the index and countries as columns?

In [None]:
import pandas as pd
import numpy as np

matrix = np.arange(1,16).reshape(3, 5)
df = pd.DataFrame(matrix, columns=['a', 'b', 'c', 'd', 'e'])
df.loc[[0,2], ['c']] = np.nan
df

In [None]:
df1 = df.apply(lambda x: x.fillna(x.mean()), axis=1)

In [None]:
(df1 - df1.mean()) / df1.std()

In [None]:
df2 = df1.apply(lambda x: (x - x.mean()) / x.std(), axis=1)

In [None]:
df2 = df2.drop(['a', 'b', 'e'], axis=1)

In [None]:
df1

In [None]:
df3 = pd.concat([df1, df2], axis=1)
df3.columns = pd.MultiIndex.from_arrays([['first'] * len(df1.columns) + ['second'] * len(df2.columns), list(df1.columns) + list(df2.columns)])
df3

In [None]:
pd.concat([df1, df2], axis=1, keys=['first', 'second'])

In [None]:
# mean over the columns that share the same second-level (level 1) column label.
# The `level` argument of DataFrame.mean was removed in pandas 2.0, so group the
# transposed frame by that level instead; sort=False keeps the labels in their original order
df3.T.groupby(level=1, sort=False).mean().T

In [None]:
df_fruit_1

In [None]:
df_fruit_2

In [None]:
pd.merge(df_fruit_1, df_fruit_2, on=['fruit'], suffixes=['_left', '_right'])

In [None]:
# long -> wide: fruits as the index, countries as columns, prices as values
# (passed by keyword: pandas >= 2.0 no longer accepts these arguments positionally)
df_fruit_1.pivot(index='fruit', columns='country', values='price')

# Matplotlib

In [None]:
import numpy as np

data = np.arange(10)
data

In [None]:
import matplotlib.pyplot as plt

plt.plot(data) 

In [None]:
# Plots in matplotlib reside within a Figure object
fig = plt.figure(figsize=(20,10))

In [None]:
# You can’t make a plot with a blank figure. You have to create one or more subplots
ax1 = fig.add_subplot(2, 2, 1)

In [None]:
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

In [None]:
fig = plt.figure(figsize=(20,10))
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

plt.plot(np.random.randn(50).cumsum(), 'k--')

In [None]:
plt.plot(np.random.randn(50).cumsum(), 'k--')

In [None]:
fig = plt.figure(figsize=(15,10))
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))
ax3.plot(np.random.randn(50).cumsum(), 'k--')

In [None]:
# styles and colors and legend

from numpy.random import randn

fig = plt.figure(figsize=(15,10))
plt.plot(randn(30).cumsum(), color='black', linestyle='dashed', marker='*') # plt.plot(randn(30).cumsum(), 'ko--')

In [None]:
from numpy.random import randn

fig = plt.figure(figsize=(15,10))

data = np.random.randn(30).cumsum()
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')
plt.legend(loc='best')

In [None]:
# ticks, labels and title

from numpy.random import randn

fig = plt.figure(figsize=(15,10))

ax = fig.add_subplot(1, 1, 1)

ax.plot(np.random.randn(1000).cumsum())

ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'], rotation=45, fontsize='large')

ax.set_title('My first matplotlib plot')

ax.set_xlabel('Stages')

In [None]:
# save  a figure
fig = plt.figure(figsize=(15,10))

ax = fig.add_subplot(1, 1, 1)

ax.plot(np.random.randn(1000).cumsum())

ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'], rotation=30, fontsize='small')

ax.set_title('My first matplotlib plot')

ax.set_xlabel('Stages')

plt.savefig('figpath.png', dpi=400)

In [None]:
plt.rc('figure', figsize=(15, 10)) # set figsize globally

### Plotting with pandas and seaborn

In [None]:
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
s.plot()

In [None]:
df = pd.DataFrame(np.random.randn(10, 4).cumsum(0), columns=['A', 'B', 'C', 'D'], index=np.arange(0, 100, 10))
df.plot()

In [None]:
# bar plot
fig, axes = plt.subplots(2, 1)
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))
data.plot.bar(ax=axes[0], color='k', alpha=0.7)
data.plot.barh(ax=axes[1], color='k', alpha=0.7)

In [None]:
df = pd.DataFrame(np.random.rand(6, 4), index=['one', 'two', 'three', 'four', 'five', 'six'], columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
df

In [None]:
df.plot.bar()

In [None]:
df.plot.barh(stacked=True, alpha=0.5)

In [None]:
tips = pd.read_csv('/content/drive/MyDrive/Projects/Physalia-courses/Python/Notebook - Colab/tips.csv')
pd.crosstab(tips['day'], tips['size'])

In [None]:
tips[(tips['day'] == 'Sun') & (tips['size'] == 5)]

In [None]:
party_counts = pd.crosstab(tips['day'], tips['size'])
party_counts = party_counts.loc[:, 2:5] # remove 1 and 6
party_counts

In [None]:
party_pcts = party_counts.div(party_counts.sum(axis=1), axis=0) # normalize in order to sum up to 1
party_pcts

In [None]:
party_pcts.plot.bar()

In [None]:
# seaborn

import seaborn as sns

sns.set()

tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])
tips.head()


In [None]:
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

In [None]:
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')
plt.legend(loc='center right')

In [None]:
sns.catplot(x='tip_pct', y='day', kind='box', data=tips[tips.tip_pct < 0.5])

In [None]:
tips['tip_pct'].plot.hist(bins=50)

In [None]:
tips['tip_pct'].plot.density()

In [None]:
# histogram of tip_pct with a kernel density estimate drawn on top
# NOTE(review): sns.distplot is deprecated in newer seaborn releases (histplot/displot
# replace it) — confirm against the installed seaborn version
sns.distplot(tips['tip_pct'], hist=True, kde=True)

In [None]:
macro = pd.read_csv('/content/drive/MyDrive/Projects/Physalia-courses/Python/Notebook - Colab/macrodata.csv', index_col=0)
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
trans_data = np.log(data).diff().dropna()
trans_data.head()

In [None]:
# scatter plot with a fitted regression line; x and y are passed by keyword because
# recent seaborn releases no longer accept them as positional arguments
sns.regplot(x='m1', y='unemp', data=trans_data)
plt.title('Changes in log {} versus log {}'.format('m1', 'unemp'))

In [None]:
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})

In [None]:
# read the iris.csv file into a dataframe
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()

iris_fn = '/content/drive/MyDrive/Projects/Physalia-courses/Python/Notebook - Colab/iris.csv'
iris = pd.read_csv(iris_fn, skiprows=3, sep='\t')
iris.head()

In [None]:
# plot the histograms of the sepal_length of the three species in a 1x3 subplot (i.e. three plots one next to the other)

fig = plt.figure(figsize=(25, 10))

# (species value in the data, panel title / label, bar colour), left to right
panels = [
    ('setosa', 'Iris setosa', 'blue'),
    ('virginica', 'Iris virginica', 'orange'),
    ('versicolor', 'Iris versicolor', 'pink'),
]

axes = []
for position, (species, title, colour) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 3, position)
    ax.hist(iris[iris['species'] == species].sepal_length, label=title, alpha=0.6, color=colour)
    ax.set_title(title)
    ax.set_xlabel('Sepal length')
    axes.append(ax)

ax1, ax2, ax3 = axes

# only the leftmost panel carries the y-axis label
ax1.set_ylabel('Frequency')

In [None]:
# plot the boxplot of the sepal length of the three species
sns.catplot(x='sepal_length', y='species', kind='box', data=iris, height=8.27, aspect=11.7/8.27)

In [None]:
# plot a scatterplot of sepal_length vs petal_length for the three species

# one panel per species; fit_reg=False turns regplot into a plain scatter plot
g = sns.FacetGrid(iris, col="species", margin_titles=True, height=5)
g.map(sns.regplot, "sepal_length", "petal_length", color=".3", fit_reg=False)

In [None]:
# plot the distributions of petal_width of the three species
g = sns.FacetGrid(iris, col="species", margin_titles=True, height=5)
g.map(sns.kdeplot, 'petal_width')