# Pandas Visualization

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib notebook

In [2]:
# list the names of the pre-defined styles; any of them can be passed to plt.style.use
plt.style.available

['Solarize_Light2',
 '_classic_test_patch',
 'bmh',
 'classic',
 'dark_background',
 'fast',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn',
 'seaborn-bright',
 'seaborn-colorblind',
 'seaborn-dark',
 'seaborn-dark-palette',
 'seaborn-darkgrid',
 'seaborn-deep',
 'seaborn-muted',
 'seaborn-notebook',
 'seaborn-paper',
 'seaborn-pastel',
 'seaborn-poster',
 'seaborn-talk',
 'seaborn-ticks',
 'seaborn-white',
 'seaborn-whitegrid',
 'tableau-colorblind10']

In [3]:
# use the colorblind-friendly seaborn style.
# Matplotlib 3.6 renamed the bundled seaborn styles with a 'seaborn-v0_8-' prefix,
# so pick the new name when it is installed and fall back to the legacy name otherwise.
colorblind_style = ('seaborn-v0_8-colorblind'
                    if 'seaborn-v0_8-colorblind' in plt.style.available
                    else 'seaborn-colorblind')
plt.style.use(colorblind_style)

### DataFrame.plot

In [4]:
# fix the seed so the three random walks are reproducible
np.random.seed(123)

n_days = 365
dates = pd.date_range('1/1/2017', periods=n_days)

# each column is the running sum of standard-normal steps; B and C are shifted by +20 / -20.
# the dict is evaluated in order, so A, B and C draw from the generator in that sequence
walks = {
    'A': np.random.randn(n_days).cumsum(),
    'B': np.random.randn(n_days).cumsum() + 20,
    'C': np.random.randn(n_days).cumsum() - 20,
}
df = pd.DataFrame(walks, index=dates)
df.head()

Unnamed: 0,A,B,C
2017-01-01,-1.085631,20.059291,-20.230904
2017-01-02,-0.088285,21.803332,-16.659325
2017-01-03,0.194693,20.835588,-17.055481
2017-01-04,-1.311601,21.255156,-17.093802
2017-01-05,-1.890202,21.462083,-19.518638


In [5]:
# default plot kind is a line plot; the trailing semicolon keeps the call's
# return value from being echoed as unwanted cell output
df.plot();

<IPython.core.display.Javascript object>

We can select the type of plot by passing its name to the `kind` parameter.

In [6]:
# scatter plot with column 'A' on x and column 'B' on y, chosen through the `kind` keyword
df.plot(x='A', y='B', kind='scatter');

<IPython.core.display.Javascript object>

You can also choose the plot kind by calling the corresponding `DataFrame.plot.<kind>` method (for example `df.plot.scatter(...)`) instead of providing the `kind` keyword argument.

`kind` :
- `'line'` : line plot (default)
- `'bar'` : vertical bar plot
- `'barh'` : horizontal bar plot
- `'hist'` : histogram
- `'box'` : boxplot
- `'kde'` : Kernel Density Estimation plot
- `'density'` : same as 'kde'
- `'area'` : area plot
- `'pie'` : pie plot
- `'scatter'` : scatter plot
- `'hexbin'` : hexbin plot

In [7]:
# scatter 'C' against 'A'; column 'B' drives both the marker colour (c) and the marker size (s).
# there is no trailing semicolon, so the returned Axes object is echoed in the cell output
df.plot.scatter(x='A', y='C', c='B', s=df['B'], colormap='viridis')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x12062ffa0>

In [8]:
# keep a handle on the Axes returned by the plotting call so it can be adjusted afterwards
ax = df.plot.scatter(x='A', y='C', c='B', s=df['B'], colormap='viridis')
# give both axes the same data-unit scale
ax.set_aspect('equal')

<IPython.core.display.Javascript object>

In [9]:
# box plot of the DataFrame's columns (same as kind='box')
df.plot.box();

<IPython.core.display.Javascript object>

In [10]:
# histogram of the columns; alpha=0.7 makes the bars partly transparent so overlaps stay visible
df.plot.hist(alpha=0.7);

<IPython.core.display.Javascript object>

[Kernel density estimation plots](https://en.wikipedia.org/wiki/Kernel_density_estimation) are useful for deriving a smooth continuous function from a given sample.

In [11]:
# kernel density estimate of the columns (same as kind='kde')
df.plot.kde();

<IPython.core.display.Javascript object>

### pandas.plotting

[Iris flower data set](https://en.wikipedia.org/wiki/Iris_flower_data_set)

In [13]:
# load the iris measurements from a CSV file expected next to the notebook
iris = pd.read_csv('iris.csv')
# preview the first rows: four numeric measurement columns plus the species in 'Name'
iris.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [15]:
# grid of pairwise scatter plots for the iris data
pd.plotting.scatter_matrix(iris);

<IPython.core.display.Javascript object>

In [18]:
# open a fresh figure, then draw a parallel-coordinates plot with 'Name' as the class column
plt.figure()
pd.plotting.parallel_coordinates(iris, class_column='Name');

<IPython.core.display.Javascript object>

# Seaborn

In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib notebook

In [20]:
# fix the seed so both samples are reproducible
np.random.seed(1234)

n_samples = 1000

# v1: normal sample with mean 0 and standard deviation 10
v1 = pd.Series(np.random.normal(loc=0, scale=10, size=n_samples), name='v1')

# v2: twice v1 plus independent normal noise (mean 60, standard deviation 15)
noise = np.random.normal(loc=60, scale=15, size=n_samples)
v2 = pd.Series(2 * v1 + noise, name='v2')

In [21]:
plt.figure()
# both histograms share the same bin edges: -50, -45, ..., 145
bin_edges = np.arange(-50, 150, 5)
plt.hist(v1, bins=bin_edges, alpha=0.7, label='v1');
plt.hist(v2, bins=bin_edges, alpha=0.7, label='v2');
plt.legend();

<IPython.core.display.Javascript object>

In [24]:
# overlay a kernel density estimate on a stacked, density-normalised histogram
plt.figure()
# pool both samples into one array for the density estimate
v3 = np.concatenate([v1, v2])
plt.hist([v1, v2], histtype='barstacked', density=True);
sns.kdeplot(v3);

<IPython.core.display.Javascript object>

In [25]:
plt.figure()
# sns.distplot is deprecated; draw its two components with their own calls instead,
# which also lets each component take its own keyword arguments.
# stat='density' puts the histogram on the same scale as the KDE curve drawn on top of it
sns.histplot(v3, stat='density', color='Teal', alpha=0.4);
sns.kdeplot(v3, color='Navy');

<IPython.core.display.Javascript object>



In [26]:
# pass the two series by keyword: seaborn >= 0.12 no longer accepts x and y positionally
sns.jointplot(x=v1, y=v2, alpha=0.4);



<IPython.core.display.Javascript object>

In [27]:
# x and y are passed by keyword because seaborn >= 0.12 rejects them as positional arguments.
# keep the returned grid so its central (joint) axes can be adjusted
grid = sns.jointplot(x=v1, y=v2, alpha=0.4);
# give both axes of the joint plot the same data-unit scale
grid.ax_joint.set_aspect('equal')



<IPython.core.display.Javascript object>

In [28]:
# hexbin version of the joint plot; x and y passed by keyword (required by seaborn >= 0.12)
sns.jointplot(x=v1, y=v2, kind='hex');



<IPython.core.display.Javascript object>

In [29]:
# set the seaborn style for all the following plots
sns.set_style('white')

# KDE version of the joint plot with no gap (space=0) between the joint and marginal axes;
# x and y are passed by keyword because seaborn >= 0.12 rejects them as positional arguments
sns.jointplot(x=v1, y=v2, kind='kde', space=0);



<IPython.core.display.Javascript object>

In [30]:
# load the iris measurements from 'iris.csv' (the same file the pandas section reads)
iris = pd.read_csv('iris.csv')
# preview the first rows
iris.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [31]:
# pairwise plots coloured by species, with KDEs on the diagonal.
# `height` is the current name of the per-facet size parameter (formerly `size`)
sns.pairplot(iris, hue='Name', diag_kind='kde', height=2);



<IPython.core.display.Javascript object>

In [None]:
# side-by-side comparison of petal length per species: swarm plot (left) and violin plot (right)
plt.figure(figsize=(8,6))
plt.subplot(121)
# x and y are passed by keyword: seaborn >= 0.12 treats the first positional argument as `data`
sns.swarmplot(x='Name', y='PetalLength', data=iris);
plt.subplot(122)
sns.violinplot(x='Name', y='PetalLength', data=iris);

In [32]:
holidays_url = "https://www.timeanddate.com/holidays/us/2010?hol=8441856"
# parse the tables on the page, taking row 1 of each as the header row
holiday_tables = pd.read_html(holidays_url, header=1)
# keep the first table, minus its first and last rows
df_2010 = holiday_tables[0].iloc[1:-1]

In [33]:
# display the scraped 2010 holiday table
df_2010

Unnamed: 0,Date,Unnamed: 1,Name,Type,Details
1,1 Jan,Friday,New Year's Day,Federal Holiday,
2,6 Jan,Wednesday,Epiphany,Christian,
3,7 Jan,Thursday,Orthodox Christmas Day,Orthodox,
4,7 Jan,Thursday,International Programmers' Day,Worldwide observance,
5,13 Jan,Wednesday,Stephen Foster Memorial Day,Observance,
...,...,...,...,...,...
404,25 Dec,Saturday,Christmas Day,Federal Holiday,
405,26 Dec,Sunday,Kwanzaa (first day),Observance,
406,26 Dec,Sunday,Day After Christmas Day,State holiday,Texas
407,31 Dec,Friday,Day off for New Year's Day,Federal Holiday,


In [42]:
# scrape the first table from the Wikipedia list of Detroit Pistons seasons.
# NOTE(review): this rebinds `df`, the name the DataFrame.plot cells use for the random-walk frame;
# re-running those cells after this one would plot this table instead.
# NOTE(review): with header=1 the displayed column labels all read 'Fort Wayne Zollner Pistons (NBL)...',
# so the header row picked here looks wrong for this page - confirm
df = pd.read_html("https://en.wikipedia.org/wiki/List_of_Detroit_Pistons_seasons", header=1)[0]

In [43]:
# display the scraped seasons table
df

Unnamed: 0,Fort Wayne Zollner Pistons (NBL),Fort Wayne Zollner Pistons (NBL).1,Fort Wayne Zollner Pistons (NBL).2,Fort Wayne Zollner Pistons (NBL).3,Fort Wayne Zollner Pistons (NBL).4,Fort Wayne Zollner Pistons (NBL).5,Fort Wayne Zollner Pistons (NBL).6,Fort Wayne Zollner Pistons (NBL).7,Fort Wayne Zollner Pistons (NBL).8,Fort Wayne Zollner Pistons (NBL).9,Fort Wayne Zollner Pistons (NBL).10,Fort Wayne Zollner Pistons (NBL).11
0,—,1941–42,—,,—,2nd,15,9,.625,5,Won NBL Semifinals (Wingfoots) 2–1 Lost NBL Ch...,
1,—,1942–43,—,,—,1st,17,6,.739,—,Won NBL Semifinals (Flyers) 2–1 Lost NBL Champ...,Bobby McDermott (MVP)
2,—,1943–44,—,,—,1st,18,4,.818,—,Won NBL Semifinals (Brassmen) 2–0 Won NBL Cham...,"Bobby McDermott (MVP, COY)"
3,—,1944–45,—,,Eastern,1st,25,5,.833,—,Won Division Semifinals (Transfers) 2–0 Won NB...,"Bobby McDermott (MVP, COY)"
4,—,1945–46,—,,Eastern,1st,26,8,.765,—,Lost Division Semifinals (Royals) 3–1,Bobby McDermott (MVP)
...,...,...,...,...,...,...,...,...,...,...,...,...
77,2016–17,2016–17,Eastern,10th,Central,5th,37,45,.451,14,,
78,2017–18,2017–18,Eastern,9th,Central,4th,39,43,.476,11,,
79,2018–19,2018–19,Eastern,8th,Central,3rd,41,41,.500,19,Lost First Round (Bucks) 4–0,
80,2019–20,2019–20,Eastern,13th,Central,4th,20,46,.303,32.5,,
