In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

In [2]:
%matplotlib

Using matplotlib backend: <object object at 0x000002D8EEC96AD0>


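The line magic command `%matplotlib inline` enables the drawing of matplotlib figures in the IPython environment. Once this command is executed in any cell, the matplotlib plots will appear directly below the cell in which the plot function was called for the rest of the session. Note that the cell above runs the bare `%matplotlib` magic, which instead activates matplotlib's default GUI backend, so figures open in a separate window; use `%matplotlib inline` if you want them embedded below each cell.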

# Plotting with Pandas

In [3]:
# List the names of the style sheets this matplotlib installation provides,
# so that one of them can be chosen with plt.style.use().
plt.style.available

['Solarize_Light2',
 '_classic_test_patch',
 '_mpl-gallery',
 '_mpl-gallery-nogrid',
 'bmh',
 'classic',
 'dark_background',
 'fast',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn',
 'seaborn-bright',
 'seaborn-colorblind',
 'seaborn-dark',
 'seaborn-dark-palette',
 'seaborn-darkgrid',
 'seaborn-deep',
 'seaborn-muted',
 'seaborn-notebook',
 'seaborn-paper',
 'seaborn-pastel',
 'seaborn-poster',
 'seaborn-talk',
 'seaborn-ticks',
 'seaborn-white',
 'seaborn-whitegrid',
 'tableau-colorblind10']

In [4]:
# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases dropped the old names, so pick whichever spelling this
# installation actually provides instead of hard-coding one of them.
colorblind_style = ('seaborn-v0_8-colorblind'
                    if 'seaborn-v0_8-colorblind' in plt.style.available
                    else 'seaborn-colorblind')
plt.style.use(colorblind_style)

A random seed in NumPy is an initial value used to initialize the random number generator. It ensures that the sequence of random numbers generated is reproducible and consistent across different program runs.

In [5]:
# Fix the seed so the three random walks below are reproducible.
np.random.seed(333)

# One row per calendar day of 2017.
dates = pd.date_range('1/1/2017', periods=365)

# np.random.randn(n) draws n samples from the standard normal distribution;
# cumsum(0) turns those steps into a running total along axis 0 (a random walk).
# The draws happen in the order A, B, C; B and C are shifted by +20 and -20.
walk_a = np.random.randn(365).cumsum(0)
walk_b = np.random.randn(365).cumsum(0) + 20
walk_c = np.random.randn(365).cumsum(0) - 20

df = pd.DataFrame({'A': walk_a, 'B': walk_b, 'C': walk_c}, index=dates)
df.shape

(365, 3)

In [6]:
# Line plot of every column against the frame's index ('line' is the default kind).
df.plot(kind='line')

<AxesSubplot:>

In [7]:
# The colours and styling here differ from plain matplotlib output.
# The `kind` parameter selects which type of plot pandas draws;
# here column B is scattered against column A.
df.plot(x='A', y='B', kind='scatter');

In [8]:
# Line plot of column C against column A, with the legend hidden.
df.plot.line(x='A', y='C', legend=False)

<AxesSubplot:xlabel='A'>

In [9]:
# Scatter C against A, colouring each point by its B value via the viridis colormap.
df.plot.scatter(x='A', y='C', c='B', colormap='viridis')

<AxesSubplot:xlabel='A', ylabel='C'>

In [10]:
# Same scatter as before, with the marker size also taken from column B.
ax = df.plot.scatter(x='A', y='C', c='B', s=df['B'], colormap='viridis')
# Set the aspect ratio of the axes scaling (y/x-scale) so both axes use equal units.
ax.set_aspect('equal')

In [11]:
# pandas can draw many other kinds of plot; this one is a box plot per column.
df.plot(kind='box')

<AxesSubplot:>

In [12]:
# Overlaid histograms of the columns; alpha=0.5 keeps overlapping bars visible.
df.plot(kind='hist', alpha=0.5)

<AxesSubplot:ylabel='Frequency'>

In [13]:
# A kernel density estimate (KDE) plot visualises the distribution of the
# observations in a dataset, analogous to a histogram.
df.plot(kind='kde')

<AxesSubplot:ylabel='Density'>

In [14]:
# The first column of iris.csv is a saved row index; load it as the index so
# it does not appear as a spurious 'Unnamed: 0' variable in the plots that
# consume the whole frame.
iris = pd.read_csv('iris.csv', index_col=0)
iris.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [15]:
# Grid of pairwise scatter plots for the columns of iris; the trailing
# semicolon suppresses the returned array's repr.
pd.plotting.scatter_matrix(frame=iris, alpha=0.5);

A scatter plot matrix is a grid of several scatter plots of up to five numeric variables. The matrix includes individual scatter plots for every combination of variables. A scatter plot matrix can answer questions about your data, such as: What are the relationships between several variables?

In [16]:
# Start a fresh figure, then draw one line per row of iris across its
# columns, coloured by the class stored in the 'Name' column.
plt.figure()
pd.plotting.parallel_coordinates(frame=iris, class_column='Name');

A parallel plot (or parallel coordinates plot) lets you compare the features of several individual observations (series) across a set of numeric variables. Each vertical bar represents a variable and often has its own scale (the units can even be different).

# Plotting with Seaborn

In [17]:
# Seed the generator so v1 and v2 are reproducible.
np.random.seed(1234)

# v1: 1000 draws from a normal distribution with mean 0 and std 10.
v1_samples = np.random.normal(loc=0, scale=10, size=1000)
v1 = pd.Series(v1_samples, name='v1')

# v2: twice v1 plus independent normal noise with mean 60 and std 15.
v2_noise = np.random.normal(loc=60, scale=15, size=1000)
v2 = pd.Series(2 * v1 + v2_noise, name='v2')

In [18]:
plt.figure()
# Both series share the same bin edges (width 5, from -50 up to 145) so the
# two translucent histograms are directly comparable.
shared_bins = np.arange(-50, 150, 5)
for series, series_label in ((v1, 'v1'), (v2, 'v2')):
    plt.hist(series, alpha=0.7, bins=shared_bins, label=series_label)
plt.legend()

<matplotlib.legend.Legend at 0x2d8f86a2fa0>

In [19]:
plt.figure()
# density=True normalises the stacked histogram so it is on the same
# (density) scale as the KDE drawn on top; with raw counts the KDE curve
# would be squashed flat against the x axis.
plt.hist([v1, v2], histtype='barstacked', density=True)
# Pool both samples and overlay a kernel density estimate of the combined data.
v3 = np.concatenate((v1, v2))
sns.kdeplot(v3)

<AxesSubplot:ylabel='Density'>

In [20]:
plt.figure()
# sns.distplot is deprecated, so draw the same picture with its replacements:
# a density-normalised histogram in teal (alpha=0.4 matches distplot's
# default) with a navy KDE curve on top.
sns.histplot(x=v3, stat='density', color='Teal', alpha=0.4)
sns.kdeplot(x=v3, color='Navy')



<AxesSubplot:ylabel='Density'>

In [21]:
# Hex-binned joint distribution of v1 and v2 with marginal plots.
# x and y are passed by keyword: newer seaborn releases no longer accept
# them positionally.
grid = sns.jointplot(x=v1, y=v2, alpha=0.4, kind='hex')
grid.ax_joint.set_aspect('auto')



In [22]:
# Switch seaborn to the plain 'white' style for the plots that follow.
sns.set_style('white')
# KDE joint plot; space=0 removes the gap between the joint and marginal axes.
# x and y are passed by keyword: newer seaborn releases no longer accept
# them positionally.
sns.jointplot(x=v1, y=v2, kind='kde', space=0)



<seaborn.axisgrid.JointGrid at 0x2d8f88b6490>

In [23]:
# Pairwise relationships between the iris columns, coloured by 'Name',
# with KDE curves on the diagonal.
sns.pairplot(data=iris, hue='Name', diag_kind='kde')

<seaborn.axisgrid.PairGrid at 0x2d8f8891610>

In [24]:
# Side-by-side comparison of PetalLength per class: swarm plot on the left,
# violin plot on the right.
plt.figure(figsize=(12, 8))
plt.subplot(121)
# x and y are passed by keyword: newer seaborn releases no longer accept
# them positionally.
sns.swarmplot(x='Name', y='PetalLength', data=iris)
plt.subplot(122)
sns.violinplot(x='Name', y='PetalLength', data=iris)



<AxesSubplot:xlabel='Name', ylabel='PetalLength'>