# Plotly and Cufflinks

#### Plotly is an interactive visualization library.
#### Cufflinks connects Plotly to pandas, so a DataFrame can be plotted directly with `.iplot()`.

In [20]:
import pandas as pd
import numpy as np
%matplotlib inline

In [9]:
from plotly import __version__

In [10]:
# Report which plotly release this notebook is running against.
print(__version__)

2.0.15


In [11]:
import cufflinks as cf

In [12]:
# support offline use of plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [13]:
# Hook plotly's JavaScript into this notebook so figures can render inline.
# NOTE(review): connected=True presumably loads plotly.js from the web instead
# of embedding it in the notebook file — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

In [14]:
# Put cufflinks into offline mode.
# The saved run of this cell hit Jupyter's IOPub data-rate limit (see the
# recorded output below); if that recurs, start the notebook server with a
# higher --NotebookApp.iopub_data_rate_limit.
cf.go_offline()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


In [15]:
# Demo data: 100 rows x 4 columns (A-D) of standard-normal noise.
# A dedicated, seeded generator makes the frame - and therefore every chart
# below - identical on each Restart & Run All, without touching NumPy's
# global random state.
rng = np.random.RandomState(101)
df = pd.DataFrame(rng.randn(100, 4), columns='A B C D'.split())

In [16]:
# Preview the first rows to sanity-check the generated frame.
df.head()

Unnamed: 0,A,B,C,D
0,-1.083624,0.334868,0.012764,-2.083381
1,0.505054,-0.250022,-0.552702,2.318268
2,0.755736,-1.889608,0.847454,-1.160633
3,1.217286,-0.378511,-0.239218,-1.037316
4,-1.471095,-1.325245,-0.21813,-0.96159


In [17]:
# Small categorical table: one numeric value per category label.
df2 = pd.DataFrame(
    dict(Category=list('ABC'), Values=[32, 43, 50])
)

In [18]:
# Display the whole table (it only has three rows).
df2

Unnamed: 0,Category,Values
0,A,32
1,B,43
2,C,50


In [22]:
# DataFrame.plot() draws with matplotlib, whereas iplot() builds an
# interactive plotly figure.
# Click an entry in the legend on the right to toggle that line on or off.
df.iplot()

In [24]:
# Scatter plot of column B against column A.
# plotly joins the points with lines by default, so mode='markers' is needed
# to get bare dots; pass size= to control the dot size.
df.iplot(
    kind='scatter',
    x='A',
    y='B',
    mode='markers',
)

In [25]:
# Bar chart of df2: 'Category' on the x axis, 'Values' on the y axis.
df2.iplot(
    kind='bar',
    x='Category',
    y='Values',
)

In [29]:
# Bar plots combine well with an aggregation or a group-by: here df.sum()
# reduces each column to its total and the totals are drawn as bars.
df.sum().iplot(kind='bar')

In [31]:
# Box plot of the whole frame (no column is selected, so A-D are all passed in).
df.iplot(kind='box')

In [38]:
# Three equal-length numeric columns used as input for the surface plot.
# (Alternative z column to experiment with: [500, 400, 300, 200, 100].)
df3 = pd.DataFrame(
    dict(
        x=[1, 2, 3, 4, 5],
        y=[10, 20, 30, 20, 10],
        z=[5, 4, 3, 2, 1],
    )
)

In [43]:
# Surface plot of df3, coloured with the 'rdylbu' colour scale.
df3.iplot(kind='surface', colorscale='rdylbu')

In [46]:
# Histogram of column A only, using 50 bins.
df['A'].iplot(kind='hist', bins=50)

In [47]:
# When no column is selected, the histogram shows every column of the
# frame together, which makes their distributions easy to compare.
df.iplot(kind='hist')

In [50]:
# Spread plot of columns A and B; this chart type is useful for comparing
# two series such as stock prices.
df[['A', 'B']].iplot(kind='spread')

In [52]:
# Bubble plot: a scatter plot of B against A in which each point's size is
# driven by a third column (C here).
# Handy for data such as world GDP, population or happiness scores.
df.iplot(
    kind='bubble',
    x='A',
    y='B',
    size='C',
)

In [54]:
# Scatter matrix of the frame; with large datasets this may take a long
# time to load.
df.scatter_matrix()