# Interactive Plots Python

- Bokeh (https://bokeh.pydata.org/en/latest/)
- plotly (https://plot.ly/python/)
- ...

# 1. Bokeh

- to install, **conda install bokeh** in command prompt
- https://bokeh.pydata.org/en/latest/docs/user_guide/quickstart.html#plot
- http://nbviewer.jupyter.org/github/bokeh/bokeh-notebooks/blob/master/tutorial/00%20-%20intro.ipynb

### What you can do with Bokeh
- http://bokeh.pydata.org/en/latest/docs/gallery.html#

In [10]:
import pandas as pd

In [11]:
df = pd.read_csv("data/titanic_train.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB


In [12]:
df = df.drop('Cabin', axis=1)

In [13]:
df = df.dropna()
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 712 entries, 0 to 890
Data columns (total 11 columns):
PassengerId    712 non-null int64
Survived       712 non-null int64
Pclass         712 non-null int64
Name           712 non-null object
Sex            712 non-null object
Age            712 non-null float64
SibSp          712 non-null int64
Parch          712 non-null int64
Ticket         712 non-null object
Fare           712 non-null float64
Embarked       712 non-null object
dtypes: float64(2), int64(5), object(4)
memory usage: 66.8+ KB


In [14]:
from bokeh.io import output_notebook, show
output_notebook()

In [14]:
from bokeh.plotting import figure, output_file, show

# prepare some data
x = df['Age']
y = df['Fare']

# output to static HTML file
output_file("data/german.html")

# create a new plot with a title and axis labels
p = figure(title="simple line example", x_axis_label='x', y_axis_label='y')

# add a line renderer with legend and line thickness
p.circle(x, y, size=8)
p.xaxis.axis_label = 'Age'
p.yaxis.axis_label = 'Fare'

# show the results
show(p)

In [23]:
from bokeh.charts import Histogram, output_file, show

hist = Histogram(df, values="Fare", color="Survived", legend="top_right", bins=12)
show(hist)

In [24]:
p1 = figure(plot_width=600, plot_height=300, title="RESPONSE vs AMOUNT without jittering")

x = df['Survived']
y = df['Fare']

p1.circle(x=x, y=y)
    
# show the results
show(p1)

In [25]:
# improve graph using jitter

from bokeh.models import Jitter

colors = ["red", "olive"]

p2 = figure(plot_width=600, plot_height=300, title="RESPONSE vs AMOUNT with jittering")

for i, survive in enumerate(list(df.Survived.unique())):
    y = df[df['Survived'] == survive]['Fare']
    color = colors[i % len(colors)]
    
    p2.circle(x={'value': survive, 'transform': Jitter(width=0.2)}, y=y, color=color)
    
# show the results
show(p2)

In [1]:
#heatmap
from bokeh.charts import HeatMap, bins, show, output_file

hm1 = HeatMap(df, x='Survived', y='Sex')
show(hm1)

NameError: name 'df' is not defined

In [None]:
# try to create another heatmap using a different variable (Pclass)



In [15]:
from bokeh.charts import HeatMap, show, output_file

hm = HeatMap(df, x="Age", y="Survived", values='Fare', title='Survived vs Age vs Fare', stat=None)
show(hm)

AttributeError: 'pandas._libs.interval.Interval' object has no attribute 'split'

In [None]:
# try to create another heatmap using a different variable (x="Embarked")



### Geomap using Google Map

You need a Google Maps API key to make the plot work properly. Go to https://developers.google.com/maps/documentation/javascript/get-api-key

In [2]:
from bokeh.io import output_file, show
from bokeh.models import (
  GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool
)

map_options = GMapOptions(lat=30.29, lng=-97.73, map_type="roadmap", zoom=11)

plot = GMapPlot(
    x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options
)
plot.title.text = "Austin"

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# Replace the value below with your personal API key:
plot.api_key = "AIzaSyCFhduMbH0dsgSantY71vgXpLga6Ur8Tow"

source = ColumnDataSource(
    data=dict(
        lat=[30.29, 30.20, 30.29],
        lon=[-97.70, -97.74, -97.78],
    )
)

circle = Circle(x="lon", y="lat", size=15, fill_color="blue", fill_alpha=0.8, line_color=None)
plot.add_glyph(source, circle)

plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())
output_file("gmap_plot.html")
show(plot)

In [15]:
from bokeh.io import output_file, show
from bokeh.models import (
  GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool
)

map_options = GMapOptions(lat=39.18, lng=-96.60, map_type="roadmap", zoom=11)

plot = GMapPlot(
    x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options
)
plot.title.text = "Manhattan, KS"

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# Replace the value below with your personal API key:
plot.api_key = "AIzaSyCFhduMbH0dsgSantY71vgXpLga6Ur8Tow"

source = ColumnDataSource(
    data=dict(
        lat=[39.18, 39.28, 39.38],
        lon=[-96.57, -96.67, -96.47],
    )
)

circle = Circle(x="lon", y="lat", size=15, fill_color="blue", fill_alpha=0.8, line_color=None)
plot.add_glyph(source, circle)

plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())
output_file("gmap_plot.html")
show(plot)

In [4]:
# Create and deploy interactive data applications

from IPython.display import IFrame
IFrame('https://demo.bokehplots.com/apps/gapminder', width=900, height=500)

Go to http://demo.bokehplots.com/apps/ for more examples of apps

In [5]:
# Create and deploy interactive data applications

from IPython.display import IFrame
IFrame('https://demo.bokehplots.com/apps/movies', width=900, height=500)

More Tutorials
- https://github.com/bokeh/bokeh-notebooks/tree/master/tutorial
- http://nbviewer.jupyter.org/github/bokeh/bokeh-notebooks/blob/master/index.ipynb
- http://bokeh.pydata.org/en/latest/docs/gallery.html

# 2. plotly

- To install, **pip install plotly** (https://plot.ly/python/getting-started/)
- Go to https://plot.ly/ and sign up. 
- Once your account is created, generate a new API key (this can be done in setting)

### See what you can do with plotly
https://plot.ly/python/#basic-charts

In [6]:
!pip install plotly



In [16]:
import plotly 

# insert your user name and API key
plotly.tools.set_credentials_file(username='Lynlin', api_key='EjmCpYoJKbZ2NkMPNURD')

In [17]:
import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
from plotly.graph_objs import *
init_notebook_mode()

labels = ['Oxygen','Hydrogen','Carbon_Dioxide','Nitrogen']
values = [4500,2500,1053,500]

trace = go.Pie(labels=labels, values=values)

iplot([trace], filename='data/basic_pie_chart')

In [18]:
# scientific chart

data = Data([
    Contour(
        z=[[10, 10.625, 12.5, 15.625, 20],
           [5.625, 6.25, 8.125, 11.25, 15.625],
           [2.5, 3.125, 5., 8.125, 12.5],
           [0.625, 1.25, 3.125, 6.25, 10.625],
           [0, 0.625, 2.5, 5.625, 10]]
    )
])

iplot(data)

In [22]:
trace1 = go.Area(
    r=[77.5, 72.5, 70.0, 45.0, 22.5, 42.5, 40.0, 62.5],
    t=['North', 'N-E', 'East', 'S-E', 'South', 'S-W', 'West', 'N-W'],
    name='11-14 m/s',
    marker=dict(
        color='rgb(106,81,163)'
    )
)
trace2 = go.Area(
    r=[57.49999999999999, 50.0, 45.0, 35.0, 20.0, 22.5, 37.5, 55.00000000000001],
    t=['North', 'N-E', 'East', 'S-E', 'South', 'S-W', 'West', 'N-W'],
    name='8-11 m/s',
    marker=dict(
        color='rgb(158,154,200)'
    )
)
trace3 = go.Area(
    r=[40.0, 30.0, 30.0, 35.0, 7.5, 7.5, 32.5, 40.0],
    t=['North', 'N-E', 'East', 'S-E', 'South', 'S-W', 'West', 'N-W'],
    name='5-8 m/s',
    marker=dict(
        color='rgb(203,201,226)'
    )
)
trace4 = go.Area(
    r=[20.0, 7.5, 15.0, 22.5, 2.5, 2.5, 12.5, 22.5],
    t=['North', 'N-E', 'East', 'S-E', 'South', 'S-W', 'West', 'N-W'],
    name='< 5 m/s',
    marker=dict(
        color='rgb(242,240,247)'
    )
)
data = [trace1, trace2, trace3, trace4]
layout = go.Layout(
    title='Wind Speed Distribution in Laurel, NE',
    font=dict(
        size=16
    ),
    legend=dict(
        font=dict(
            size=16
        )
    ),
    radialaxis=dict(
        ticksuffix='%'
    ),
    orientation=270
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)

Before running the below example. You need to do **pip install -U pandas_datareader** in command prompt

In [28]:
!pip install pandas-datareader



In [37]:
from datetime import datetime
import pandas_datareader.data as web

df = web.DataReader("aapl", 'yahoo',
                    datetime(2015, 1, 1),
                    datetime(2016, 7, 1))

data = [go.Scatter(x=df.index, y=df.High)]

iplot(data)

In [34]:
import pandas as pd

df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')

for col in df.columns:
    df[col] = df[col].astype(str)

scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]

df['text'] = df['state'] + '<br>' +\
    'Beef '+df['beef']+' Dairy '+df['dairy']+'<br>'+\
    'Fruits '+df['total fruits']+' Veggies ' + df['total veggies']+'<br>'+\
    'Wheat '+df['wheat']+' Corn '+df['corn']

data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = df['code'],
        z = df['total exports'].astype(float),
        locationmode = 'USA-states',
        text = df['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Millions USD")
        ) ]

layout = dict(
        title = '2011 US Agriculture Exports by State<br>(Hover for breakdown)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
iplot( fig, filename='d3-cloropleth-map' )

In [35]:
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv')
df.head()

df['text'] = df['name'] + '<br>Population ' + (df['pop']/1e6).astype(str)+' million'
limits = [(0,2),(3,10),(11,20),(21,50),(50,3000)]
colors = ["rgb(0,116,217)","rgb(255,65,54)","rgb(133,20,75)","rgb(255,133,27)","lightgrey"]
cities = []
scale = 5000

for i in range(len(limits)):
    lim = limits[i]
    df_sub = df[lim[0]:lim[1]]
    city = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = df_sub['lon'],
        lat = df_sub['lat'],
        text = df_sub['text'],
        marker = dict(
            size = df_sub['pop']/scale,
            color = colors[i],
            line = dict(width=0.5, color='rgb(40,40,40)'),
            sizemode = 'area'
        ),
        name = '{0} - {1}'.format(lim[0],lim[1]) )
    cities.append(city)

layout = dict(
        title = '2014 US city populations<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        ),
    )

fig = dict( data=cities, layout=layout )
iplot( fig, validate=False, filename='d3-bubble-map-populations' )

In [36]:
# Read data from a csv
z_data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv')

data = [
    go.Surface(
        z=z_data.as_matrix()
    )
]
layout = go.Layout(
    title='Mt Bruno Elevation',
    autosize=False,
    width=800,
    height=500,
    margin=dict(
        l=65,
        r=50,
        b=65,
        t=90
    )
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='elevations-3d-surface')

## Visualizing CSV data using plotly
- https://plot.ly/python/ipython-notebook-tutorial/

In [38]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 378 entries, 2015-01-02 to 2016-07-01
Data columns (total 6 columns):
Open         378 non-null float64
High         378 non-null float64
Low          378 non-null float64
Close        378 non-null float64
Adj Close    378 non-null float64
Volume       378 non-null int64
dtypes: float64(5), int64(1)
memory usage: 30.7 KB


In [39]:
data = [Scatter(x=df.Age, y=df.Fare, mode = 'markers')]
iplot(data)

AttributeError: 'DataFrame' object has no attribute 'Age'

In [40]:
data = [Histogram(x=df.Age)]
iplot(data)

AttributeError: 'DataFrame' object has no attribute 'Age'

In [41]:
# try to create another Histogram using a different variable

data = [Histogram(x=df.Fare)]
iplot(data)

AttributeError: 'DataFrame' object has no attribute 'Fare'

In [42]:
# select rows of those survived
yes = df.loc[df['Survived'] == 1]
# select rows of those not survived
no = df.loc[df['Survived'] == 0]

# select fare column from yes group
x0=yes.Fare
# select fare column from no group
x1=no.Fare 

# create a histogram for those survived
Survived = go.Histogram(
    x=x0,
    opacity=0.75,
    name='Survived'
)

# create a histogram for those NOT survived
Not_survived = go.Histogram(
    x=x1,
    opacity=0.75,
    name='Not survived'
)


data = [Survived, Not_survived]
layout = go.Layout(barmode='overlay')
fig = go.Figure(data=data, layout=layout)

iplot(fig)

KeyError: 'Survived'

In [43]:
# use Embarked as variable

yes = df.loc[df['Survived'] == 1]
no = df.loc[df['Survived'] == 0]

x0=yes.Embarked
x1=no.Embarked

Survived = go.Histogram(
    x=x0,
    opacity=0.75,
    name='Survived'    
)
Not_survived = go.Histogram(
    x=x1,
    opacity=0.75,
    name='Not survived'    
)

data = [Survived, Not_survived]
layout = go.Layout(barmode='overlay')
fig = go.Figure(data=data, layout=layout)

iplot(fig)

KeyError: 'Survived'

In [None]:
# Use Sex as variable










In [None]:
# Use Pclass as variable













