In [95]:
#PLOTLY - interactive plots
#Basic plotly examples/playground to visualize data
import numpy as np
import pandas as pd
import scipy as sp
import plotly.plotly as py
import plotly.offline as pyo
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
# SECURITY: a Plotly Cloud username/API key used to be pasted here in a comment.
# Never commit credentials to a notebook. If the online `py` API is needed, read
# them from the environment instead (requires `import os`), e.g.
#   tools.set_credentials_file(username=os.environ["PLOTLY_USERNAME"],
#                              api_key=os.environ["PLOTLY_API_KEY"])
# The previously published key should be treated as compromised and revoked.

# Fixed seed so the randomly generated demo data is identical on every run.
SEED = 42
np.random.seed(SEED)

# Enable plotly's offline notebook mode; connected=True loads plotly.js from the
# CDN instead of embedding the library in the notebook.
pyo.init_notebook_mode(connected=True)

In [42]:
# Scatter plot of 100 random integer points (both coordinates drawn from 1..100).
random_x = np.random.randint(1, 101, size=100)
random_y = np.random.randint(1, 101, size=100)

# Marker appearance shared by every point: size-12 circles with a 1px outline.
marker_style = {
    "size": 12,
    "color": "rgb(0,0,40)",
    "symbol": "circle",
    "line": dict(width=1),
}
scatter_trace = go.Scatter(
    x=random_x,
    y=random_y,
    mode="markers",
    marker=marker_style,
)
data = [scatter_trace]

layout = go.Layout(
    title="Random nums",
    xaxis={"title": "NumX"},
    yaxis={"title": "NumY"},
    hovermode="closest",
)
fig = go.Figure(data=data, layout=layout)

# Writes a standalone HTML file; the returned file URL is the cell's output.
pyo.plot(fig, filename="plot_out/scatterExample.html")

'file://C:\\Users\\Ruthberg\\DevProjects\\notebooks\\plot_out\\scatterExample.html'

In [47]:
# Line charts: the same random series drawn three times, shifted vertically by
# +5 / 0 / -5 so each drawing mode can be seen on its own.
x_values = np.linspace(0, 1, num=100)
y_values = np.random.randn(100)  # samples from a standard normal distribution

# (drawing mode, legend name, y data) for each trace, top to bottom.
line_specs = [
    ("markers", "markers", y_values + 5),
    ("lines", "mylines", y_values),
    ("lines+markers", "best one", y_values - 5),
]
trace0, trace1, trace2 = [
    go.Scatter(x=x_values, y=series, mode=draw_mode, name=legend_name)
    for draw_mode, legend_name, series in line_specs
]
data = [trace0, trace1, trace2]

layout = go.Layout(title="Line chart")
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="plot_out/lineExample.html")


'file://C:\\Users\\Ruthberg\\DevProjects\\notebooks\\plot_out\\lineExample.html'

In [48]:
# A nicer example on real data: a table of population estimates per region/state.
census_csv = "data/plotly/nst-est2017-alldata.csv"
df = pd.read_csv(census_csv)
df.head()  # preview the first rows

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,NAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RDOMESTICMIG2015,RDOMESTICMIG2016,RDOMESTICMIG2017,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,RNETMIG2016,RNETMIG2017
0,10.0,0,0,0.0,United States,308745538.0,308758105.0,309338421.0,311644280.0,313993272.0,...,0.0,0.0,0.0,2.7209,2.920371,2.883643,3.173228,3.516743,3.513394,3.423941
1,20.0,1,0,0.0,Northeast Region,55317240.0,55318350.0,55388349.0,55642659.0,55860261.0,...,-6.103092,-6.619089,-5.55957,1.46795,0.779137,0.605873,-0.082832,-0.903931,-1.307503,-0.28893
2,20.0,2,0,0.0,Midwest Region,66927001.0,66929794.0,66973360.0,67141501.0,67318295.0,...,-3.458531,-3.307295,-2.30464,-1.187519,-1.010696,-0.120354,-0.752477,-1.323952,-1.160735,-0.191323
3,20.0,3,0,0.0,South Region,114555744.0,114563024.0,114869241.0,116060993.0,117291728.0,...,3.788037,3.592695,2.900528,5.544289,5.831747,5.362083,6.31731,7.336162,7.113818,6.30401
4,20.0,4,0,0.0,West Region,71945553.0,71946937.0,72107471.0,72799127.0,73522988.0,...,1.61345,2.099001,1.475519,2.798796,3.521423,3.396627,4.163576,5.067452,5.488965,4.737979


In [52]:
# Keep only the rows whose DIVISION code is "1" (the original note called this
# "NE" -- presumably Census division 1 = New England; confirm against the file layout).
# The code is compared as text after an explicit cast, so the filter works whether
# pandas parsed the column as strings or as integers.
# set_index is chained rather than applied with inplace=True: mutating a filtered
# slice of `df` in place can trigger SettingWithCopyWarning, and the chained form
# keeps the cell safe to re-run.
df2 = df[df["DIVISION"].astype(str) == "1"].set_index("NAME")

# Only the population columns (names starting with "POP") are kept for plotting.
list_pop_col = [col for col in df2.columns if col.startswith("POP")]
df2 = df2[list_pop_col]

In [55]:
def _population_line(region_name):
    """Build a line trace for one row of ``df2``: its columns on x, the row's values on y."""
    return go.Scatter(
        x=df2.columns,
        y=df2.loc[region_name],
        mode="lines",
        name=region_name,
    )


# One line per index entry of df2, all drawn in the same figure.
data = list(map(_population_line, df2.index))
pyo.plot(data, filename="plot_out/line2Example.html")

'file://C:\\Users\\Ruthberg\\DevProjects\\notebooks\\temp-plot.html'

In [56]:
# Bar charts: load the 2018 Winter Olympics medal table.
medals_csv = "data/plotly/2018WinterOlympics.csv"
df = pd.read_csv(medals_csv)
df.head()  # preview the first rows

Unnamed: 0,Rank,NOC,Gold,Silver,Bronze,Total
0,1,Norway,14,14,11,39
1,2,Germany,14,10,7,31
2,3,Canada,11,8,10,29
3,4,United States,9,8,6,23
4,5,Netherlands,8,6,6,20


In [64]:
# One bar trace per medal type; each column name doubles as the legend label,
# and the bar colour matches the medal.
medal_colors = [("Gold", "#FFD700"), ("Silver", "#9EA0A1"), ("Bronze", "#CD7F32")]
trace1, trace2, trace3 = [
    go.Bar(x=df["NOC"], y=df[medal], name=medal, marker=dict(color=hex_color))
    for medal, hex_color in medal_colors
]

# Alternative: a single series of totals -> data = [go.Bar(x=df["NOC"], y=df["Total"])]
data = [trace1, trace2, trace3]

# barmode="stack" stacks the three medal bars per country; leaving barmode out
# gives plotly's default grouped (side-by-side) bars.
layout = go.Layout(title="Medals", barmode="stack")
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="plot_out/barExample.html")

'file://C:\\Users\\Ruthberg\\DevProjects\\notebooks\\plot_out\\barExample.html'

In [66]:
# Bubble plots: load the car mpg data set.
mpg_csv = "data/plotly/mpg.csv"
df = pd.read_csv(mpg_csv)
df.head()  # preview the first rows

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [72]:
# Bubble chart of horsepower (x) against mpg (y). Marker size follows the car's
# weight (divided by 100), marker colour follows the cylinder count, and the car
# name is attached as the point text.
bubble_marker = {
    "size": df["weight"] / 100,
    "color": df["cylinders"],
    "showscale": True,  # draw the colour scale next to the plot
}
bubble_trace = go.Scatter(
    x=df["horsepower"],
    y=df["mpg"],
    text=df["name"],
    mode="markers",
    marker=bubble_marker,
)
data = [bubble_trace]

layout = go.Layout(title="Bubble chart")
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="plot_out/bubbleExample.html")

'file://C:\\Users\\Ruthberg\\DevProjects\\notebooks\\plot_out\\bubbleExample.html'

In [78]:
# Box plots
# Small sample with extreme values at both ends (1 and 54); it is only used by the
# single-box variants kept as comments below.
y = [1, 14, 14, 15, 16, 18, 18, 19, 19, 20, 20, 23, 24, 26, 27, 27, 28, 29, 33, 54]

# Two numeric samples to compare side by side, one box per sample.
snodgrass = [0.209, 0.205, 0.196, 0.210, 0.202, 0.207, 0.224, 0.223, 0.220, 0.201]
twain = [0.225, 0.262, 0.217, 0.240, 0.230, 0.229, 0.235, 0.217]

# Single-box variants using `y`:
#   data = [go.Box(y=y, boxpoints="all", jitter=0.3, pointpos=0)]  # draw every point over the box
#   data = [go.Box(y=y, boxpoints="outliers")]                     # draw only the outlier points

# Fix: the first legend label was misspelled "Snoddgrass"; it now matches the
# `snodgrass` variable it describes.
data = [
    go.Box(y=snodgrass, name="Snodgrass"),
    go.Box(y=twain, name="Twain"),
]
pyo.plot(data, filename="plot_out/boxExample.html")

'file://C:\\Users\\Ruthberg\\DevProjects\\notebooks\\plot_out\\boxExample.html'

In [79]:
# Histograms: (re)load the car mpg data set so this section can run on its own.
mpg_csv = "data/plotly/mpg.csv"
df = pd.read_csv(mpg_csv)
df.head()  # preview the first rows

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [81]:
# Histogram of the mpg column with explicit bins: width 2, spanning 0 to 50.
mpg_bins = {"start": 0, "end": 50, "size": 2}
mpg_hist = go.Histogram(x=df["mpg"], xbins=mpg_bins)
data = [mpg_hist]

layout = go.Layout(title="Histogram")
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="plot_out/histExample.html")

'file://C:\\Users\\Ruthberg\\DevProjects\\notebooks\\plot_out\\histExample.html'

In [88]:
# Distribution plots: four samples of 200 standard-normal draws each, shifted by
# -2, 0, +2 and +4 so the distributions sit next to each other.
# The draws stay in this order so the seeded random stream is consumed as before.
x1 = np.random.randn(200) - 2
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 2
x4 = np.random.randn(200) + 4
hist_data = [x1, x2, x3, x4]

# Labels "X1".."X4", one per sample, and the same 0.2 bin width for every sample.
group_labels = ["X{}".format(idx) for idx in range(1, len(hist_data) + 1)]
bin_widths = [0.2] * len(hist_data)

fig = ff.create_distplot(hist_data, group_labels, bin_size=bin_widths)
pyo.plot(fig, filename="plot_out/distExample.html")

'file://C:\\Users\\Ruthberg\\DevProjects\\notebooks\\plot_out\\distExample.html'

In [89]:
# Heatmaps: load the 2010 Santa Barbara, CA temperature readings.
santa_barbara_csv = "data/plotly/2010SantaBarbaraCA.csv"
df = pd.read_csv(santa_barbara_csv)
df.head()  # preview the first rows

Unnamed: 0,LST_DATE,DAY,LST_TIME,T_HR_AVG
0,20100601,TUESDAY,0:00,12.7
1,20100601,TUESDAY,1:00,12.7
2,20100601,TUESDAY,2:00,12.3
3,20100601,TUESDAY,3:00,12.5
4,20100601,TUESDAY,4:00,12.7


In [92]:
# Heatmap with the day on x, the time of day on y and T_HR_AVG as the colour.
temp_heatmap = go.Heatmap(
    x=df["DAY"],
    y=df["LST_TIME"],
    z=df["T_HR_AVG"].values.tolist(),  # z is handed over as a plain Python list
    colorscale="Jet",
)
data = [temp_heatmap]

layout = go.Layout(title="SB CA Temp")
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="plot_out/heatmapExample.html")

'file://C:\\Users\\Ruthberg\\DevProjects\\notebooks\\plot_out\\heatmapExample.html'

In [93]:
# Multiple heatmaps with subplots: load one temperature file per city
# (Sitka AK, Santa Barbara CA, Yuma AZ), in that order.
city_csvs = (
    "data/plotly/2010SitkaAK.csv",
    "data/plotly/2010SantaBarbaraCA.csv",
    "data/plotly/2010YumaAZ.csv",
)
df1, df2, df3 = [pd.read_csv(csv_path) for csv_path in city_csvs]

In [99]:
def _city_heatmap(frame):
    """Build a DAY x LST_TIME heatmap of T_HR_AVG for one city's frame.

    zmin/zmax are pinned to 5 and 40 for every city so the three panels share
    one colour scale and can be compared directly.
    """
    return go.Heatmap(
        x=frame["DAY"],
        y=frame["LST_TIME"],
        z=frame["T_HR_AVG"].values.tolist(),
        colorscale="Jet",
        zmin=5,
        zmax=40,
    )


trace1, trace2, trace3 = [_city_heatmap(frame) for frame in (df1, df2, df3)]

# One row of three panels, each with its own y axis.
fig = tools.make_subplots(
    rows=1,
    cols=3,
    subplot_titles=["Sitka", "SB", "Yuma"],
    shared_yaxes=False,
)

# Place each city's trace in its panel (row 1, columns 1..3).
fig.append_trace(trace1, row=1, col=1)
fig.append_trace(trace2, row=1, col=2)
fig.append_trace(trace3, row=1, col=3)

fig["layout"].update(title="Temps for 3 cities")
pyo.plot(fig, filename="plot_out/multiheatmapExample.html")

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]  [ (1,3) x3,y3 ]



'file://C:\\Users\\Ruthberg\\DevProjects\\notebooks\\plot_out\\multiheatmapExample.html'