# Plotly
- Highcharts war cool, aber wenn wir Jupyter Notebooks erzeugen, müssen wir immer zwischen HTML und dem Notebook hin und her wechseln.
- Außerdem lassen sich die Daten nicht einfach in HTML importieren. 
- Es wäre praktisch, ein Tool zu haben, das
    a) interaktive Grafiken wie Highcharts ermöglicht
    b) sich auch von Jupyter aus "bedienen" lässt
- Genau das ist Plotly.
- http://plot.ly

## Installation

- Geht in Anaconda und sucht nach dem plotly-Package.
- Für diejenigen, die pip nutzen:
 - ```pip install plotly```
 - ```pip install numpy```

# Das Hello-World-Beispiel – in diesem Fall Scatterplots

In [1]:
%matplotlib inline

In [2]:
import plotly.offline as pyo
import plotly.graph_objs as go
import numpy as np

# Switch Plotly's offline mode to notebook rendering before the first iplot.
pyo.init_notebook_mode(connected=True)

# Fixed seed, so the same 100 integer coordinates (1..100) appear on each run.
np.random.seed(42)
random_x = np.random.randint(low=1, high=101, size=100)
random_y = np.random.randint(low=1, high=101, size=100)

# 'markers' mode draws the points without connecting lines.
scatter_trace = go.Scatter(x=random_x, y=random_y, mode='markers')
data = [scatter_trace]

fig = go.Figure(data=data)
pyo.iplot(fig, filename='hover-chart-basic')

## Titel und Achsen hinzufügen

In [5]:
# Echo the y-values array so the generated integers can be inspected.
random_y

array([26, 89, 60, 41, 29, 15, 45, 65, 89, 71,  9, 88,  1,  8, 88, 63, 11,
       81,  8, 35, 35, 33,  5, 41, 28,  7, 73, 72, 12, 34, 33, 48, 23, 62,
       88, 37, 99, 44, 86, 91, 35, 65, 99, 47, 78,  3,  1,  5, 90, 14, 27,
        9, 79, 15, 90, 42, 77, 51, 63, 96, 52, 96,  4, 94, 23, 15, 43, 29,
       36, 13, 32, 71, 59, 86, 28, 66, 42, 45, 62, 57,  6, 28, 28, 44, 84,
       30, 62, 75, 92, 89, 62, 97,  1, 27, 62, 77,  3, 70, 72, 27])

In [6]:
# dict() with keyword arguments builds an ordinary dictionary; the axis
# settings handed to go.Layout in this notebook are written in this form.
dict(title = 'Some random x-values')

{'title': 'Some random x-values'}

In [8]:
import plotly.offline as pyo
import plotly.graph_objs as go
import numpy as np

# Same seed as the first example, hence the same point cloud.
np.random.seed(42)
random_x = np.random.randint(low=1, high=101, size=100)
random_y = np.random.randint(low=1, high=101, size=100)

my_data = [go.Scatter(x=random_x, y=random_y, mode='markers')]

# Axis options are plain dicts; their 'title' entry is the axis label.
x_axis = {'title': 'Some random x-values'}
y_axis = {'title': 'Some random y-values'}

my_layout = go.Layout(
    title='Random Data Scatterplot',  # graph title
    xaxis=x_axis,
    yaxis=y_axis,
    hovermode='closest',  # hover the nearest point when several share an x
)

fig = go.Figure(data=my_data, layout=my_layout)
pyo.iplot(fig, filename='scatter2.html')

## Marker-Stil ändern

In [20]:
# Relies on np, go and pyo imported by an earlier cell.
np.random.seed(42)
random_x = np.random.randint(low=1, high=101, size=100)
random_y = np.random.randint(low=1, high=101, size=100)

# Marker appearance: large red crosses with a 1px outline.
marker_style = {
    'size': 12,
    'color': 'red',
    'symbol': 'cross',
    'line': {'width': 1},
}
styled_points = go.Scatter(
    x=random_x,
    y=random_y,
    mode='markers',
    marker=marker_style,
)
data = [styled_points]

layout = go.Layout(
    title='Random Data Scatterplot',  # graph title
    xaxis={'title': 'Some random x-values'},  # x-axis label
    yaxis={'title': 'Some random y-values'},  # y-axis label
    hovermode='closest',  # hover the nearest point when several share an x
)

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig, filename='scatter3.html')

# Line Charts

In [28]:
import plotly.offline as pyo
import plotly.graph_objs as go
import numpy as np

# Line-chart demo: the same random series is drawn with different Scatter
# modes, shifted vertically (or horizontally) so the traces do not overlap.
np.random.seed(56)
x_values = np.linspace(0, 1, 100) # 100 evenly spaced values
y_values = np.random.randn(100)   # 100 random values

# Create traces
trace0 = go.Scatter(
    x = x_values,
    y = y_values+5,          # shifted up by 5
    mode = 'markers',
    name = 'markers'
)
trace1 = go.Scatter(
    x = x_values,
    y = y_values,
    mode = 'lines+markers',
    name = 'lines+markers'
)
trace2 = go.Scatter(
    x = x_values,
    y = y_values-5,          # shifted down by 5
    mode = 'lines',
    name = 'lines'
)
trace3 = go.Scatter(
    x= x_values+1,           # shifted right by 1, continuing after the others
    y= y_values,
    # Was "line": mode only accepts combinations of 'lines', 'markers' and
    # 'text' (or 'none'), so the singular form is rejected by validation.
    mode = "lines",
    name = "plotti"
)
data = [trace0, trace1, trace2,trace3]  # assign traces to data
layout = go.Layout(
    title = 'Line chart showing three different modes'
)
fig = go.Figure(data=data,layout=layout)
pyo.iplot(fig, filename='line1.html')

# Bar Charts

In [29]:
import pandas as pd
# Load the medal table and show its first rows to see the available columns.
df = pd.read_csv('data/2018WinterOlympics.csv')
df.head()

Unnamed: 0,Rank,NOC,Gold,Silver,Bronze,Total
0,1,Norway,14,14,11,39
1,2,Germany,14,10,7,31
2,3,Canada,11,8,10,29
3,4,United States,9,8,6,23
4,5,Netherlands,8,6,6,20


In [32]:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

df = pd.read_csv('data/2018WinterOlympics.csv')

# Keep only the countries that won more than 10 medals in total.
more_than_ten = df["Total"] > 10
df = df[more_than_ten]

# One bar per country; NOC == National Olympic Committee.
total_bars = go.Bar(x=df['NOC'], y=df['Total'])
data = [total_bars]

layout = go.Layout(title='2018 Winter Olympiade Medallien nach Land')

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig, filename='bar1.html')

In [33]:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

df = pd.read_csv('data/2018WinterOlympics.csv')

# (medal column, bar colour) pairs; each pair becomes one Bar trace.
# NOC stands for National Olympic Committee.
medal_colors = [
    ('Gold', '#FFD700'),
    ('Silver', '#9EA0A1'),
    ('Bronze', '#CD7F32'),
]
trace1, trace2, trace3 = [
    go.Bar(x=df['NOC'], y=df[medal], name=medal, marker={'color': color})
    for medal, color in medal_colors
]

data = [trace1, trace2, trace3]
layout = go.Layout(title='2018 Winter Olympiade Medallien nach Land')

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig, filename='bar2.html')


# Stacked Barcharts

In [34]:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

df = pd.read_csv('data/2018WinterOlympics.csv')


def _medal_bar(medal, color):
    """Return a Bar trace of the *medal* column per NOC, filled with *color*."""
    # NOC stands for National Olympic Committee.
    return go.Bar(
        x=df['NOC'],
        y=df[medal],
        name=medal,
        marker={'color': color},
    )


trace1 = _medal_bar('Gold', '#FFD700')
trace2 = _medal_bar('Silver', '#9EA0A1')
trace3 = _medal_bar('Bronze', '#CD7F32')
data = [trace1, trace2, trace3]

# barmode='stack' piles the three medal bars of a country on top of each other.
layout = go.Layout(
    title='2018 Winter Olympic Medals by Country',
    barmode='stack',
)

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig, filename='bar3.html')


# Bubble Charts

In [11]:
# Load the car data set and show its first rows (pd comes from an earlier cell).
df = pd.read_csv('data/mpg.csv')
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [38]:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

df = pd.read_csv('data/mpg.csv')

# A bubble chart is a scatter plot whose marker size encodes a third column:
# here each bubble's size is 1.5 times the car's mpg value.
bubble_sizes = 1.5 * df['mpg']
bubbles = go.Scatter(
    x=df['weight'],
    y=df['horsepower'],
    text=df['name'],  # hover text: the car's name
    mode='markers',
    marker={'size': bubble_sizes},
)
data = [bubbles]

layout = go.Layout(
    title='Weight vs. horsepower',
    xaxis={'title': 'weight'},      # x-axis label
    yaxis={'title': 'horsepower'},  # y-axis label
    hovermode='closest',
)

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig, filename='bubble1.html')

# Boxplots
- https://keepingupwiththequants.weebly.com/qcs-letters.html

In [39]:
import plotly.offline as pyo
import plotly.graph_objs as go

# Two samples of three-letter-word frequencies, one list per author.
snodgrass = [0.209, 0.205, 0.196, 0.210, 0.202, 0.207, 0.224, 0.223, 0.220, 0.201]
twain = [0.225, 0.262, 0.217, 0.240, 0.230, 0.229, 0.235, 0.217]

# One box per sample, shown side by side under its short label.
box_qcs = go.Box(y=snodgrass, name='QCS')
box_mt = go.Box(y=twain, name='MT')
data = [box_qcs, box_mt]

layout = go.Layout(
    title = 'Comparison of three-letter-word frequencies<br>\
    between Quintus Curtius Snodgrass and Mark Twain'
)

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig, filename='box3.html')

# Histogramme

In [40]:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

# Histogram of the 'mpg' column of the car data set.
df = pd.read_csv('data/mpg.csv')

data = [go.Histogram(
    x=df['mpg']
)]

layout = go.Layout(
    # Adjacent string literals are joined by the parser. The former backslash
    # continuation inside the quotes carried the next line's indentation into
    # the title, and the text ended in a stray "'"; both are removed here.
    title=("Miles per Gallon Histogramm <br>"
           "von Fahrzeugen aus den 70ern")
)
fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig, filename='basic_histogram.html')

In [42]:
# Load the arrhythmia table and show its first rows; the histogram cells that
# follow read this df without loading it again.
df = pd.read_csv('data/arrhythmia.csv')
df.head()

Unnamed: 0,Age,Sex,Height
0,68,1,146
1,58,1,148
2,36,1,149
3,34,1,150
4,40,1,150


In [43]:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd


# df is the arrhythmia table loaded by the preceding cell.
# NOTE(review): the trace names assume Sex == 0 means male and Sex == 1 means
# female - confirm against the data set description.
heights_sex_0 = df[df['Sex']==0]['Height']
heights_sex_1 = df[df['Sex']==1]['Height']

# Semi-transparent bars so both distributions stay visible when overlaid.
data = [
    go.Histogram(x=heights_sex_0, opacity=0.75, name='Männlich'),
    go.Histogram(x=heights_sex_1, opacity=0.75, name='Weiblich'),
]

layout = go.Layout(
    barmode='overlay',
    title="Größenverteilung nach Geschlecht",
)

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig, filename='basic_histogram2.html')


In [46]:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd


# Height distribution over all rows of the df loaded by an earlier cell.
height_histogram = go.Histogram(x=df['Height'], opacity=0.75, name='Höhe')
data = [height_histogram]

layout = go.Layout(title="Größenverteilung")

fig = go.Figure(data=data, layout=layout)
# NOTE(review): same filename as the by-sex histogram cell - confirm intended.
pyo.iplot(fig, filename='basic_histogram2.html')



# Heatmaps

In [47]:
# Load the Santa Barbara temperature readings and show the first rows.
df = pd.read_csv('data/2010SantaBarbaraCA.csv')
df.head()

Unnamed: 0,LST_DATE,DAY,LST_TIME,T_HR_AVG
0,20100601,TUESDAY,0:00,12.7
1,20100601,TUESDAY,1:00,12.7
2,20100601,TUESDAY,2:00,12.3
3,20100601,TUESDAY,3:00,12.5
4,20100601,TUESDAY,4:00,12.7


In [48]:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

# Heatmap of the hourly average temperature: weekday on x, time of day on y.
df = pd.read_csv('data/2010SantaBarbaraCA.csv')

data = [go.Heatmap(
    x=df['DAY'], # weekday
    y=df['LST_TIME'], # time of day
    z=df['T_HR_AVG'].values.tolist(), # temperature
    colorscale='Jet'
)]

layout = go.Layout(
    # The original title read "... 2010 in<br>    in Santa Barbara": the word
    # "in" was doubled, and the backslash continuation inside the quotes put
    # the next line's indentation into the text. Adjacent literals avoid both.
    title=('Temperaturen in Stundenauflösung vom Juni 2010<br>'
           'in Santa Barbara, USA')
)
fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig, filename='Santa_Barbara.html')


# Mehrere Heatmaps auf einmal!

In [49]:
import plotly.offline as pyo
import plotly.graph_objs as go
from plotly import tools
import pandas as pd

df1 = pd.read_csv('data/2010SitkaAK.csv')
df2 = pd.read_csv('data/2010SantaBarbaraCA.csv')
df3 = pd.read_csv('data/2010YumaAZ.csv')


def _hourly_heatmap(frame):
    """Return a Heatmap of T_HR_AVG over DAY (x) and LST_TIME (y) of *frame*."""
    return go.Heatmap(
        x=frame['DAY'],
        y=frame['LST_TIME'],
        z=frame['T_HR_AVG'].values.tolist(),
        colorscale='Jet',
        # Identical colour limits keep the three plots comparable.
        zmin=5,
        zmax=40,
    )


trace1, trace2, trace3 = (_hourly_heatmap(frame) for frame in (df1, df2, df3))

fig = tools.make_subplots(
    rows=1,
    cols=3,
    subplot_titles=('Sitka, AK','Santa Barbara, CA', 'Yuma, AZ'),
    shared_yaxes=True,  # the hours then appear only on the left
)

# Place the traces left to right in the single row of the grid.
for column, trace in enumerate((trace1, trace2, trace3), start=1):
    fig.append_trace(trace, 1, column)

# The figure's layout can be updated directly through its 'layout' entry.
fig['layout'].update(
    title='Stundenauflösung Temperaturen, June 1-7, 2010'
)
pyo.iplot(fig, filename='AllThree.html')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y1 ]  [ (1,3) x3,y1 ]



In [51]:
import plotly.offline as py


# Node appearance and labels; links refer to nodes by their index in 'label'.
sankey_nodes = {
    'pad': 15,
    'thickness': 20,
    'line': {'color': "black", 'width': 0.5},
    'label': ["A1", "A2", "B1", "B2", "C1", "C2"],
    'color': ["blue"] * 6,
}

# Link i runs from source[i] to target[i] and carries value[i].
sankey_links = {
    'source': [0, 1, 0, 2, 3, 3],
    'target': [2, 3, 3, 4, 4, 5],
    'value': [8, 4, 2, 8, 4, 2],
}

data = {
    'type': 'sankey',
    'node': sankey_nodes,
    'link': sankey_links,
}

layout = {
    'title': "Basic Sankey Diagram",
    'font': {'size': 10},
}

# The figure is given as plain dicts, with validation switched off as before.
fig = {'data': [data], 'layout': layout}
py.iplot(fig, validate=False)