<a href="https://colab.research.google.com/github/saykim/ds/blob/main/Python_hvplot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Documentation

In [None]:
from IPython.display import IFrame

# Embed the hvPlot documentation site in the cell output.
documentation = IFrame(
    src='https://hvplot.pyviz.org',
    width=1200,
    height=500,
)
display(documentation)

# Notes

For more information and installation instructions, search for `hvplot` on the Python Package Index (https://pypi.org).

# Imports

In [None]:
import hvplot.pandas
import pandas as pd
import numpy as np
from vega_datasets import data as vds

# Line Plots

In [None]:
# Demo data for the line-plot examples: x runs 0, 10, ..., 90 and y is uniform random.
# A seeded generator keeps the frame identical on every Restart & Run All.
rng = np.random.default_rng(0)
df_line = pd.DataFrame({'x': list(np.arange(0, 100, 10)),
                        'y': rng.random(10)})
df_line

Unnamed: 0,x,y
0,0,0.621906
1,10,0.574359
2,20,0.140822
3,30,0.837573
4,40,0.663341
5,50,0.997058
6,60,0.246503
7,70,0.036955
8,80,0.437452
9,90,0.928741


In [None]:
# Basic line plot: the chart type is chosen through the generic `kind` argument.
df_line.hvplot(
    kind='line',
    x='x',
    y='y',
)

In [None]:
# Same line plot, this time through the dedicated `.hvplot.line` method.
df_line.hvplot.line(
    x='x',
    y='y',
)

In [None]:
# Line plot with explicit size, title, axis labels, y-range and grid lines.
df_line.hvplot.line(
    x='x', y='y',
    title='Title', xlabel='x-axis', ylabel='y-axis',
    ylim=(0, 1),
    width=600, height=400,
    grid=True,
)

In [None]:
# Uncomment the next line to show the help screen for line plots.
# hvplot.help('line')

In [None]:
# Demo data for the multi-line examples: one shared x column plus three
# uniform-random series. A seeded generator keeps the frame identical on every
# Restart & Run All.
rng = np.random.default_rng(1)
df_mult_line = pd.DataFrame({'x': list(np.arange(0, 100, 10)),
                             'y1': rng.random(10),
                             'y2': rng.random(10),
                             'y3': rng.random(10)})

df_mult_line

Unnamed: 0,x,y1,y2,y3
0,0,0.933681,0.441308,0.082343
1,10,0.428557,0.843908,0.959768
2,20,0.129788,0.902745,0.480931
3,30,0.294364,0.336656,0.953065
4,40,0.038823,0.931936,0.789232
5,50,0.002185,0.125848,0.039226
6,60,0.242987,0.565012,0.603769
7,70,0.575429,0.186925,0.174753
8,80,0.614562,0.307705,0.890814
9,90,0.251129,0.330533,0.58009


In [None]:
# Multiple line plot, option 1: move `x` into the index before plotting.
df_mult_line.set_index('x').hvplot(
    kind='line',
    title='Title',
    ylabel='y',
)

In [None]:
# Multiple line plot, option 2: name the x column and pass the y columns as a list.
df_mult_line.hvplot(
    kind='line',
    x='x',
    y=['y1', 'y2', 'y3'],
)

# Bar Charts

In [None]:
# Demo data for the bar chart: six categories A-F with uniform-random heights.
# A seeded generator keeps the frame identical on every Restart & Run All.
rng = np.random.default_rng(2)
df_bar = pd.DataFrame({'x': list('ABCDEF'),
                       'y': rng.random(6)})
df_bar

Unnamed: 0,x,y
0,A,0.373421
1,B,0.583237
2,C,0.706616
3,D,0.696984
4,E,0.008309
5,F,0.541702


In [None]:
# Bar chart; `rot` rotates the tick labels when they would otherwise collide.
df_bar.hvplot(
    kind='bar',
    x='x',
    y='y',
    rot=90,
)

In [None]:
# Demo data for the grouped/stacked bar charts: three uniform-random series per year.
# A seeded generator keeps the frame identical on every Restart & Run All.
rng = np.random.default_rng(3)
df_grouped_bar = pd.DataFrame({'Year': ['YR05','YR06','YR07','YR08','YR09','YR10'],
                               'A': rng.random(6),
                               'B': rng.random(6),
                               'C': rng.random(6)})
# Show the frame indexed by Year, which is how the bar-chart cells consume it.
df_grouped_bar.set_index('Year')

Unnamed: 0_level_0,A,B,C
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
YR05,0.29515,0.13141,0.805972
YR06,0.932137,0.061327,0.448085
YR07,0.791604,0.129467,0.235989
YR08,0.9687,0.48564,0.115466
YR09,0.798599,0.264986,0.282834
YR10,0.983907,0.278462,0.285005


In [None]:
# Grouped bar chart: with `stacked=False` the A/B/C bars are not stacked.
df_grouped_bar.set_index('Year').hvplot(
    kind='bar',
    stacked=False,
)

In [None]:
# Stacked bar chart: same data as the grouped chart, with `stacked=True`.
df_grouped_bar.set_index('Year').hvplot(
    kind='bar',
    stacked=True,
)

# Scatter Plots

In [None]:
# Load the cars sample data set from vega_datasets and preview the first rows.
cars = vds.cars()
cars.head(n=5)

Unnamed: 0,Acceleration,Cylinders,Displacement,Horsepower,Miles_per_Gallon,Name,Origin,Weight_in_lbs,Year
0,12.0,8,307.0,130.0,18.0,chevrolet chevelle malibu,USA,3504,1970-01-01
1,11.5,8,350.0,165.0,15.0,buick skylark 320,USA,3693,1970-01-01
2,11.0,8,318.0,150.0,18.0,plymouth satellite,USA,3436,1970-01-01
3,12.0,8,304.0,150.0,16.0,amc rebel sst,USA,3433,1970-01-01
4,10.5,8,302.0,140.0,17.0,ford torino,USA,3449,1970-01-01


In [None]:
# Scatter plot of weight against fuel economy; `c` takes the Horsepower column for colouring.
cars.hvplot(
    kind='scatter',
    x='Weight_in_lbs',
    y='Miles_per_Gallon',
    c='Horsepower',
)

In [None]:
# Load the iris sample data set from vega_datasets and preview the first rows.
iris = vds.iris()
iris.head(n=5)

Unnamed: 0,petalLength,petalWidth,sepalLength,sepalWidth,species
0,1.4,0.2,5.1,3.5,setosa
1,1.4,0.2,4.9,3.0,setosa
2,1.3,0.2,4.7,3.2,setosa
3,1.5,0.2,4.6,3.1,setosa
4,1.4,0.2,5.0,3.6,setosa


In [None]:
# Scatter plot of petal size, split by species through the `by` argument.
iris.hvplot(
    kind='scatter',
    x='petalLength',
    y='petalWidth',
    by='species',
)

# HexBin Plots
Hexbin plots can be a useful alternative to scatter plots if your data are too dense to plot each point individually.

In [None]:
# Diamonds data set (from Kaggle). The CSV is read via a relative path, so
# 'diamonds data.csv' must be present in the working directory.
diamonds = pd.read_csv('diamonds data.csv')
# Keep only the columns used in the examples, then preview the last rows.
df_diamonds = diamonds.loc[:, ['carat', 'cut', 'color', 'clarity', 'depth',
                               'table', 'price', 'x', 'y', 'z']]
df_diamonds.tail(n=5)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
53935,0.72,Ideal,D,SI1,60.8,57.0,2757,5.75,5.76,3.5
53936,0.72,Good,D,SI1,63.1,55.0,2757,5.69,5.75,3.61
53937,0.7,Very Good,D,SI1,62.8,60.0,2757,5.66,5.68,3.56
53938,0.86,Premium,H,SI2,61.0,58.0,2757,6.15,6.12,3.74
53939,0.75,Ideal,D,SI2,62.2,55.0,2757,5.83,5.87,3.64


In [None]:
# The plus (+) operator combines plots into one layout; `.cols(1)` then limits
# that layout to a single column.
layout = (
    df_diamonds.hvplot(x='carat', y='price', kind='scatter')
    + df_diamonds.hvplot(x='carat', y='price', kind='hexbin')
)
layout.cols(1)

# Stacked Area

In [None]:
# Small hand-written data set for the stacked-area example, written row by row.
df_stacked_area = pd.DataFrame(
    data=[
        (1, 1, 1, 3),
        (2, 2, 4, 4),
        (3, 4, 2, 5),
        (4, 3, 2, 2),
        (5, 4, 3, 1),
    ],
    columns=['x', 'y1', 'y2', 'y3'],
)

df_stacked_area

Unnamed: 0,x,y1,y2,y3
0,1,1,1,3
1,2,2,4,4
2,3,4,2,5
3,4,3,2,2
4,5,4,3,1


In [None]:
# Stacked area chart of y1, y2 and y3 over x.
df_stacked_area.hvplot(
    kind='area',
    x='x',
    y=['y1', 'y2', 'y3'],
    stacked=True,
)

# Histogram

In [None]:
# Histogram of the Horsepower column using 150 bins.
cars.hvplot.hist(
    'Horsepower',
    bins=150,
)

# Kernel Density Estimate

In [None]:
# Demo data for the KDE example: three zero-mean normal samples of 1000 points
# each, with standard deviations 10, 25 and 50.
# A seeded generator keeps the frame identical on every Restart & Run All.
rng = np.random.default_rng(4)
df_kde = pd.DataFrame({'A': rng.normal(loc=0.0, scale=10, size=1000),
                       'B': rng.normal(loc=0.0, scale=25, size=1000),
                       'C': rng.normal(loc=0.0, scale=50, size=1000)})

df_kde.head()

Unnamed: 0,A,B,C
0,-1.286965,-12.849821,136.28011
1,-20.446771,55.75728,-21.984688
2,23.199205,-27.43482,-87.525498
3,-7.962537,2.762579,68.621589
4,-0.050482,-29.369443,-10.104472


In [None]:
# Kernel density estimate for columns A, B and C.
# (`list('ABC')` is an equivalent shorthand for the column list.)
df_kde.hvplot.kde(y=['A', 'B', 'C'])

# Scatter Matrix

In [None]:
# Scatter matrix of the iris data, with the species column passed as `c`.
hvplot.scatter_matrix(
    iris,
    c='species',
)

# Table

In [None]:
# Load the barley sample data set from vega_datasets and preview the first rows.
barley = vds.barley()
barley.head(n=5)

Unnamed: 0,site,variety,year,yield
0,University Farm,Manchuria,1931,27.0
1,Waseca,Manchuria,1931,48.86667
2,Morris,Manchuria,1931,27.43334
3,Crookston,Manchuria,1931,39.93333
4,Grand Rapids,Manchuria,1931,32.96667


In [None]:
# Render the frame as a table; the resulting table can be sorted.
barley.hvplot(
    kind='table',
)

# Overlay

In [None]:
# The star (*) operator overlays chart elements: here a line on top of a bar chart.
# A seeded generator keeps the random data identical on every Restart & Run All.
rng = np.random.default_rng(5)
df_overlay = pd.DataFrame({'x': list(np.arange(0, 100, 10)),
                           'y': rng.random(10)})
(
    df_overlay.hvplot(x='x', y='y', kind='bar', c='blue')
    * df_overlay.hvplot(x='x', y='y', kind='line', c='orange')
)

# Interactive Charts with Widgets

In [None]:
# Load the gapminder sample data set from vega_datasets and preview the last rows.
gapminder = vds.gapminder()
gapminder.tail(n=5)

Unnamed: 0,cluster,country,fertility,life_expect,pop,year
688,3,Venezuela,3.6485,70.19,16997509,1985
689,3,Venezuela,3.25,71.15,19325222,1990
690,3,Venezuela,2.9415,72.146,21555902,1995
691,3,Venezuela,2.723,72.766,23542649,2000
692,3,Venezuela,2.547,73.747,25375281,2005


In [None]:
# Life expectancy over time, grouped by country via `groupby`.
gapminder.hvplot(
    kind='line',
    x='year',
    y='life_expect',
    groupby='country',
)

In [None]:
# Load the stocks sample data set from vega_datasets and preview the first rows.
stocks = vds.stocks()
stocks.head(n=5)

Unnamed: 0,symbol,date,price
0,MSFT,2000-01-01,39.81
1,MSFT,2000-02-01,36.35
2,MSFT,2000-03-01,43.22
3,MSFT,2000-04-01,28.37
4,MSFT,2000-05-01,25.45


In [None]:
# Interactive example built with ipywidgets: a dropdown listing every symbol
# found in the stocks data, preselected to 'MSFT'.

import ipywidgets

symbol_widget = ipywidgets.Dropdown(
    description='symbol:',
    options=stocks['symbol'].unique().tolist(),
    value='MSFT',
)

def create_chart(symbol):
    """Display an hvPlot chart of price over date for one stock symbol.

    Parameters
    ----------
    symbol
        Value matched against ``stocks.symbol`` to select rows; it is also
        used as the chart title.

    The chart is shown with ``display``; nothing is returned.
    """
    is_selected = stocks.symbol == symbol
    price_chart = stocks[is_selected].hvplot(x='date', y='price', title=f'{symbol}')
    display(price_chart)
    
# Wire the dropdown to create_chart. The trailing semicolon stops the notebook
# from echoing interact()'s return value (the function repr) below the widget.
ipywidgets.interact(create_chart, symbol=symbol_widget);

interactive(children=(Dropdown(description='symbol:', options=('MSFT', 'AMZN', 'IBM', 'GOOG', 'AAPL'), value='…

<function __main__.create_chart(symbol)>

In [None]:
# Load the airports sample data set from vega_datasets and preview the first rows.
airports = vds.airports()
airports.head(n=5)

Unnamed: 0,iata,name,city,state,country,latitude,longitude
0,00M,Thigpen,Bay Springs,MS,USA,31.953765,-89.234505
1,00R,Livingston Municipal,Livingston,TX,USA,30.685861,-95.017928
2,00V,Meadow Lake,Colorado Springs,CO,USA,38.945749,-104.569893
3,01G,Perry-Warsaw,Perry,NY,USA,42.741347,-78.052081
4,01J,Hilliard Airpark,Hilliard,FL,USA,30.688012,-81.905944


# Maps

In [None]:
# Airport locations drawn over an ESRI tile source (hvPlot together with GeoViews).
# GeoViews is a separate package: search for it on the Python Package Index and
# follow its installation instructions before running this cell.
import geoviews as gv

gv.tile_sources.ESRI * airports.hvplot.points(
    'longitude',
    'latitude',
    geo=True,
    xlim=(-180, -30),
    ylim=(0, 72),
    height=500,
    color='yellow',
    alpha=0.2,
)