In [1]:
# Data handling, numerics and plotting libraries used by the cells below.
import pandas as pd
import plotly
#import chart_studio.plotly as py
import math
import matplotlib.pyplot as plt
import seaborn as sns
from plotly import __version__
# iplot renders a plotly figure inside the notebook output area.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# NOTE(review): recent IPython releases deprecate importing these names from
# IPython.core.display in favour of IPython.display -- confirm against the
# installed IPython version.
from IPython.core.display import display, HTML
# Show matplotlib figures inline in the notebook.
%matplotlib inline
import numpy as np
import plotly.graph_objs as go
import plotly.express as px
# Enable plotly's offline notebook mode; per the plotly.offline documentation,
# connected=True loads plotly.js from an online CDN instead of embedding it.
plotly.offline.init_notebook_mode(connected=True)

In [2]:
# Gapminder sample dataset that ships with Plotly Express.
gapminder_df = px.data.gapminder()
# Preview the first five rows.
gapminder_df.head(5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4


In [3]:
# Restaurant tips sample dataset that ships with Plotly Express.
tips_df = px.data.tips()
# Preview the first five rows.
tips_df.head(5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


## Scatter Plot

In [4]:
# Plotly Express version: a single call builds the whole figure.
# Keep only the rows recorded for the year 2007.
is_2007 = gapminder_df['year'] == 2007
gapminder_subset = gapminder_df[is_2007]
fig = px.scatter(
    gapminder_subset,
    x="gdpPercap",
    y="lifeExp",
    title='Life Expectancy vs GDP Per Capita',
    hover_name='country',
)
iplot(fig)

In [5]:
# Graph-objects ("plotly grammar") version: traces + layout -> figure.
# A single trace holds the x/y series; no `mode` is passed on purpose here.
life_vs_gdp = go.Scatter(
    x=gapminder_subset["gdpPercap"],
    y=gapminder_subset["lifeExp"],
)
# The layout carries the figure title, axis titles and hover behaviour.
layout = go.Layout(
    title='Life Expectancy vs GDP Per Capita',
    xaxis=dict(title='GDP Per Capita'),
    yaxis=dict(title='Life Expectancy'),
    hovermode='closest',
)
# A figure is the list of traces plus the layout.
figure = go.Figure(data=[life_vs_gdp], layout=layout)
iplot(figure)

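This graph does not look as expected: although the trace is a `go.Scatter`, the points are joined by lines. No `mode` was passed, and for traces with 20 or more points `go.Scatter` defaults to `mode='lines'`.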

In [6]:
# mode='markers' turns the trace into a true scatter plot (points only).
points_trace = go.Scatter(
    x=gapminder_subset["gdpPercap"],
    y=gapminder_subset["lifeExp"],
    mode='markers',
)
# Title, axis titles and hover behaviour.
layout = go.Layout(
    title='Life Expectancy vs GDP Per Capita',
    xaxis=dict(title='GDP Per Capita'),
    yaxis=dict(title='Life Expectancy'),
    hovermode='closest',
)
# Assemble the figure from the trace list and the layout, then render it.
figure = go.Figure(data=[points_trace], layout=layout)
iplot(figure)

The 'mode' property is a flaglist and may be specified
    as a string containing:
      - Any combination of ['lines', 'markers', 'text'] joined with '+' characters
        (e.g. 'lines+markers')
        OR exactly one of ['none'] (e.g. 'none')

In [7]:
# Plotly Express: `color` splits the scatter into one series per continent.
fig = px.scatter(
    gapminder_subset,
    x="gdpPercap",
    y='lifeExp',
    color='continent',
    title='Life Expectancy vs GDP Per Capita',
    hover_name='country',
)
iplot(fig)

In [8]:
# Same split with graph objects: one hand-written trace per continent.
# This does not scale -- 100 categories would need 100 near-identical
# statements, which costs time and invites copy/paste mistakes.
continent_col = gapminder_subset['continent']
asia = gapminder_subset[continent_col == 'Asia']
europe = gapminder_subset[continent_col == 'Europe']
africa = gapminder_subset[continent_col == 'Africa']
americas = gapminder_subset[continent_col == 'Americas']
oceania = gapminder_subset[continent_col == 'Oceania']

trace1 = go.Scatter(x=asia["gdpPercap"], y=asia["lifeExp"], name='Asia', mode='markers')
trace2 = go.Scatter(x=europe["gdpPercap"], y=europe["lifeExp"], name='Europe', mode='markers')
trace3 = go.Scatter(x=africa["gdpPercap"], y=africa["lifeExp"], name='Africa', mode='markers')
trace4 = go.Scatter(x=americas["gdpPercap"], y=americas["lifeExp"], name='Americas', mode='markers')
trace5 = go.Scatter(x=oceania["gdpPercap"], y=oceania["lifeExp"], name='Oceania', mode='markers')

layout = go.Layout(
    title='Life Expectancy vs GDP Per Capita',
    xaxis=dict(title='GDP Per Capita'),
    yaxis=dict(title='Life Expectancy'),
    hovermode='closest',
)
figure = go.Figure(data=[trace1, trace2, trace3, trace4, trace5], layout=layout)
iplot(figure)

In [9]:
# Generate the per-continent traces programmatically instead of writing
# one go.Scatter statement per category.
traces = []
for cont in gapminder_subset['continent'].unique():
    rows = gapminder_subset[gapminder_subset['continent'] == cont]
    traces.append(
        go.Scatter(x=rows["gdpPercap"], y=rows["lifeExp"], name=cont, mode='markers')
    )

layout = go.Layout(
    title='Life Expectancy vs GDP Per Capita',
    xaxis=dict(title='GDP Per Capita'),
    yaxis=dict(title='Life Expectancy'),
    hovermode='closest',
)
figure = go.Figure(data=traces, layout=layout)
iplot(figure)

### For the full list of parameters accepted by `go.Layout`, see the [API reference](https://plotly.com/python-api-reference/generated/plotly.graph_objects.Layout.html)

In [10]:
# Marker appearance (size, colour, symbol) is set through the `marker` dict.
marker_style = dict(size=10, color='darkorchid', symbol='square')
styled_trace = go.Scatter(
    x=gapminder_subset["gdpPercap"],
    y=gapminder_subset["lifeExp"],
    mode='markers',
    marker=marker_style,
)
layout = go.Layout(
    title='Life Expectancy vs GDP Per Capita',
    xaxis=dict(title='GDP Per Capita'),
    yaxis=dict(title='Life Expectancy'),
    hovermode='closest',
)
figure = go.Figure(data=[styled_trace], layout=layout)
iplot(figure)

## Bubble Plots

In [11]:
# Plotly Express bubble chart: `size` maps population to marker size,
# `log_x=True` puts the x axis on a log scale.
fig = px.scatter(
    gapminder_subset,
    x="gdpPercap",
    y="lifeExp",
    color='continent',
    size='pop',
    size_max=60,
    hover_name="country",
    log_x=True,
    title='Life Expectancy vs GDP Per Capita',
)
iplot(fig)

In [12]:
# Bubble chart with graph objects: one marker trace per continent, with
# marker size driven by population.
#
# Population is passed as the marker *area* and scaled with `sizeref`,
# following plotly's documented bubble-chart recipe
# (sizeref = 2 * max(size) / desired_max_diameter ** 2). Dividing the raw
# population by a fixed constant and using it as a pixel diameter made most
# countries smaller than one pixel while the largest ones covered the plot.
BUBBLE_MAX_DIAMETER = 60  # pixels; same value as size_max in the px cell
bubble_sizeref = 2.0 * gapminder_subset['pop'].max() / (BUBBLE_MAX_DIAMETER ** 2)

traces = []
for cont in gapminder_subset['continent'].unique():
    rows = gapminder_subset[gapminder_subset['continent'] == cont]
    traces.append(
        go.Scatter(
            x=rows["gdpPercap"],
            y=rows["lifeExp"],
            name=cont,
            mode='markers',
            marker=dict(
                size=rows['pop'],
                sizemode='area',
                sizeref=bubble_sizeref,
                sizemin=2,  # keep the smallest countries visible
            ),
        )
    )
data = traces
# type='log' puts the x axis on a log scale (the go equivalent of log_x=True).
layout = go.Layout(
    title='Life Expectancy vs GDP Per Capita',
    xaxis=dict(title='GDP Per Capita', type='log'),
    yaxis=dict(title='Life Expectancy'),
    hovermode='closest',
)
figure = go.Figure(data=data, layout=layout)
iplot(figure)

In [13]:
# Plotly Express bubble chart where `color` is a continuous column
# (gdpPercap), which yields a colour scale rather than discrete groups.
fig = px.scatter(
    gapminder_subset,
    x="gdpPercap",
    y="lifeExp",
    color="gdpPercap",
    size='pop',
    size_max=60,
    hover_name="country",
    log_x=True,
)
iplot(fig)

## Line Plots

In [14]:
# Rows for the Oceania continent only, across all years.
in_oceania = gapminder_df["continent"] == 'Oceania'
ocean_df = gapminder_df[in_oceania]

# One line per country.
fig = px.line(
    ocean_df,
    x="year",
    y="lifeExp",
    color='country',
    title='Life Expectancy by Years',
)
iplot(fig)

In [15]:
# With graph objects a line plot is a go.Scatter trace with mode='lines'.
# Build one trace per country in the Oceania frame.
traces = []
for country in ocean_df['country'].unique():
    country_rows = ocean_df[ocean_df['country'] == country]
    traces.append(
        go.Scatter(
            x=country_rows['year'],
            y=country_rows['lifeExp'],
            name=country,
            mode='lines',
        )
    )

layout = go.Layout(
    title='Life Expectancy by Years',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Life Expectancy'),
    hovermode='closest',
)
figure = go.Figure(data=traces, layout=layout)
iplot(figure)

In [16]:
# Mean life expectancy per year (rows) and continent (columns).
yearcont_pivot = gapminder_df.pivot_table(
    values='lifeExp',
    index='year',
    columns='continent',
    aggfunc='mean',
)
yearcont_pivot

continent,Africa,Americas,Asia,Europe,Oceania
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1952,39.1355,53.27984,46.314394,64.4085,69.255
1957,41.266346,55.96028,49.318544,66.703067,70.295
1962,43.319442,58.39876,51.563223,68.539233,71.085
1967,45.334538,60.41092,54.66364,69.7376,71.31
1972,47.450942,62.39492,57.319269,70.775033,71.91
1977,49.580423,64.39156,59.610556,71.937767,72.855
1982,51.592865,66.22884,62.617939,72.8064,74.29
1987,53.344788,68.09072,64.851182,73.642167,75.32
1992,53.629577,69.56836,66.537212,74.4401,76.945
1997,53.598269,71.15048,68.020515,75.505167,78.19


In [17]:
# mode='lines+markers' draws the line and a marker at every data point.
# Each column of the pivot (one per continent) becomes its own trace.
traces = []
for continent in yearcont_pivot.columns:
    traces.append(
        go.Scatter(
            x=yearcont_pivot.index,
            y=yearcont_pivot.loc[:, continent],
            name=continent,
            mode='lines+markers',
        )
    )

layout = go.Layout(
    title='Life Expectancy by Years',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Life Expectancy'),
    hovermode='closest',
)
figure = go.Figure(data=traces, layout=layout)
iplot(figure)

## Bar Plots

In [18]:
# Plotly Express stacked bars: population per year for the Americas,
# one stacked segment per country.
americas_rows = gapminder_df[gapminder_df['continent'] == 'Americas']
fig = px.bar(
    americas_rows,
    x="year",
    y="pop",
    color="country",
    barmode='stack',
    title="Population split by Years",
)
iplot(fig)

In [19]:
# Stacked bars with graph objects: one go.Bar trace per country in the
# Americas, stacked via barmode='stack' in the layout.
gapminder_americas = gapminder_df[gapminder_df['continent'] == 'Americas']

traces = []
for country in gapminder_americas['country'].unique().tolist():
    country_rows = gapminder_americas[gapminder_americas['country'] == country]
    traces.append(go.Bar(x=country_rows['year'], y=country_rows['pop'], name=country))
data = traces

# x carries the year and y the population, so the axis titles must say so
# (they were previously swapped).
layout = go.Layout(
    title="Population split by Years",
    xaxis=dict(title='Year'),
    yaxis=dict(title='Population'),
    hovermode='closest',
    barmode='stack',
)
figure = go.Figure(data=data, layout=layout)
iplot(figure)

In [20]:
# Plotly Express grouped bars: barmode='group' places the continents
# side by side within each year.
fig = px.bar(
    gapminder_df,
    x="year",
    y="pop",
    color="continent",
    barmode='group',
    hover_name='country',
    title="Population split by Years and Continent",
)
iplot(fig)

In [21]:
# Grouped bars with graph objects: one go.Bar trace per continent, placed
# side by side via barmode='group' in the layout.
traces = []
for continent in gapminder_df['continent'].unique():
    continent_rows = gapminder_df[gapminder_df['continent'] == continent]
    traces.append(
        go.Bar(x=continent_rows['year'], y=continent_rows['pop'], name=continent)
    )
data = traces

# The chart shows population (y) per year (x). The previous title
# ('Life Expectancy by Years') and the swapped axis titles described a
# different chart; the title now matches the Plotly Express version.
layout = go.Layout(
    title='Population split by Years and Continent',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Population'),
    hovermode='closest',
    barmode='group',
)
figure = go.Figure(data=data, layout=layout)
iplot(figure)

In [22]:
# Plotly Express bars where `color` is a continuous column (lifeExp),
# giving a colour scale instead of discrete groups.
americas_rows = gapminder_df[gapminder_df['continent'] == 'Americas']

fig = px.bar(
    americas_rows,
    x='year',
    y='pop',
    color='lifeExp',
    hover_name='country',
    hover_data=['lifeExp', 'gdpPercap'],
    # The frame is filtered to the Americas, so the axis label must not
    # claim to show the world population.
    labels={'pop': 'population of Americas'},
    height=400,
)
iplot(fig)

In [23]:
# Horizontal bars: orientation='h' with the value on x and the category on y.
oceania_rows = gapminder_df[gapminder_df['continent'] == 'Oceania']
fig = px.bar(
    oceania_rows,
    x="pop",
    y="year",
    color="country",
    barmode='group',
    orientation='h',
    title="Population split by Years",
)
iplot(fig)

## Box Plots

In [24]:
# Box plot of the bill per day, split by smoker status.
# points="all" draws every observation next to its box.
fig = px.box(
    tips_df,
    x="day",
    y="total_bill",
    color="smoker",
    points="all",
)
iplot(fig)

## Histograms

In [25]:

# Histogram of the bill amounts with plotly's automatic binning.
fig = px.histogram(
    tips_df,
    x="total_bill",
)
iplot(fig)

In [26]:
# Histogram over a categorical column: one bar per day value.
fig = px.histogram(
    tips_df,
    x="day",
)
iplot(fig)

In [27]:
# `nbins` controls how many bins the histogram uses.
fig = px.histogram(
    tips_df,
    x="total_bill",
    nbins=20,
)
iplot(fig)

In [28]:
# Full control over binning: compute the histogram with NumPy on explicit
# bin edges (0, 5, ..., 55) and draw the counts as a bar chart.
bin_edges = range(0, 60, 5)
counts, edges = np.histogram(tips_df.total_bill, bins=bin_edges)

# Place each bar at the midpoint of its bin.
bin_centers = 0.5 * (edges[:-1] + edges[1:])

fig = px.bar(
    x=bin_centers,
    y=counts,
    labels={'x':'total_bill', 'y':'count'},
)
iplot(fig)

In [29]:
# Probability-density histogram with the y axis on a log scale.
fig = px.histogram(
    tips_df,
    x="total_bill",
    histnorm='probability density',
    log_y=True,
)
iplot(fig)

In [30]:
# Stacked histogram split by a categorical variable (sex), with a rug
# plot drawn in the margin.
fig = px.histogram(
    tips_df,
    x="total_bill",
    color="sex",
    barmode='stack',
    marginal="rug",
)
iplot(fig)

In [31]:
# Overlaid histograms split by sex, with a box plot drawn in the margin.
fig = px.histogram(
    tips_df,
    x="total_bill",
    color="sex",
    barmode='overlay',
    marginal="box",
)
iplot(fig)

In [32]:
# Probability distribution plot built with plotly's figure factory.
import plotly.figure_factory as ff

# Seeded generator so the synthetic samples -- and therefore the figure --
# are identical on every Restart & Run All.
rng = np.random.default_rng(42)

# Two synthetic samples of 200 standard-normal draws; '2013' is shifted by +1.
custom_df = pd.DataFrame({
    '2012': rng.standard_normal(200),
    '2013': rng.standard_normal(200) + 1,
})

# create_distplot(hist_data, group_labels, bin_size): one series per column,
# labelled with the column name.
fig = ff.create_distplot(
    [custom_df[c] for c in custom_df.columns],
    list(custom_df.columns),
    bin_size=.25,
)
iplot(fig)

## Regression Plots

In [33]:
# Scatter of tip vs bill with an ordinary-least-squares trendline.
fig = px.scatter(
    tips_df,
    x="total_bill",
    y="tip",
    trendline="ols",
)
iplot(fig)

In [34]:
# Split regression: an OLS trendline per colour (sex) inside every
# smoker x time facet.
fig = px.scatter(
    tips_df,
    x="total_bill",
    y="tip",
    color="sex",
    facet_col="smoker",
    facet_row="time",
    trendline="ols",
)
iplot(fig)

# One row per fitted trendline, with the fit result in `px_fit_results`.
results = px.get_trendline_results(fig)
print(results)

# Show the fit summary of the first trendline for male smokers.
male_smoker_fits = results.query("sex == 'Male' and smoker == 'Yes'")
male_smoker_fits.px_fit_results.iloc[0].summary()

      sex    time smoker                                     px_fit_results
0  Female  Dinner     No  <statsmodels.regression.linear_model.Regressio...
1  Female  Dinner    Yes  <statsmodels.regression.linear_model.Regressio...
2  Female   Lunch     No  <statsmodels.regression.linear_model.Regressio...
3  Female   Lunch    Yes  <statsmodels.regression.linear_model.Regressio...
4    Male  Dinner     No  <statsmodels.regression.linear_model.Regressio...
5    Male  Dinner    Yes  <statsmodels.regression.linear_model.Regressio...
6    Male   Lunch     No  <statsmodels.regression.linear_model.Regressio...
7    Male   Lunch    Yes  <statsmodels.regression.linear_model.Regressio...


0,1,2,3
Dep. Variable:,y,R-squared:,0.203
Model:,OLS,Adj. R-squared:,0.185
Method:,Least Squares,F-statistic:,11.44
Date:,"Mon, 03 Aug 2020",Prob (F-statistic):,0.0015
Time:,19:07:01,Log-Likelihood:,-82.849
No. Observations:,47,AIC:,169.7
Df Residuals:,45,BIC:,173.4
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.4656,0.533,2.748,0.009,0.391,2.540
x1,0.0701,0.021,3.382,0.001,0.028,0.112

0,1,2,3
Omnibus:,19.093,Durbin-Watson:,1.298
Prob(Omnibus):,0.0,Jarque-Bera (JB):,25.489
Skew:,1.381,Prob(JB):,2.92e-06
Kurtosis:,5.322,Cond. No.,65.3


In [35]:
# Non-linear fit: a LOWESS trendline per continent.
fig = px.scatter(
    gapminder_subset,
    x="gdpPercap",
    y="lifeExp",
    color="continent",
    trendline="lowess",
)
iplot(fig)

## Faceted Plots

In [36]:
# Faceted bars: rows by meal time, columns by day; category_orders fixes
# the order in which the facets appear.
facet_order = {
    "day": ["Thur", "Fri", "Sat", "Sun"],
    "time": ["Lunch", "Dinner"],
}
fig = px.bar(
    tips_df,
    x="sex",
    y="total_bill",
    color="smoker",
    barmode="group",
    facet_row="time",
    facet_col="day",
    category_orders=facet_order,
)
iplot(fig)

## Dealing with long and wide input format

In [37]:
# Wide format via pivoting: rows are sex, columns are day, and each cell
# counts the entries of the `size` column.
tips_pivot = tips_df.pivot_table(
    values='size',
    index='sex',
    columns='day',
    aggfunc='count',
)
tips_pivot

day,Fri,Sat,Sun,Thur
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,9,28,18,32
Male,10,59,58,30


In [38]:
# Single cell lookup by row label and column label.
tips_pivot.at['Female', 'Fri']

9

In [39]:
# Move the `sex` index into an ordinary column (returns a new frame).
tips_pivot.reset_index(drop=False)

day,sex,Fri,Sat,Sun,Thur
0,Female,9,28,18,32
1,Male,10,59,58,30


In [40]:
# Wide-form input: pass several value columns as `y`; each becomes a series.
tips_wide = tips_pivot.reset_index()
day_columns = ['Fri', 'Sat', 'Sun', 'Thur']
fig = px.bar(
    tips_wide,
    x="sex",
    y=day_columns,
    title="Wide-Form Input",
)
iplot(fig)

In [41]:
# Long format via grouping: non-null counts per (sex, day) combination.
group_keys = ['sex', 'day']
tips_group = tips_df.groupby(group_keys).count()
tips_group

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip,smoker,time,size
sex,day,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,Fri,9,9,9,9,9
Female,Sat,28,28,28,28,28
Female,Sun,18,18,18,18,18
Female,Thur,32,32,32,32,32
Male,Fri,10,10,10,10,10
Male,Sat,59,59,59,59,59
Male,Sun,58,58,58,58,58
Male,Thur,30,30,30,30,30


In [42]:
# Flatten the (sex, day) index into regular columns (returns a new frame).
tips_group.reset_index(drop=False)

Unnamed: 0,sex,day,total_bill,tip,smoker,time,size
0,Female,Fri,9,9,9,9,9
1,Female,Sat,28,28,28,28,28
2,Female,Sun,18,18,18,18,18
3,Female,Thur,32,32,32,32,32
4,Male,Fri,10,10,10,10,10
5,Male,Sat,59,59,59,59,59
6,Male,Sun,58,58,58,58,58
7,Male,Thur,30,30,30,30,30


In [43]:
# Long-form input: one value column (`size`) plus a column (`day`) that
# identifies the series.
tips_long = tips_group.reset_index()
fig = px.bar(
    tips_long,
    x="sex",
    y="size",
    color="day",
    title="Long-Form Input",
)
iplot(fig)

## Gantt Chart

In [44]:
# Task schedule used by the timeline cells: one row per (task, resource)
# assignment with ISO-formatted start and finish dates.
effort_rows = [
    ("Job A", '2009-01-01', '2009-02-28', "Ranvir"),
    ("Job B", '2009-03-05', '2009-04-15', "Riya"),
    ("Job C", '2009-02-20', '2009-05-30', "Riya"),
    ("Job C", '2009-01-20', '2009-04-30', "Dhruv"),
]
effort_df = pd.DataFrame(
    effort_rows,
    columns=["Task", "Start", "Finish", "Resource"],
)
effort_df

Unnamed: 0,Task,Start,Finish,Resource
0,Job A,2009-01-01,2009-02-28,Ranvir
1,Job B,2009-03-05,2009-04-15,Riya
2,Job C,2009-02-20,2009-05-30,Riya
3,Job C,2009-01-20,2009-04-30,Dhruv


In [45]:
# Gantt chart: one bar per row from Start to Finish, tasks on the y axis,
# coloured by the resource working on them.
fig = px.timeline(
    effort_df,
    x_start="Start",
    x_end="Finish",
    y="Task",
    color="Resource",
)
# Reverse the y axis direction.
fig.update_yaxes(autorange="reversed")
iplot(fig)

In [46]:
# Same schedule with resources on the y axis: one row per resource.
fig = px.timeline(
    effort_df,
    x_start="Start",
    x_end="Finish",
    y="Resource",
    color="Resource",
)
iplot(fig)

## Sunburst Chart

In [47]:
# Sunburst: `path` lists the hierarchy levels from the centre outwards;
# sector sizes come from the `tip` column.
hierarchy = ['day', 'time', 'sex', 'smoker']
fig = px.sunburst(tips_df, path=hierarchy, values='tip')
iplot(fig)

## Sankey Diagram

In [48]:
import plotly.graph_objects as go

# Node labels; links refer to nodes by their position in this list
# (0 -> "A1", 1 -> "A2", 2 -> "B1", ...).
node_labels = ["A1", "A2", "B1", "B2", "C1", "C2",'D1']

# Link i flows from node link_sources[i] to node link_targets[i]
# with magnitude link_values[i].
link_sources = [0, 1, 0, 2, 3, 3,5]
link_targets = [2, 3, 3, 4, 4, 5,6]
link_values = [8, 4, 2, 8, 4, 2,9]

sankey_trace = go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=node_labels,
        color="blue",
    ),
    link=dict(
        source=link_sources,
        target=link_targets,
        value=link_values,
    ),
)
fig = go.Figure(data=[sankey_trace])

#fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
iplot(fig)

## Tree Maps

In [49]:
# Treemap from parallel lists: parents[i] is the parent of names[i];
# the empty string marks the root node.
member_names = ["Baba","Rajiv", "Leena", "Nupur", "Kanika", "Ranvir", "Yamini", "Yashi", "Shivangi",'Shrijit','K Bhanja','Y Bhanja']
member_parents = ["", 'Baba','Baba','Baba','Nupur','Rajiv','Nupur','Leena','Rajiv','Leena','Kanika','Yamini']
fig = px.treemap(names=member_names, parents=member_parents)
iplot(fig)

In [50]:
# Treemap of total bill by day > time > sex > smoker under a single root.
# The constant 'all' column provides that root. It is added to a copy via
# assign() so the shared tips_df is not mutated and earlier cells give the
# same result when re-run.
tips_rooted = tips_df.assign(all='all')
fig = px.treemap(
    tips_rooted,
    path=['all', 'day', 'time', 'sex', 'smoker'],
    values='total_bill',
)
iplot(fig)

In [51]:
# Treemap of population by continent > country under a single "world" root,
# coloured by life expectancy. The root column is added to a copy via
# assign() so the shared gapminder_df is not mutated and earlier cells give
# the same result when re-run.
gapminder_rooted = gapminder_df.assign(world="world")
fig = px.treemap(
    gapminder_rooted,
    path=['world', 'continent', 'country'],
    values='pop',
    color='lifeExp',
    hover_data=['iso_alpha'],
    color_continuous_scale='RdBu',
    #color_continuous_midpoint=np.average(gapminder_df['lifeExp'], weights=gapminder_df['pop'])
)
iplot(fig)

## Widgets

In [52]:
# Apple stock prices from the plotly sample-datasets repository
# (requires network access).
stock_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv')

fig = px.line(
    stock_df,
    x='Date',
    y='AAPL.High',
    title='Time Series with Range Slider and Selectors',
)

# Quick-range buttons shown above the plot; the last one shows everything.
range_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all"),
]

# Add the range slider under the x axis together with the selector buttons.
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(buttons=range_buttons),
)
iplot(fig)