# Plotly
![image](https://yt3.googleusercontent.com/JalewCrNLcNFL7SJgxWy5Xhx4TW2MoCDycuY4sR5yaZ9qoMc38il_97M9ht6b7nzcpdrJ18P3A=w1060-fcrop64=1,00005a57ffffa5a8-k-c0xffffffff-no-nd-rj)


Plotly is a Python library used to create over 40 beautiful interactive web-based visualizations that can be displayed in Jupyter Notebooks or saved to HTML files. It is widely used to plot scientific, statistical, and financial data. It allows you to create a wide range of interactive plots, charts, and graphs, such as line charts, bar charts, scatter plots, heatmaps, 3D plots, and more.

### Import required libraries

In [None]:
import numpy as np
import pandas as pd 
import seaborn as sns
import cufflinks as cf                            # Cufflinks allowed you to call Plotly functions directly on Pandas DataFrames, which then automatically converted the DataFrame into the appropriate Plotly visualization.
import plotly.express as px
import plotly.graph_objects as go                 # Allows us to create graph objects for making more customized plots
import chart_studio.plotly as py                  # Plotly Chart Studio is a web-based platform that allows you to create, host, and share interactive visualizations and dashboards.
from urllib.request import urlopen                # Allows us to grab data from a supplied URL
import json                                       # Used to decode JSON data
%matplotlib inline                                # It configures the notebook environment to display plots directly below the code cell that generates them.

# Make Plotly work in your Jupyter Notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# Use Plotly locally
cf.go_offline()

In [None]:
# Read the Chart Studio username and API key from the environment instead of
# hard-coding them: a key committed in a notebook is visible to everyone who
# can read the file. Export PLOTLY_USERNAME and PLOTLY_API_KEY before starting
# Jupyter.
# NOTE(review): the key that used to be embedded here should be treated as
# leaked and regenerated in the Chart Studio account settings.
import os

username = os.environ.get('PLOTLY_USERNAME')
api_key = os.environ.get('PLOTLY_API_KEY')

if username and api_key:
    # Authenticate using your API key
    py.plotly.tools.set_credentials_file(username=username, api_key=api_key)
else:
    # The cells below call cf.go_offline() / px / go directly, so missing
    # credentials are reported rather than treated as fatal.
    print('PLOTLY_USERNAME / PLOTLY_API_KEY not set; skipping Chart Studio authentication.')

### Creating a basic dataframe and plots

In [None]:
# Build a 50-row DataFrame with one random-valued column per label
n_rows = 50
column_labels = ["A", "B", "C", "D"]
arr1 = np.random.rand(n_rows, len(column_labels))
df1 = pd.DataFrame(data=arr1, columns=column_labels)
# Preview the first rows
df1.head()

In [None]:
# Baseline for comparison: the DataFrame's built-in .plot() method
# (the next cell draws the same data with .iplot())
df1.plot()

In [None]:
# Same DataFrame drawn with .iplot() — presumably the method cufflinks adds to
# DataFrames after cf.go_offline(); it produces the interactive version
df1.iplot()

### Line Plots
It is used to display data points on a two-dimensional plane. It is particularly useful for showing trends or patterns in data over a continuous interval, such as time.

In [None]:
# Load the stocks sample dataset that ships with Plotly Express
stocks_df = px.data.stocks()
# Preview the first three rows
stocks_df.head(3)

In [None]:
# Line chart of the GOOG column against date, with readable axis labels
google_axis_labels = {"date": 'Date', "GOOG": 'Price'}
px.line(stocks_df, x="date", y="GOOG", labels=google_axis_labels,
        title='Google Stock Price over Time')

In [None]:
# Passing a list of columns as y draws one line per column; the shared y-axis
# is relabelled from 'value' to 'Price'
compared_tickers = ["GOOG", "AAPL"]
px.line(stocks_df, x="date", y=compared_tickers,
        labels={'date': 'Date', 'value': 'Price'}, title='Google vs Apple')

In [None]:
# Start from an empty figure and add one Scatter trace per stock
fig1 = go.Figure()

# (column, legend name, drawing mode, line style) for each trace; the dash
# styles used here are 'dot' and 'dashdot'
stock_trace_specs = [
    ('AAPL', 'Apple', 'lines', dict(color='blue', width=2, dash='dot')),
    ('AMZN', 'Amazon', 'lines+markers', dict(color='green', width=2, dash='dot')),
    ('GOOG', 'Google', 'lines+markers', dict(color='firebrick', width=2, dash='dashdot')),
]

for column, legend_name, draw_mode, line_style in stock_trace_specs:
    stock_trace = go.Scatter(x=stocks_df.date,
                             y=stocks_df[column],
                             mode=draw_mode,
                             name=legend_name,
                             line=line_style)
    fig1.add_trace(trace=stock_trace)

# Title and axis captions; as the cell's last expression this also displays the figure
fig1.update_layout(title='Stock Price Data 2018-2020', xaxis_title='Date', yaxis_title='Price')

In [None]:
# Three plain line traces followed by a stripped-down layout
fig = go.Figure()

for column, legend_name in (('AAPL', 'Apple'), ('AMZN', 'Amazon'), ('GOOG', 'Google')):
    fig.add_trace(trace=go.Scatter(x=stocks_df.date, y=stocks_df[column], mode='lines', name=legend_name))

# x-axis: gray axis line, no grid, outside ticks, small Arial tick labels
x_axis_style = dict(showline=True,
                    showgrid=False,
                    linecolor='rgb(204,204,204)',
                    linewidth=2,
                    ticks='outside',
                    tickfont=dict(family='Arial', size=12, color='rgb(82,82,82)'))

# y-axis: everything switched off
y_axis_style = dict(showline=False, showgrid=False, zeroline=False, showticklabels=False)

fig.update_layout(xaxis=x_axis_style,
                  yaxis=y_axis_style,
                  autosize=False,
                  margin=dict(autoexpand=False, l=100, r=20, t=110),
                  showlegend=False,
                  plot_bgcolor='white')

### Bar Charts
A bar chart is a graphical representation of data where rectangular bars are used to compare different categories or groups. Each bar represents a category, and the length or height of the bar is proportional to the value it represents.

In [None]:
# Load the tips sample dataset that ships with Plotly Express
tips_df = px.data.tips()
# Preview the first three rows
tips_df.head(3)

In [None]:
# Bars of tip per day, colored by sex (stacked, per the original note)
tip_bar_labels = {'tip': 'Tip Amount', 'day': 'Day of the Week'}
px.bar(tips_df, x='day', y='tip', color='sex',
       title='Tips by Sex on each day', labels=tip_bar_labels)

In [None]:
# barmode='group' puts the smoker / non-smoker bars side by side
grouped_bar_args = dict(x='sex', y='total_bill', color='smoker', barmode='group')
px.bar(data_frame=tips_df, **grouped_bar_args)

In [None]:
# Load gapminder and keep European rows from 2007 with a population above 2 million
gapminder_df = px.data.gapminder()

is_europe = gapminder_df['continent'] == 'Europe'
is_2007 = gapminder_df['year'] == 2007
above_two_million = gapminder_df['pop'] > 2.e6

europe_df = gapminder_df[is_europe & is_2007 & above_two_million]
europe_df.head(3)

In [None]:
# Bar chart of 2007 population for European countries above 2,000,000 people
fig = px.bar(europe_df,
             x='country',
             y='pop',
             text='pop',
             color='country')

# Print each bar's value above it, formatted to 2 significant digits with an
# SI suffix ('.2s')
fig.update_traces(texttemplate='%{text:.2s}',
                  textposition='outside')

# Rotate the x tick labels by -45 degrees and enforce a minimum label font size.
# uniformtext_minsize only takes effect when uniformtext_mode is set; 'hide'
# drops any label that would not fit at that size instead of shrinking it.
fig.update_layout(uniformtext_minsize=8,
                  uniformtext_mode='hide',
                  xaxis_tickangle=-45)

### Scatter Plots
A scatter plot is a graphical representation of data points in a two-dimensional space. Each data point is represented as a dot or marker on the plot, with one value plotted along the x-axis and another value plotted along the y-axis. Scatter plots are commonly used to display the relationship between two variables and to identify patterns or trends in the data.

In [None]:
# Load the Iris sample dataset that ships with Plotly Express
iris_df = px.data.iris()
# Preview the first three rows
iris_df.head(3)

In [None]:
# Scatter of sepal width vs. sepal length: color encodes species, marker size
# encodes petal length, and petal width is added to the hover box
iris_scatter_args = dict(
    x='sepal_width',
    y='sepal_length',
    color='species',
    size='petal_length',
    hover_data=['petal_width'],
)
px.scatter(data_frame=iris_df, **iris_scatter_args)

In [None]:
# Customized scatter: markers colored by sepal width with a color scale shown,
# species as hover text, then marker outline width 2 and marker size 10
fig = go.Figure()

sepal_markers = go.Scatter(x=iris_df.sepal_width,
                           y=iris_df.sepal_length,
                           mode='markers',
                           marker_color=iris_df.sepal_width,
                           text=iris_df.species,
                           marker=dict(showscale=True))
fig.add_trace(trace=sepal_markers)

# Nested-dict form of marker_line_width / marker_size; update_traces merges it
# into the existing marker settings
fig.update_traces(marker=dict(line=dict(width=2), size=10))

In [None]:
# 100,000 random points drawn with Scattergl; each marker gets a random value
# mapped onto the Viridis color scale
n_points = 100000
random_cloud = go.Scattergl(x=np.random.rand(n_points),
                            y=np.random.rand(n_points),
                            mode='markers',
                            marker=dict(color=np.random.rand(n_points), colorscale='Viridis', line_width=1))
fig = go.Figure(data=random_cloud)
fig.show()

### Pie Charts

A pie chart is a circular statistical graphic that is divided into sectors to represent the distribution of a categorical dataset. Each sector's size is proportional to the quantity or percentage it represents within the whole dataset.

In [None]:
# Gapminder rows for Asian countries in 2007
# (the name samer_df is kept because the next cell reads it)
samer_df = px.data.gapminder().query("year == 2007 and continent == 'Asia'")
samer_df.head(3)

In [None]:
# One slice per country, sized by population, colored from the RdBu sequence
asia_palette = px.colors.sequential.RdBu
px.pie(samer_df, names='country', values='pop',
       title='Population of Asian Continent',
       color_discrete_sequence=asia_palette)

In [None]:
# Hand-built pie with custom slice colors, white separators and two pulled-out slices
slice_labels = ['Water', 'Grass', 'Normal', 'Psychic', 'Fire', 'Ground']
slice_values = [110, 90, 80, 80, 70, 60]
colors = ['#7AB8D5', '#3E92CC', '#1E77B5', '#155FA0', '#0B4689', '#08306B']

fig = go.Figure(data=[go.Pie(labels=slice_labels, values=slice_values)])

# Slice fill colors plus a 2px white border between slices
slice_marker = dict(colors=colors, line=dict(color='#FFFFFF', width=2))

# Show label and percent both on the slice and on hover; pull offsets the
# first and third slices from the center
fig.update_traces(hoverinfo='label+percent',
                  textfont_size=20,
                  textinfo='label+percent',
                  pull=[0.1, 0, 0.2, 0, 0, 0],
                  marker=slice_marker)

### Histograms
A histogram is a graphical representation of the distribution of numerical data. It divides the data into intervals or 'bins', and displays how many data points fall into each bin. It provides a visual depiction of the data's underlying frequency distribution, allowing viewers to quickly understand the shape, center, spread, and presence of any patterns or outliers in the dataset.

In [None]:
# Simulate 5000 throws of two dice (integers from 1 up to, but excluding, 7)
# and add the two faces of each throw
n_rolls = 5000
dice1 = np.random.randint(low=1, high=7, size=n_rolls)
dice2 = np.random.randint(low=1, high=7, size=n_rolls)
dice_sum = np.add(dice1, dice2)
dice_sum

In [None]:
# Histogram of the 5000 two-dice sums with a violin plot in the margin.
# nbins=11 matches the eleven possible sums (2 through 12).
fig = px.histogram(dice_sum,
                   nbins=11,
                   title='5000 Dice Roll Histogram',
                   marginal='violin',
                   color_discrete_sequence=['#0B4689'])

# The x-axis carries the summed dice values and the y-axis the number of rolls
# in each bin, so the axes are titled accordingly (they were previously
# mislabelled 'Dice Roll' / 'Dice Sum').
fig.update_layout(xaxis_title='Dice Sum',
                  yaxis_title='Count',
                  bargap=0.1,
                  showlegend=False)

In [None]:
# Histogram of total_bill from the tips dataset, one colored series per sex
sex_palette = ['#3E92CC', '#155FA0']
fig = px.histogram(tips_df, x='total_bill', color='sex',
                   color_discrete_sequence=sex_palette)

# Leave only a thin gap between neighbouring bars
fig.update_layout(bargap=0.01)

### Box Plots / Box & Whisker Plots
It allows you to compare different variables, as it provides a summary of key statistics and visualizes the spread, central tendency, and presence of outliers. A box plot is constructed of -  
1. A rectangular box which represents the range between the first quartile (Q1) and the third quartile (Q3).
2. A line inside the box which represents the median.
3. 'Whiskers' which extend from the box to the minimum and maximum values within a certain range.
4. Outliers represented as individual points or dots beyond the whiskers.

In [None]:
# Box plot of tip per day; points='all' draws every observation next to its box
px.box(data_frame=tips_df, x='day', y='tip', points='all',
       color_discrete_sequence=['#08306B'])

In [None]:
# Tip distribution per day, with a separate box for each sex
box_palette = ['#155FA0', '#08306B']
px.box(data_frame=tips_df, x='day', y='tip', color='sex',
       color_discrete_sequence=box_palette)

In [None]:
# Box plot of tip by sex that also marks the mean and standard deviation
# (boxmean='sd')
fig = go.Figure()

tip_by_sex_box = go.Box(x=tips_df.sex,
                        y=tips_df.tip,
                        marker_color='#08306B',
                        boxmean='sd')
fig.add_trace(tip_by_sex_box)

In [None]:
# Box plots of the GOOG and AAPL columns from the stocks dataset
fig = go.Figure()

# Every point is drawn (boxpoints='all'), jittered so points do not overlap,
# and the whisker caps are narrowed
for column, legend_name, fill in (('GOOG', 'Google', 'blue'), ('AAPL', 'Apple', 'red')):
    fig.add_trace(go.Box(y=stocks_df[column],
                         boxpoints='all',
                         name=legend_name,
                         fillcolor=fill,
                         jitter=0.5,
                         whiskerwidth=0.2))

# Light gray paper and plot background with thick white y grid lines
background_color = 'rgb(243,243,243)'
fig.update_layout(title='Google vs Apple',
                  yaxis=dict(gridcolor='rgb(255, 255, 255)', gridwidth=3),
                  paper_bgcolor=background_color,
                  plot_bgcolor=background_color)

### Violin Plot
It combines elements of both *Box Plots* and *Kernel Density Plots*. It is used to display the distribution of a dataset across different categories. The plot resembles a violin in shape, with the central body representing the density of the data values, and 'violins' extending from the body that show the spread of the data.

In [None]:
# Violin of total_bill with an inner box plot and all points shown;
# every column of the dataset is included in the hover box
hover_columns = tips_df.columns
px.violin(tips_df, y='total_bill', box=True, points='all',
          hover_data=hover_columns)

In [None]:
# One violin per smoker / sex combination, showing the distribution of tip
violin_args = dict(
    x='smoker',
    y='tip',
    color='sex',
    box=True,
    points='all',
    hover_data=tips_df.columns,
)
px.violin(data_frame=tips_df, **violin_args)

In [None]:
# Split violins: for each day the left half shows the total bills of smokers
# and the right half those of non-smokers.
fig = go.Figure()

# Left (negative) half — smokers only. The trace must read the FILTERED frame
# (tip_df); reading the full tips_df would make both halves identical.
tip_df = tips_df.query("smoker=='Yes'")
fig.add_trace(trace=go.Violin(x=tip_df['day'],
                              y=tip_df['total_bill'],
                              legendgroup='Yes',
                              scalegroup='Yes',
                              name='Yes',
                              side='negative',
                              line_color='blue'))

# Right (positive) half — non-smokers only
tip_df = tips_df.query("smoker=='No'")
fig.add_trace(trace=go.Violin(x=tip_df['day'],
                              y=tip_df['total_bill'],
                              legendgroup='No',
                              scalegroup='No',
                              name='No',
                              side='positive',
                              line_color='red'))

# Overlay the two traces with no gap so the halves of each day meet in the
# middle instead of being placed side by side
fig.update_layout(violingap=0, violinmode='overlay')

### Density Heatmap
It is a graphical representation that uses color intensity to visualize the density of data points across a 2D space. It is useful for displaying the concentration of data and identifying patterns or trends in the data distribution.

In [None]:
# Load Seaborn's "flights" example dataset; the cells below read its
# year, month and passengers columns
flights = sns.load_dataset("flights")
# Preview the first three rows
flights.head(3)

In [None]:
# Heatmap of passengers over year (x) and month (y) on the Viridis scale.
# The bin counts can be set with nbinsx and nbinsy.
heatmap_args = dict(x='year', y='month', z='passengers',
                    color_continuous_scale='Viridis')
fig = px.density_heatmap(flights, **heatmap_args)
fig.show()

In [None]:
# Same heatmap with a histogram in both the top (x) and right (y) margins
marginal_kind = 'histogram'
fig = px.density_heatmap(flights, x='year', y='month', z='passengers',
                         marginal_x=marginal_kind, marginal_y=marginal_kind)

# Leave only a thin gap between the histogram bars
fig.update_layout(bargap=0.01)

### 3D Scatter Plots
It is a 3D data visualization used to display relationships and patterns among three numerical variables. 

In [None]:
# 3D scatter of the flights data (year, month, passengers), colored by year,
# on the dark template in a 700x700 canvas
scatter_3d_args = dict(
    x='year',
    y='month',
    z='passengers',
    color='year',
    template='plotly_dark',
    opacity=0.8,
    width=700,
    height=700,
)
fig = px.scatter_3d(data_frame=flights, **scatter_3d_args)
fig.show()

### 3D Line Plots
It is a 3D data visualization that represents a continuous line connecting data points, and is used to illustrate the relationship between 3 numerical variables. It is useful for visualizing trends, patterns, and changes in data.

In [None]:
# 3D lines through the flights data, one color per year
fig = px.line_3d(flights, x='year', y='month', z='passengers', color='year',
                 template='plotly_dark', width=700, height=700)

# Draw the lines with width 4
line_style = dict(width=4)
fig.update_traces(line=line_style)

### Scatter Matrix / Pair Plot
It is a graphical tool used to display pairwise scatter plots of multiple variables in a dataset. Useful for exploring relationships and correlations between multiple numerical variables.

In [None]:
# Pairwise scatter plots of the flights columns, colored by month
px.scatter_matrix(flights, color='month')

### Map Scatter Plots
It displays data points on a geographical map, and used to show spatial distribution and relationships of data points that have associated geographical coordinates.

In [None]:
# Gapminder rows for 2007, drawn as bubbles on an orthographic globe: located
# by the iso_alpha column, sized by population, colored by continent
df = px.data.gapminder().query("year == 2007")

geo_args = dict(
    locations='iso_alpha',
    color='continent',
    hover_name='country',
    size='pop',
    projection='orthographic',
    template='plotly_dark',
    width=800,
    height=400,
)
px.scatter_geo(data_frame=df, **geo_args)

### Choropleth Maps
It is a thematic map that uses different shades, colors, or patterns to represent the variations in a particular data variable across different geographic regions.

In [None]:
# Download the US county geometry (GeoJSON) and decode it.
# NOTE: the variable is called `countries` but, judging by the URL, it holds
# counties; the name is kept because a later cell refers to it.
county_geojson_url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(county_geojson_url) as response:
    countries = json.loads(response.read())

In [None]:
# Load the unemployment table keyed by each county's FIPS (Federal Information
# Processing) code; fips is read as a string rather than parsed as a number
unemployment_csv_url = "https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv"
df = pd.read_csv(unemployment_csv_url, dtype={"fips": str})
df.head(3)

In [None]:
# Draw the US map from the county GeoJSON, matching rows on the fips column
# and coloring by the unemp column on a Viridis scale clamped to 0-12
choropleth_args = dict(
    geojson=countries,
    locations='fips',
    color='unemp',
    color_continuous_scale='Viridis',
    range_color=(0,12),
    scope='usa',
    labels={'unemp':'unemployment rate'},
    width=800,
    height=700,
)
px.choropleth(df, **choropleth_args)

### Polar Chart / Radar Chart
It displays data points using a circular plot with multiple axes radiating from a common center. It's used to visualize multivariate data patterns and comparisons across different categories.

In [None]:
# Load the wind sample dataset that ships with Plotly Express
wind_df = px.data.wind()
# Preview the first three rows
wind_df.head(3)

In [None]:
# Polar scatter of the wind data: frequency is the radius (and marker size),
# direction the angle, strength the color
polar_scatter_args = dict(
    r='frequency',
    theta='direction',
    color='strength',
    size='frequency',
    template='plotly_dark',
    width=800,
    height=400,
)
px.scatter_polar(wind_df, **polar_scatter_args)

In [None]:
# Same wind data as closed polar lines, one line per strength value
fig = px.line_polar(wind_df, r='frequency', theta='direction', color='strength',
                    line_close=True, template='plotly_dark',
                    width=800, height=400)

# Draw the lines with width 4
polar_line_style = dict(width=4)
fig.update_traces(line=polar_line_style)

### Ternary Plot
It is a triangular graph used to visualize the composition of 3 component mixture. It represents the percentage of 3 variables that add up to 100%. Useful in fields like geology, chemistry, and environmental science.

In [None]:
# Load the experiment sample dataset that ships with Plotly Express
exp_df = px.data.experiment()
# Preview the first three rows
exp_df.head(3)

In [None]:
# Ternary scatter showing the ratio of the three experiment columns for each
# row, colored by gender with the group shown on hover
ternary_axes = dict(a='experiment_1', b='experiment_2', c='experiment_3')
px.scatter_ternary(exp_df, hover_name='group', color='gender', **ternary_axes)

### Facets
It refers to splitting a dataset into multiple subsets and creating a separate plot for each subset. These plots are displayed together on a single visualization canvas, allowing for easy comparison between different segments of the data.

In [None]:
# facet_col splits the scatter into one subplot column per value of 'sex'
px.scatter(data_frame=tips_df, x='total_bill', y='tip',
           color='smoker', facet_col='sex')

In [None]:
# Lay the facets out in a grid: one row per meal time, one column per day.
# This uses the full tips_df; the similarly named tip_df left over from the
# split-violin cell holds non-smokers only, so plotting it would silently drop
# every smoker from the grid.
# category_orders fixes the order of the day columns and time rows.
px.histogram(tips_df,
             x='total_bill',
             y='tip',
             color='sex',
             facet_row='time',
             facet_col='day',
             category_orders={'day':['Thur', 'Fri', 'Sat', 'Sun'], 'time':['Lunch', 'Dinner']})

In [None]:
# Load Seaborn's "attention" example dataset. Per the original note it holds
# scores for different students based on the level of attention they could
# provide during testing; the next cell reads its solutions, score and
# subject columns.
att_df = sns.load_dataset('attention')
# Preview the first three rows
att_df.head(3)

In [None]:
# One small line chart of score vs. solutions per subject, wrapped after
# every 5 facet columns
facet_args = dict(facet_col='subject', facet_col_wrap=5)
px.line(att_df, x='solutions', y='score',
        title='Scores Based on Attention', **facet_args)

### Animated Plots
They are visualizations that change over time or in response to some variable, creating a dynamic and interactive representation of data.

In [None]:
# Animated bubble chart: GDP per capita (log x-axis) against life expectancy,
# one frame per year, with each country tracked across frames
cnt_df = px.data.gapminder()

bubble_args = dict(
    x='gdpPercap',
    y='lifeExp',
    animation_frame='year',
    animation_group='country',
    size='pop',
    color='continent',
    hover_name='country',
    log_x=True,
    size_max=55,
    range_x=[100, 100000],
    range_y=[25, 90],
)
px.scatter(cnt_df, **bubble_args)

In [None]:
# Animated bar chart of population per continent, one frame per year;
# the y-axis is fixed at 0 to 4 billion so the scale stays put between frames
population_axis_range = [0, 4000000000]
px.bar(cnt_df, x='continent', y='pop', color='continent',
       animation_frame='year', animation_group='country',
       range_y=population_axis_range)