# Plotly

    Scatter plots allow the comparison of two variables for a set of data.
    

In [None]:
import numpy as np
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

In [None]:
# Fix the seed so the same 100 random integer (x, y) pairs are drawn on every run.
np.random.seed(42)
random_x = np.random.randint(low=1, high=101, size=100)
random_y = np.random.randint(low=1, high=101, size=100)

# A single markers-only Scatter trace, wrapped in the list handed to pyo.plot().
data = [
    go.Scatter(
        x=random_x,
        y=random_y,
        mode='markers',
    ),
]

# No layout is supplied here; the list of traces is plotted as-is.
pyo.plot(data)

In [None]:
# Reproducible sample of 100 random integer (x, y) pairs.
np.random.seed(42)
random_x = np.random.randint(low=1, high=101, size=100)
random_y = np.random.randint(low=1, high=101, size=100)

# Marker styling: size-20 green pentagons with a black outline of width 2.
trace = go.Scatter(
    x=random_x,
    y=random_y,
    mode='markers',
    marker={
        'color': 'green',
        'symbol': 'pentagon',
        'size': 20,
        'line': {'color': 'black', 'width': 2},
    },
)
data = [trace]

# Figure-level settings: chart title, axis titles and hover mode.
layout = go.Layout(
    title='Hello First Plot',
    xaxis={'title': 'My X Axis'},
    yaxis={'title': 'My Y Axis'},
    hovermode='closest',
)

# Bundle traces and layout into one figure and render it.
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

# Line plots in plotly

In [None]:
# 100 evenly spaced x positions on [0, 1] and 100 seeded standard-normal y values.
np.random.seed(56)
x_values = np.linspace(start=0, stop=1, num=100)
y_values = np.random.randn(100)

# Only one trace in this cell: the y values shifted up by 5, drawn as bare markers.
trace0 = go.Scatter(
    x=x_values,
    y=y_values + 5,
    mode='markers',
)
data = [trace0]

layout = go.Layout(
    title='This is to show different modes',
    xaxis=dict(title='My X Axis'),
    yaxis={'title': 'My Y Axis'},
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

In [None]:
# 100 evenly spaced x positions on [0, 1] and 100 seeded standard-normal y values.
np.random.seed(56)
x_values = np.linspace(start=0, stop=1, num=100)
y_values = np.random.randn(100)

# The same y values drawn twice: shifted up by 5 as markers only, and unshifted
# as markers joined by lines. The `name` of each trace labels it in the legend.
trace0 = go.Scatter(
    x=x_values,
    y=y_values + 5,
    mode='markers',
    name='markers',
)
trace1 = go.Scatter(
    x=x_values,
    y=y_values,
    mode='markers+lines',
    name='markers with lines',
)
data = [trace0, trace1]

layout = go.Layout(
    title='This is to show different modes',
    xaxis=dict(title='My X Axis'),
    yaxis={'title': 'My Y Axis'},
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

In [None]:
# 100 evenly spaced x positions on [0, 1] and 100 seeded standard-normal y values.
np.random.seed(56)
x_values = np.linspace(start=0, stop=1, num=100)
y_values = np.random.randn(100)

# Three copies of the same y values, offset by +5 / 0 / -5 so they do not
# overlap, one per drawing mode.
trace0 = go.Scatter(
    x=x_values,
    y=y_values + 5,
    mode='markers',
    name='markers',
)
trace1 = go.Scatter(
    x=x_values,
    y=y_values,
    mode='markers+lines',
    name='markers with lines',
)
trace2 = go.Scatter(
    x=x_values,
    y=y_values - 5,
    mode='lines',
    name='lines',
)
data = [trace0, trace1, trace2]

layout = go.Layout(
    title='This is to show different modes',
    xaxis=dict(title='My X Axis'),
    yaxis={'title': 'My Y Axis'},
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

# plotting iris dataset using plotly

In [None]:
# perform the imports
import numpy as np
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go

In [None]:
# Read the iris data from the local CSV file. The bare name on the last line
# makes the notebook show the DataFrame as the cell's output.
iris = pd.read_csv('datasets/iris.csv')
iris

In [None]:
# Distinct class labels in order of first appearance in the file.
# value_counts().index would order them by frequency instead, which leaves the
# order of equally frequent classes (and so the colour later paired with
# species[0], species[1], species[2]) unspecified. dropna() keeps missing
# labels excluded, as value_counts() did by default.
species = pd.Index(iris['class'].dropna().unique())
species

In [None]:
# Exploratory check: the sepal_length values of the rows whose class is 'Iris-setosa'.
iris[iris['class'] == 'Iris-setosa'].sepal_length.values

In [None]:
# sepal lengths of a single species
def x_values_generator(specie: str):
    """Return the ``sepal_length`` values of the iris rows whose ``class`` equals *specie*.

    The result is the column's ``.values`` array (not a Python list), filtered
    from the module-level ``iris`` DataFrame.
    """
    belongs_to_specie = iris['class'] == specie
    return iris.loc[belongs_to_specie, 'sepal_length'].values

# petal lengths of a single species
def y_values_generator(specie: str):
    """Return the ``petal_length`` values of the iris rows whose ``class`` equals *specie*.

    The result is the column's ``.values`` array (not a Python list), filtered
    from the module-level ``iris`` DataFrame.
    """
    belongs_to_specie = iris['class'] == specie
    return iris.loc[belongs_to_specie, 'petal_length'].values

In [None]:
# One markers-only trace per species: sepal length on x, petal length on y.
# The colours are paired with species[0], species[1], species[2] in that order.
trace0, trace1, trace2 = [
    go.Scatter(
        x=x_values_generator(species[position]),
        y=y_values_generator(species[position]),
        mode='markers',
        marker={'color': colour},
        name=species[position],
    )
    for position, colour in enumerate(('red', 'blue', 'green'))
]


data = [trace0, trace1, trace2]

In [None]:
# Layout: chart title plus a title for each axis.
layout = go.Layout(
    title='Iris Species Sepal Vs Petal Lengths',
    xaxis=dict(title='Sepal Length'),
    yaxis=dict(title='Petal Length'),
)

# Figure: the current `data` traces combined with the layout, then rendered.
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

# Line chart assignment

    Using the file 2010YumaAZ.csv, develop a line chart that plots seven days' worth of temperature data on one graph.
    You can use a for loop to assign each day to its own trace.

In [None]:
# Read the Yuma, AZ temperature data from the local CSV file and display it.
df = pd.read_csv('datasets/2010YumaAZ.csv')
df

In [None]:
# Day labels in order of first appearance in the file, so the traces (and the
# legend) follow the order of the data. value_counts().index would order the
# days by frequency instead, leaving the order of equally frequent days
# unspecified. dropna() keeps missing labels excluded, as value_counts() did.
days = pd.Index(df['DAY'].dropna().unique())
days

In [None]:
def temperatures(day : str):
    """Return the ``T_HR_AVG`` values of the rows of ``df`` whose ``DAY`` equals *day*."""
    on_that_day = df['DAY'] == day
    return df.loc[on_that_day, 'T_HR_AVG'].values

def times(day: str):
    """Return the ``LST_TIME`` values of the rows of ``df`` whose ``DAY`` equals *day*."""
    on_that_day = df['DAY'] == day
    return df.loc[on_that_day, 'LST_TIME'].values

In [None]:
# One line trace per day: time of day on x, that day's temperatures on y,
# with the day label as the legend name.
data = [
    go.Scatter(
        x=times(day_label),
        y=temperatures(day_label),
        mode='lines',
        name=day_label,
    )
    for day_label in days
]
data

In [None]:
# Layout: chart title plus a title for each axis.
layout = go.Layout(
    title='Temperature Vs Time Graph For Various Days',
    xaxis={'title': 'time'},
    yaxis={'title': 'temperature'},
)

# Figure: the current `data` traces combined with the layout, then rendered.
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

# Line chart assignment 2

    Take the data in 'population.csv' and plot a lines + markers graph of the population estimates for the six New England states.

In [None]:
# Read the population estimates from the local CSV file and display them.
population = pd.read_csv('datasets/population.csv')
population

In [None]:
# State names in row order, then paired with their 0-based position so each
# state can later be looked up by row.
states = population['Name'].values
enumerated_states = [(position, state) for position, state in enumerate(states)]
enumerated_states

In [None]:
# Every column label except the first one; these labels are used as the years.
years = population.columns[1:]
years

In [None]:
def population_by_enumerated_state(enumerated_state: tuple[int, str]):
    """Return one state's population figures, one value per entry of ``years``.

    Parameters
    ----------
    enumerated_state : tuple[int, str]
        ``(row label, state name)`` pair. Only the row label is used for the
        lookup in the module-level ``population`` DataFrame.

    Returns
    -------
    numpy.ndarray
        The values of that row in the ``years`` columns, in column order.
    """
    index, _state = enumerated_state

    # One label-based row lookup replaces the per-year chained
    # ``population[year][index]`` indexing done in a Python loop.
    return population.loc[index, years].to_numpy()

# Spot-check the helper on the third (position, state) pair.
population_by_enumerated_state(enumerated_states[2])

In [None]:
# One lines+markers trace per state: year labels on x, that state's population
# figures on y, and the state name (second item of the pair) as the legend name.
data = [
    go.Scatter(
        x=years,
        y=population_by_enumerated_state(state_pair),
        mode='markers+lines',
        name=state_pair[1],
    )
    for state_pair in enumerated_states
]

data

In [None]:
# Layout: chart title plus a title for each axis.
layout = go.Layout(
    title='population estimates of the six new england states.',
    xaxis={'title': 'years'},
    yaxis={'title': 'population'},
)

# Figure: the current `data` traces combined with the layout, then rendered.
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

# Bar chart

    Using Bar charts, we can visualize categorical data.
    Typically, the x-axis is the categories and the y-axis is the number of elements in each category.
    
    We have different types of bar chart:
    1. Simple bar chart
    2. Stacked bar chart
    3. Nested bar chart

In [None]:
# perform the imports
import numpy as np
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go

In [None]:
# Read the 2018 Winter Olympics medal table from the local CSV file and display it.
df = pd.read_csv('datasets/2018WinterOlympics.csv')
df

In [None]:
# Simple bar chart: one bar per NOC entry, bar height taken from the 'Total' column.
data = [
    go.Bar(
        x=df['NOC'],
        y=df['Total'],
    ),
]

# Only a title is set on the layout.
layout = go.Layout(title='Medals')

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

### Nested bar chart example

    By default, if we plot multiple bar traces, plotly places the bars side by side (barmode = 'group'), which gives a nested (grouped) bar chart.

In [None]:
# One Bar trace per medal column, each with its own hex colour.
trace0, trace1, trace2 = [
    go.Bar(
        x=df['NOC'],
        y=df[medal],
        name=medal,
        marker={'color': hex_colour},
    )
    for medal, hex_colour in (
        ('Gold', '#FFD700'),
        ('Silver', '#C0C0C0'),
        ('Bronze', '#CD7F32'),
    )
]

# All three traces go into one figure; no barmode is passed to the layout.
data = [trace0, trace1, trace2]

layout = go.Layout(title='Medals')

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

### Stacked bar chart

    To change the default mode from a nested bar chart to a stacked bar chart, we need to pass barmode = 'stack' when creating
    the layout.

In [None]:
# The same three medal traces as for the grouped chart.
trace0 = go.Bar(
    x=df['NOC'],
    y=df['Gold'],
    name='Gold',
    marker={'color': '#FFD700'},
)
trace1 = go.Bar(
    x=df['NOC'],
    y=df['Silver'],
    name='Silver',
    marker={'color': '#C0C0C0'},
)
trace2 = go.Bar(
    x=df['NOC'],
    y=df['Bronze'],
    name='Bronze',
    marker={'color': '#CD7F32'},
)
data = [trace0, trace1, trace2]


# The only difference from the grouped chart: barmode='stack' on the layout.
layout = go.Layout(
    title='Medals',
    barmode='stack',
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

### Bar chart exercise

        Create a stacked bar chart from the file 'mocksurvey.csv'. Note that the questions appear in the index
        (and should be used for the x axis), while responses appear as column labels.

In [None]:
# Read the mock survey results from the local CSV file and display them.
df = pd.read_csv('datasets/mocksurvey.csv')
df

In [None]:
# The column labelled 'Unnamed: 0' holds the question labels.
questions = df['Unnamed: 0'].values
questions # These will be used as x axis labels.

In [None]:
# Every column label except the first one; each is treated as one response category.
categories = df.columns[1:]
categories

In [None]:
def values_giver(category:str):
    """Return every value of the ``category`` column of ``df`` as a list, in row order."""
    # Series.tolist() replaces the element-by-element ``df[category][i]`` lookups
    # and removes the hidden dependency on the global ``questions`` for the length.
    return df[category].tolist()

# Spot-check the helper on the 'Strongly Agree' column.
values_giver('Strongly Agree')

In [None]:
# One Bar trace per response category: questions on x, that category's values on y.
data = [
    go.Bar(
        x=questions,
        y=values_giver(response),
        name=response,
    )
    for response in categories
]
data

In [None]:
# Layout: titles plus barmode='stack' so the category bars are stacked per question.
layout = go.Layout(
    title='bar_chart_assignment_one',
    xaxis={'title': 'questions'},
    yaxis={'title': 'opinion'},
    barmode='stack',
)

# Figure: the current `data` traces combined with the layout, then rendered.
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

### How to make horizontal chart?

    To make a bar chart horizontal we just need to pass "orientation = 'h'" in the go.Bar() method and reverse x and y.

In [None]:
# Load the survey: the column labelled 'Unnamed: 0' holds the questions and
# every remaining column is treated as one response category.
df = pd.read_csv('datasets/mocksurvey.csv')
questions = df['Unnamed: 0'].values
categories = df.columns[1:]

def values_giver(category:str):
    """Return every value of the ``category`` column of ``df`` as a list, in row order."""
    # Series.tolist() replaces the index loop over range(len(questions)).
    return df[category].tolist()


# Reverse the x and y and mention orientation as 'h'.
data = [
    go.Bar(y=questions, x=values_giver(category), name=category, orientation='h')
    for category in categories
]

# The data axes are swapped for the horizontal chart, so the axis titles must
# be swapped as well: the values now run along x and the questions along y.
layout = go.Layout(title='bar_chart_assignment_one',
                   xaxis=dict(title='opinion'),
                   yaxis=dict(title='questions'),
                   barmode='stack')
fig = go.Figure(data=data, layout=layout)

pyo.plot(fig)

# Bubble chart

    A bubble chart is a scatter plot in which the size of each marker is driven by an additional numerical column.
    To plot a bubble chart, we use the go.Scatter() method and set the size parameter in its marker attribute.

In [None]:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

# Read the mpg data from the local CSV file; it is the source for the bubble chart.
df = pd.read_csv('datasets/mpg.csv')

In [None]:
# Display the loaded DataFrame as the cell's output.
df

In [None]:
# Bubble chart: a markers-only scatter of mpg against horsepower whose marker
# size and colour come from two further columns.
trace = go.Scatter(
    x=df['horsepower'],
    y=df['mpg'],
    text=df['name'],
    mode='markers',
    marker={
        'size': df['weight'] / 100,  # weight divided by 100 sets the bubble size
        'color': df['cylinders'],    # cylinder count sets the bubble colour
        'showscale': True,           # show the colour scale alongside the plot
    },
)
data = [trace]

layout = go.Layout(title='Bubble Chart')

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

# Box plots
    Box plots visualize the variation of a numerical column by depicting the continuous numerical data through quartiles.
    
    It is used for univariate analysis.
    
    Suppose we have two columns heights and gender(male and female). Then we create two box plots. 
    The first one for males and the second one for females.

    It works on the concept of quartiles. Quartiles separate the data into 4 equal-sized groups.

    The IQR(inter quartile range) is the length of the filled in box Q3-Q1. The middle line depicts median.
        
    Outliers are the values that lie more than (1.5)*IQR above Q3 or more than (1.5)*IQR below Q1.

    Outliers will be displayed as singular markers outside the whiskers.

In [None]:
import numpy as np
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go

In [None]:
# Twenty sample values for the box plot.
y = [
    1, 14, 14, 15, 16,
    18, 18, 19, 19, 20,
    20, 23, 24, 26, 27,
    27, 28, 29, 33, 54,
]

# Box trace with every option left at its default.
trace = go.Box(y=y)
data = [trace]
pyo.plot(data)

### boxpoints
    The default value for boxpoints is 'outliers'. If we want to see all the points then we can set it to 'all'.

In [None]:
# Twenty sample values for the box plot.
y = [
    1, 14, 14, 15, 16,
    18, 18, 19, 19, 20,
    20, 23, 24, 26, 27,
    27, 28, 29, 33, 54,
]

# boxpoints='all' asks for every sample point to be drawn.
trace = go.Box(
    y=y,
    boxpoints='all',
)
data = [trace]
pyo.plot(data)

### pointpos
    A negative pointpos means that the points will be positioned to the left of the box. A positive pointpos means that the points will be positioned to the right of the box.
    A pointpos of 0 means that the points will be drawn over the box itself.

In [None]:
# Twenty sample values for the box plot.
y = [
    1, 14, 14, 15, 16,
    18, 18, 19, 19, 20,
    20, 23, 24, 26, 27,
    27, 28, 29, 33, 54,
]

# All points are drawn, and pointpos=0 positions them on the box itself
# rather than to its left or right.
trace = go.Box(
    y=y,
    boxpoints='all',
    pointpos=0,
)

data = [trace]
pyo.plot(data)

### jitter
    The jitter parameter spreads the points out sideways by a random amount, so that overlapping points become visible.
    The value of jitter must lie in the interval [0,1].

In [None]:
# Twenty sample values for the box plot.
y = [
    1, 14, 14, 15, 16,
    18, 18, 19, 19, 20,
    20, 23, 24, 26, 27,
    27, 28, 29, 33, 54,
]

# All points drawn on the box (pointpos=0), with jitter=0.6 applied to them.
trace = go.Box(
    y=y,
    boxpoints='all',
    pointpos=0,
    jitter=0.6,
)
data = [trace]
pyo.plot(data)

# Histogram

    1. A histogram displays an accurate representation of the overall distribution of a continuous feature.
    2. To create a histogram, we divide the entire range of values of the continuous feature into a series of intervals.

In [None]:
# perform the necessary imports
import numpy as np
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go


In [None]:
# Read the mpg data from the local CSV file and display it.
df = pd.read_csv('datasets/mpg.csv')
df

In [None]:
# Histogram of the 'mpg' column; no bin settings are passed.
trace = go.Histogram(x=df['mpg'])
data = [trace]

layout = go.Layout(title='Histogram')

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

# Histogram plot exercise

    Create a histogram that plots the 'length' field from the Abalone dataset (datasets/abalone.csv). Set the range from 0 to 1, with a bin size of 0.02.

In [None]:
import pandas as pd
import numpy  as np
import plotly.offline as pyo
import plotly.graph_objs as go

In [None]:
# Read the abalone data from the local CSV file and display it.
df = pd.read_csv('datasets/abalone.csv')
df

In [None]:
# Histogram of the 'length' column with explicit bins: from 0 to 1 in steps of 0.02.
trace = go.Histogram(
    x=df['length'],
    xbins={'start': 0, 'end': 1, 'size': 0.02},
)
data = [trace]

# Layout: chart title plus a title for each axis.
layout = go.Layout(
    title='Histogram exercise',
    xaxis=dict(title='length'),
    yaxis={'title': 'frequency'},
)

# Figure: traces combined with the layout, then rendered.
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

# Distplots
    A distplot is a combination of a histogram, a rug plot and a KDE (kernel density estimation) plot.
    

In [1]:
import numpy as np
import pandas as pd
import plotly.offline as pyo
import plotly.figure_factory as ff

In [2]:
# Seed the global NumPy generator, then draw 200 standard-normal samples
# so the distplot is reproducible.
np.random.seed(42)
x = np.random.standard_normal(200)

In [3]:
# create_distplot() is given a list of sample sets and a matching list of
# labels; here there is a single sample set with a single label.
hist_data = [x]
group_labels = ['distplot']

fig = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
)
pyo.plot(fig)

'temp-plot.html'