# plotly
    Scatter plots allow the comparison of two variables for a set of data.
    OOP method, plotly.express
    ==========================
    1. traces --> graph objects
    2. Then store all the traces in a list
    3. layout --> graph objects
    4. fig --> list + layout -- graph objects
    5. plot(fig) -- plotly.offline.plot(fig)

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as pyo # plot method

In [2]:
# Reproducible random sample: seeding makes every run draw the same numbers.
np.random.seed(42)

# 100 integers per axis drawn from 1..100 -- the upper bound (101) is exclusive.
random_x = np.random.randint(low=1, high=101, size=100)
random_y = np.random.randint(low=1, high=101, size=100)

In [5]:
# Step 1: a single trace (graph object) holding the x/y data.
trace = go.Scatter(x=random_x, y=random_y)

# Step 2: traces are always collected in a list, even when there is only one.
data = [trace]

# Step 3: the layout carries figure-level settings such as the title.
layout = go.Layout(title='My first graph')

# Step 4: combine the trace list and the layout into one figure.
fig = go.Figure(data=data, layout=layout)

# Step 5: render the figure offline; the result is written to temp-plot.html.
pyo.plot(fig)

'temp-plot.html'

# Scatter plot

    1. bivariate analysis -- 2 variables(height and weight)
    2. numerical vs numerical data
    3. used to study correlation
    4. go.Scatter() -- scatter, line, bubble
    5. unordered data -- the x values do not need to be ordered

In [6]:
# Re-seed so this cell reproduces the same sample every time it is run.
np.random.seed(42)

# Two consecutive draws of 100 integers from 1..100 (101 is excluded):
# the first becomes the x values, the second the y values.
random_x, random_y = (np.random.randint(1, 101, size=100) for _ in range(2))

# Display the x sample.
random_x

array([ 52,  93,  15,  72,  61,  21,  83,  87,  75,  75,  88, 100,  24,
         3,  22,  53,   2,  88,  30,  38,   2,  64,  60,  21,  33,  76,
        58,  22,  89,  49,  91,  59,  42,  92,  60,  80,  15,  62,  62,
        47,  62,  51,  55,  64,   3,  51,   7,  21,  73,  39,  18,   4,
        89,  60,  14,   9,  90,  53,   2,  84,  92,  60,  71,  44,   8,
        47,  35,  78,  81,  36,  50,   4,   2,   6,  54,   4,  54,  93,
        63,  18,  90,  44,  34,  74,  62, 100,  14,  95,  48,  15,  72,
        78,  87,  62,  40,  85,  80,  82,  53,  24])

In [7]:
# Scatter trace; `mode` can be 'markers', 'lines' or 'markers+lines'.
trace = go.Scatter(
    x=random_x,
    y=random_y,
    mode='markers+lines',
)

# By convention the traces live in a list.
data = [trace]

# Figure title plus a label for each axis.
layout = go.Layout(
    title='My first scatter plot',
    xaxis=dict(title='random_x'),
    yaxis={'title': 'random_y'},
)

# Assemble the figure and render it offline (written to temp-plot.html).
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [8]:
# Same seed as before, so the sample matches the previous cells exactly.
np.random.seed(42)

# 100 integers in 1..100 for each axis; `high` is exclusive, hence 101.
random_x = np.random.randint(1, high=101, size=100)
random_y = np.random.randint(1, high=101, size=100)

# Display the x sample.
random_x

array([ 52,  93,  15,  72,  61,  21,  83,  87,  75,  75,  88, 100,  24,
         3,  22,  53,   2,  88,  30,  38,   2,  64,  60,  21,  33,  76,
        58,  22,  89,  49,  91,  59,  42,  92,  60,  80,  15,  62,  62,
        47,  62,  51,  55,  64,   3,  51,   7,  21,  73,  39,  18,   4,
        89,  60,  14,   9,  90,  53,   2,  84,  92,  60,  71,  44,   8,
        47,  35,  78,  81,  36,  50,   4,   2,   6,  54,   4,  54,  93,
        63,  18,  90,  44,  34,  74,  62, 100,  14,  95,  48,  15,  72,
        78,  87,  62,  40,  85,  80,  82,  53,  24])

In [9]:
# Markers-only scatter trace with a customised marker:
# size 12, a green fill, pentagon shape and a 2-wide marker line.
# (`mode` can be 'markers', 'lines' or 'markers+lines'.)
trace = go.Scatter(
    x=random_x,
    y=random_y,
    mode='markers',
    marker={
        'size': 12,
        'color': 'rgb(51, 204, 153)',
        'symbol': 'pentagon',
        'line': {'width': 2},
    },
)

# By convention the traces live in a list.
data = [trace]

# Figure title plus a label for each axis.
layout = go.Layout(
    title='My first scatter plot',
    xaxis=dict(title='random_x'),
    yaxis={'title': 'random_y'},
)

# Assemble the figure and render it offline (written to temp-plot.html).
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

# Line plot
    1. Ordered(asc) x axis values
    2. bivariate analysis
    3. numerical vs numerical 
    4. regression analysis -- slope(machine learning)

In [10]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as pyo

# The shortest path between two distinct points is a straight line.
# A point is an undefined (primitive) term in mathematics.
# Two distinct points determine a unique line (Euclid's axiom).


In [11]:
# Fixed seed so the noise below is the same on every run.
np.random.seed(56)

# x: 100 evenly spaced, ascending values from 0 to 1 (both ends included).
x_values = np.linspace(start=0, stop=1, num=100)
# y: 100 samples from the standard normal distribution.
y_values = np.random.randn(100)

# Trace: no `mode` is passed, so plotly's default drawing mode applies.
trace = go.Scatter(x=x_values, y=y_values)

# Trace list (a single trace here).
data = [trace]

# Title and axis labels.
layout = go.Layout(
    title='Line plot',
    xaxis={'title': 'random x values'},
    yaxis={'title': 'random y values'},
)

# Build the figure and render it offline (written to temp-plot.html).
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [13]:
# Same seed as the previous line plot, so the same noise is regenerated.
np.random.seed(56)

# 100 ascending x positions on [0, 1] and 100 standard-normal y samples.
x_values = np.linspace(0, 1, num=100)
y_values = np.random.randn(100)

# Three traces over the same x values, one per drawing mode.
# The y values are shifted by -5 / 0 / +5 so the traces do not overlap.
trace0 = go.Scatter(
    x=x_values,
    y=y_values - 5,
    mode='markers',
    name='markers only',
)
trace1 = go.Scatter(
    x=x_values,
    y=y_values,
    mode='lines',
    name='lines only',
)
trace2 = go.Scatter(
    x=x_values,
    y=y_values + 5,
    mode='markers+lines',
    name='markers and lines',
)

# All traces go into one list so they share a single figure.
data = [trace0, trace1, trace2]

# Title and axis labels.
layout = go.Layout(
    title='Line plot',
    xaxis={'title': 'random x values'},
    yaxis={'title': 'random y values'},
)

# Build the figure and render it offline (written to temp-plot.html).
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

# plotting iris dataset using plotly

In [14]:
iris = pd.read_csv('datasets/iris.csv')

In [15]:
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [20]:
# Distinct species labels in order of first appearance.
# value_counts() orders by frequency (tie order is not documented), which is
# not what a plain "list of species" needs; dropna() keeps the old behaviour of
# ignoring missing labels.
species = iris['class'].dropna().unique().tolist()

In [23]:
# One markers-only trace for the whole data set: sepal length on x,
# petal length on y (species are not distinguished here).
trace = go.Scatter(
    x=iris['sepal_length'],
    y=iris['petal_length'],
    mode='markers',
)

data = [trace]

# Title and axis labels.
layout = go.Layout(
    title='Iris species',
    xaxis={'title': 'sepal_length'},
    yaxis={'title': 'petal_length'},
)

# Build the figure and render it offline (written to temp-plot.html).
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [28]:
iris[iris['class'] == 'Iris-setosa']['sepal_length'].values

array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,
       4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,
       5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,
       5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. ])

In [29]:
species

['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

In [34]:
def sepal_length_giver(specie: str) -> np.ndarray:
    """Return the sepal lengths of the rows of ``iris`` whose class is *specie*.

    Reads the module-level ``iris`` DataFrame.

    Args:
        specie: Value to match against the ``class`` column,
            e.g. ``'Iris-setosa'``.

    Returns:
        A NumPy array of the matching ``sepal_length`` values; it is empty
        when no row matches.
    """
    mask = iris['class'] == specie
    # One .loc lookup selects rows and column together (no chained indexing).
    return iris.loc[mask, 'sepal_length'].values

def petal_length_giver(specie: str) -> np.ndarray:
    """Return the petal lengths of the rows of ``iris`` whose class is *specie*.

    Reads the module-level ``iris`` DataFrame.

    Args:
        specie: Value to match against the ``class`` column,
            e.g. ``'Iris-setosa'``.

    Returns:
        A NumPy array of the matching ``petal_length`` values; it is empty
        when no row matches.
    """
    mask = iris['class'] == specie
    # One .loc lookup selects rows and column together (no chained indexing).
    return iris.loc[mask, 'petal_length'].values

In [35]:
petal_length_giver('Iris-setosa')

array([1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4,
       1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1. , 1.7, 1.9, 1.6,
       1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.5, 1.3,
       1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4])

In [41]:
# One markers-only trace per species, each with its own colour and legend
# name; x is sepal length and y is petal length for that species.
trace_setosa = go.Scatter(
    x=sepal_length_giver('Iris-setosa'),
    y=petal_length_giver('Iris-setosa'),
    mode='markers',
    marker={'color': 'red'},
    name='setosa',
)

trace_versicolor = go.Scatter(
    x=sepal_length_giver('Iris-versicolor'),
    y=petal_length_giver('Iris-versicolor'),
    mode='markers',
    marker={'color': 'blue'},
    name='versicolor',
)

trace_virginica = go.Scatter(
    x=sepal_length_giver('Iris-virginica'),
    y=petal_length_giver('Iris-virginica'),
    mode='markers',
    marker={'color': 'green'},
    name='virginica',
)


# data = [go.Scatter(x = sepal_length_giver(specie), y = petal_length_giver(specie), name=specie, mode = 'markers') 
#         for specie in species]

In [42]:
# The three per-species traces share one figure.
data = [
    trace_setosa,
    trace_versicolor,
    trace_virginica,
]

# Title and axis labels.
layout = go.Layout(
    title='Iris species',
    xaxis={'title': 'sepal_length'},
    yaxis={'title': 'petal_length'},
)

# Build the figure and render it offline (written to temp-plot.html).
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

# Line chart assignment

    Using the file 2010YumaAZ.csv, develop a line chart that plots seven days' worth of temperature data on one graph.
    You can use a for loop to assign each day to its own trace.

In [43]:
df = pd.read_csv('datasets/2010YumaAZ.csv')

In [44]:
df

Unnamed: 0,LST_DATE,DAY,LST_TIME,T_HR_AVG
0,20100601,TUESDAY,0:00,25.2
1,20100601,TUESDAY,1:00,24.1
2,20100601,TUESDAY,2:00,24.4
3,20100601,TUESDAY,3:00,24.9
4,20100601,TUESDAY,4:00,22.8
...,...,...,...,...
163,20100607,MONDAY,19:00,39.4
164,20100607,MONDAY,20:00,38.5
165,20100607,MONDAY,21:00,37.0
166,20100607,MONDAY,22:00,34.7


In [51]:
# Tuesday only: a boolean mask on DAY keeps just that day's rows.
is_tuesday = df['DAY'] == 'TUESDAY'
tuesday = df[is_tuesday]

# Time-of-day labels (x axis) and hourly average temperatures (y axis).
times = tuesday['LST_TIME'].values
temps = tuesday['T_HR_AVG'].values

In [55]:
# Line trace of Tuesday's temperatures against the time of day.
trace = go.Scatter(
    x=times,
    y=temps,
    mode='lines',
)

data = [trace]

# Title and axis labels.
layout = go.Layout(
    title='TUESDAY',
    xaxis={'title': 'time'},
    yaxis={'title': 'temp'},
)

# Build the figure and render it offline (written to temp-plot.html).
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [60]:
# Distinct day names in order of first appearance in the file, so the traces
# built from `days` follow the data's row order. value_counts() would order
# them by frequency instead (tie order is not documented).
# dropna() keeps the old behaviour of ignoring missing labels, and pd.Index
# keeps `days` the same kind of object as before.
days = pd.Index(df['DAY'].dropna().unique())

In [63]:
# NOTE: this function reuses the name `times`, which the Tuesday cell binds to
# an array; once this definition runs, that earlier plotting cell can no
# longer be re-run as-is.
def times(day: str) -> np.ndarray:
    """Return the ``LST_TIME`` values of the rows of ``df`` recorded on *day*.

    Reads the module-level ``df`` DataFrame.

    Args:
        day: Value to match against the ``DAY`` column, e.g. ``'TUESDAY'``.

    Returns:
        A NumPy array of the matching ``LST_TIME`` values; it is empty when
        no row matches.
    """
    mask = df['DAY'] == day
    # One .loc lookup selects rows and column together (no chained indexing).
    return df.loc[mask, 'LST_TIME'].values

# NOTE: this function reuses the name `temps`, which the Tuesday cell binds to
# an array; once this definition runs, that earlier plotting cell can no
# longer be re-run as-is.
def temps(day: str) -> np.ndarray:
    """Return the ``T_HR_AVG`` values of the rows of ``df`` recorded on *day*.

    Reads the module-level ``df`` DataFrame.

    Args:
        day: Value to match against the ``DAY`` column, e.g. ``'TUESDAY'``.

    Returns:
        A NumPy array of the matching ``T_HR_AVG`` values; it is empty when
        no row matches.
    """
    mask = df['DAY'] == day
    # One .loc lookup selects rows and column together (no chained indexing).
    return df.loc[mask, 'T_HR_AVG'].values

# One markers+lines trace per day, named after the day.
data = [
    go.Scatter(
        x=times(day),
        y=temps(day),
        mode='markers+lines',
        name=day,
    )
    for day in days
]

# Title and axis labels.
layout = go.Layout(
    title='days',
    xaxis={'title': 'time'},
    yaxis={'title': 'temp'},
)

# Build the figure and render it offline (written to temp-plot.html).
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

# Line chart assignment 2

    Take the data 'population.csv' and plot a lines + markers graph of the population estimates for the six New England states.

In [66]:
df = pd.read_csv('datasets/population.csv')
df

Unnamed: 0,Name,PopEstimate2010,PopEstimate2011,PopEstimate2012,PopEstimate2013,PopEstimate2014,PopEstimate2015,PopEstimate2016,PopEstimate2017
0,Connecticut,3580171,3591927,3597705,3602470,3600188,3593862,3587685,3588184
1,Maine,1327568,1327968,1328101,1327975,1328903,1327787,1330232,1335907
2,Massachusetts,6564943,6612178,6659627,6711138,6757925,6794002,6823721,6859819
3,New Hampshire,1316700,1318345,1320923,1322622,1328684,1330134,1335015,1342795
4,Rhode Island,1053169,1052154,1052761,1052784,1054782,1055916,1057566,1059639
5,Vermont,625842,626210,625606,626044,625665,624455,623354,623657


In [70]:
states = df['Name'].values
states

array(['Connecticut', 'Maine', 'Massachusetts', 'New Hampshire',
       'Rhode Island', 'Vermont'], dtype=object)

In [81]:
x = df.columns[1:]

In [85]:
def func(state: str) -> list:
    """Return the population estimates of *state*, one per column in ``x``.

    Reads the module-level ``df`` DataFrame and the column labels in ``x``.

    Args:
        state: Value to match against the ``Name`` column.

    Returns:
        A list with, for each column label in ``x`` (in that order), the value
        from the first row whose ``Name`` equals *state*.

    Raises:
        IndexError: If no row has ``Name`` equal to *state*.
    """
    state_rows = df[df['Name'] == state]

    estimates = []
    for column in x:
        # [0] takes the first matching row's value for this column.
        estimates.append(state_rows[column].values[0])
    return estimates

# One markers+lines trace per state: the column labels in `x` on the x axis,
# that state's estimates on the y axis.
data = [
    go.Scatter(
        x=x,
        y=func(state),
        mode='markers+lines',
        name=state,
    )
    for state in states
]

# Title and axis labels.
layout = go.Layout(
    title='Population Estimate',
    xaxis={'title': 'year'},
    yaxis={'title': 'population'},
)

# Build the figure and render it offline (written to temp-plot.html).
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [84]:
func('Massachusetts')

[6564943, 6612178, 6659627, 6711138, 6757925, 6794002, 6823721, 6859819]

# Bar chart