# Using plotly
## Scatterplots

In [1]:
# A scatterplot lets us look for some form of correlation between two variables

In [7]:
import numpy as np
import plotly.offline as pyo
import plotly.graph_objs as go

In [3]:
# Fix the global RNG state so the same scatter data is produced on every run.
np.random.seed(42)

# Two consecutive draws of 100 random integers each; randint's upper bound
# (101) is exclusive, so the values fall in 1..100.
random_x = np.random.randint(low=1, high=101, size=100)
random_y = np.random.randint(low=1, high=101, size=100)

In [14]:
# plotly syntax: the traces live in a list (conventionally named `data`) that
# is handed to a plot call, either directly or wrapped in a Figure.
data = [
    go.Scatter(
        x=random_x,
        y=random_y,
        mode="markers",
        marker=dict(
            size=12,
            color="rgb(51,204,153)",
            symbol="pentagon",
            line=dict(width=2),
        ),
    )
]  # this is our data

# Layout: title, axis labels and hover behaviour.
# Both a literal {...} and a dict(...) call are accepted for nested options.
layout = go.Layout(
    title="Hello First plot",
    xaxis={"title": "MY X AXIS"},
    yaxis=dict(title="MY Y AXIS"),
    hovermode="closest",
)

# Plot once, with the layout attached. Plotting the bare `data` list to the
# same filename first would only write a layout-less file that this call
# immediately overwrites.
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="scatter.html")  # saves the plot to scatter.html

'scatter.html'

## Line Charts
- A line chart displays a series of data points (called markers) joined by line segments
- The x-values are ordered in some way

In [24]:
# Reproducible sample: 100 evenly spaced x positions on [0, 1] and
# 100 standard-normal y values.
np.random.seed(56)
x_values = np.linspace(0, 1, 100)
y_values = np.random.randn(100)

# The same series drawn three ways; the +5 / -5 shifts keep the traces apart.
trace0 = go.Scatter(
    x=x_values,
    y=y_values + 5,
    mode="markers",
    name="markers",
)
trace1 = go.Scatter(
    x=x_values,
    y=y_values,
    mode="lines",
    name="mylines",
)
trace2 = go.Scatter(
    x=x_values,
    y=y_values - 5,
    mode="lines+markers",
    name="myfavorite",
)

data = [trace0, trace1, trace2]
layout = go.Layout(title="Line Charts")
fig = go.Figure(data=data, layout=layout)

# No filename given, so plotly writes to its default temp-plot.html
pyo.plot(fig)

'temp-plot.html'

## Using real data set with pandas

In [40]:
import pandas as pd

In [63]:
df = pd.read_csv("nst-est2017-alldata.csv")

# Keep only the observations whose DIVISION column equals "1", then use the
# NAME column as the index (i.e. the row labels).
df2 = df.loc[df["DIVISION"] == "1"].set_index("NAME")

In [64]:
# Keep only the population columns: df2.columns yields every column name and
# the comprehension retains the ones whose name starts with "POP".
list_of_pop_col = [
    label
    for label in df2.columns
    if label.startswith("POP")
]
df2 = df2[list_of_pop_col]

# One line trace per row of df2: x is the list of column names, y is that
# row's values (looked up by its index label via .loc), and the label doubles
# as the legend name.
data = [
    go.Scatter(
        x=df2.columns,
        y=df2.loc[name],
        mode="lines",
        name=name,
    )
    for name in df2.index
]
pyo.plot(data)

'temp-plot.html'

## Exercise

In [75]:
# Exercise: line chart of seven days' worth of hourly temperature data,
# one line per day.
df = pd.read_csv("2010YumaAZ.csv")
days = list(df["DAY"].unique())

data = []
for day in days:
    # Each line represents a single day, so BOTH axes must come from that
    # day's rows. Passing the full LST_TIME column as x while filtering only
    # y would pair a full-length x with a one-day y and rely on positional
    # matching to look right.
    day_rows = df[df["DAY"] == day]
    trace = go.Scatter(
        x=day_rows["LST_TIME"],
        y=day_rows["T_HR_AVG"],
        mode="lines",
        name=day,
    )
    data.append(trace)

layout = go.Layout(title="Daily temp avg")

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [79]:
# Same chart as a list of plain trace dicts, one per day.
# groupby(..., sort=False) yields the days in order of first appearance and
# hands over each day's rows, so x and y are always taken from the same rows
# (the full LST_TIME column would not match a single day's T_HR_AVG values).
data = [
    {
        "x": day_rows["LST_TIME"],
        "y": day_rows["T_HR_AVG"],
        "name": day,
    }
    for day, day_rows in df.groupby("DAY", sort=False)
]

# Note: this cell only builds `data` and `layout`; nothing is rendered here.
layout = go.Layout(title="Daily temp avgs")

## Bar Charts
- Variants: stacked, normal, and nested bar charts

In [96]:
df = pd.read_csv("2018WinterOlympics.csv")

# A single bar series: one bar per NOC entry, bar height = its Total value.
data = [
    go.Bar(
        x=df["NOC"],
        y=df["Total"],
    )
]
layout = go.Layout(title="Medals")
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

# Show the frame as the cell output
df

Unnamed: 0,Rank,NOC,Gold,Silver,Bronze,Total
0,1,Norway,14,14,11,39
1,2,Germany,14,10,7,31
2,3,Canada,11,8,10,29
3,4,United States,9,8,6,23
4,5,Netherlands,8,6,6,20
5,6,Sweden,7,6,1,14
6,7,Republic of Korea,5,8,4,17
7,8,Switzerland,5,6,4,15
8,9,France,5,4,6,15
9,10,Austria,5,3,6,14


In [83]:
# Nested bar chart: one bar series per medal type, placed side by side
# for every NOC entry.
trace1 = go.Bar(
    x=df["NOC"],
    y=df["Gold"],
    name="Gold",
    marker={"color": "#FFD700"},
)
trace2 = go.Bar(
    x=df["NOC"],
    y=df["Silver"],
    name="Silver",
    marker={"color": "#9EA0A1"},
)
trace3 = go.Bar(
    x=df["NOC"],
    y=df["Bronze"],
    name="Bronze",
    marker={"color": "#CD7F32"},
)

data = [trace1, trace2, trace3]

# To get a stacked chart instead, pass barmode="stack" to go.Layout
layout = go.Layout(title="Medals")
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

### Bar chart exercise: Create a stacked bar chart

In [98]:
# Load the mock survey results and print them as plain text for inspection
survey_csv = "mocksurvey.csv"
df = pd.read_csv(survey_csv)
print(df)

   Unnamed: 0  Strongly Agree  Somewhat Agree  Neutral  Somewhat Disagree  \
0  Question 1            0.45            0.25     0.10               0.12   
1  Question 2            0.12            0.07     0.48               0.18   
2  Question 3            0.05            0.22     0.19               0.23   

   Strongly Disagree  
0               0.08  
1               0.15  
2               0.31  


In [99]:
# As loaded, the first column of the survey frame ("Unnamed: 0") holds the
# question labels and the remaining columns hold one response option each.
# Use the labels as the x categories and build traces only from the response
# columns; iterating over every column would add a bogus trace made of the
# labels themselves, and df.index would put 0/1/2 on the axis instead of the
# question names.
question_col = df.columns[0]
response_cols = df.columns[1:]

# x holds the categories (which should be unique); y holds each response
# column's values per category.
# Passing orientation="h" to go.Bar would give a horizontal bar plot.
data = [
    go.Bar(x=df[question_col], y=df[response], name=response)
    for response in response_cols
]
layout = go.Layout(title="Survey Results", barmode="stack")

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

## Bubble Charts
- Similar to scatter plots, except that a third variable's information is conveyed through the size of the markers

In [101]:
# Load the car data set used for the bubble charts
mpg_csv = "mpg.csv"
df = pd.read_csv(mpg_csv)

# Show the frame so we can see which columns are available to plot
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
5,15.0,8,429.0,198,4341,10.0,70,1,ford galaxie 500
6,14.0,8,454.0,220,4354,9.0,70,1,chevrolet impala
7,14.0,8,440.0,215,4312,8.5,70,1,plymouth fury iii
8,14.0,8,455.0,225,4425,10.0,70,1,pontiac catalina
9,15.0,8,390.0,190,3850,8.5,70,1,amc ambassador dpl


In [102]:
# Bubble chart traces: horsepower against mpg, with each marker sized at
# twice the car's cylinder count, so cars with more cylinders get larger
# markers. `text` makes the car's name show up on hover.
data = [
    go.Scatter(
        x=df["horsepower"],
        y=df["mpg"],
        text=df["name"],
        mode="markers",
        marker={"size": 2 * df["cylinders"]},
    )
]
#so some markers with more cylinders will be larger

In [103]:
# Wrap the bubble traces in a titled figure and render it
layout = go.Layout(title="bubble chart")
fig = go.Figure(
    data=data,
    layout=layout,
)
pyo.plot(fig)

'temp-plot.html'

In [106]:
# Same bubble chart, but driven by other columns:
# marker size follows weight (scaled down by 100) and marker colour follows
# the cylinder count; showscale=True adds the colour bar.
data = [
    go.Scatter(
        x=df["horsepower"],
        y=df["mpg"],
        text=df["name"],
        mode="markers",
        marker={
            "size": df["weight"] / 100,
            "color": df["cylinders"],
            "showscale": True,
        },
    )
]
layout = go.Layout(title="bubble chart")
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

### Task: Create a bubble chart that compares three other features from the mpg.csv dataset.

In [107]:
# Three features in one chart: displacement on x, acceleration on y, and
# weight (divided by 400) as the marker size. Hovering shows the car name.
data = [
    go.Scatter(
        x=df["displacement"],
        y=df["acceleration"],
        text=df["name"],
        mode="markers",
        marker={"size": df["weight"] / 400},
    )
]
layout = go.Layout(
    title="Bubble solution",
    hovermode="closest",
)
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

## Boxplots

In [120]:
# Small hand-written sample for the first box plot
y = [
    1, 14, 14, 15, 16,
    18, 18, 19, 19, 20,
    20, 23, 24, 26, 27,
    27, 28, 29, 33, 54,
]
y

[1, 14, 14, 15, 16, 18, 18, 19, 19, 20, 20, 23, 24, 26, 27, 27, 28, 29, 33, 54]

In [122]:
# boxpoints="all" draws every data point alongside the box,
# jitter spreads the points out so that they all remain visible, and
# pointpos=0 places them right on top of the box (positive values would
# shift them to the right of it).
data = [
    go.Box(
        y=y,
        boxpoints="all",
        jitter=0.3,
        pointpos=0,
    )
]
pyo.plot(data)

'temp-plot.html'

In [124]:
# Two samples plotted side by side, one box per sample
snodgrass = [0.209, 0.205, 0.196, 0.210, 0.202, 0.207, 0.224, 0.223, 0.220, 0.201]
twain = [0.225, 0.262, 0.217, 0.240, 0.230, 0.229, 0.235, 0.217]

data = [
    go.Box(y=snodgrass, name="Snodgrass"),
    go.Box(y=twain, name="Twain"),
]
pyo.plot(data)

'temp-plot.html'

### Exercise: Make a DataFrame from the abalone dataset and take two independent random samples of different sizes from the rings field. Use boxplots to show that the samples derive from the same population

In [134]:
df = pd.read_csv("abalone.csv")

# Seed the RNG in this cell so the two samples are reproducible even when the
# cell is run on its own, instead of depending on whatever random state
# earlier cells happened to leave behind.
np.random.seed(42)

# Two consecutive draws of different sizes from the rings column, each drawn
# without replacement.
samp1 = np.random.choice(df["rings"], 30, replace=False)
samp2 = np.random.choice(df["rings"], 20, replace=False)

In [135]:
# One box per sample so their distributions can be compared side by side
data = [
    go.Box(y=samp1, name="Sample1"),
    go.Box(y=samp2, name="Sample2"),
]
pyo.plot(data)

'temp-plot.html'