# Plotly Practice: Scatter Plots

This notebook follows the data visualization tutorial for `seaborn` from Kaggle, but I will used `plotly` to produce said plots.

In [1]:
# general
import numpy as np
import pandas as pd
pd.plotting.register_matplotlib_converters()

# mpl
import matplotlib as mpl
import matplotlib.pyplot as plt

# plotly
import plotly.express as px
import plotly.io as pio
pio.renderers.default = 'iframe'

# stats
from scipy import stats

In [2]:
# import data
candy_path = '../data/datavis/candy.csv'
candy = pd.read_csv(candy_path,index_col='id')

In [3]:
# view data
candy.head()

Unnamed: 0_level_0,competitorname,chocolate,fruity,caramel,peanutyalmondy,nougat,crispedricewafer,hard,bar,pluribus,sugarpercent,pricepercent,winpercent
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,100 Grand,Yes,No,Yes,No,No,Yes,No,Yes,No,0.732,0.86,66.971725
1,3 Musketeers,Yes,No,No,No,Yes,No,No,Yes,No,0.604,0.511,67.602936
2,Air Heads,No,Yes,No,No,No,No,No,No,No,0.906,0.511,52.341465
3,Almond Joy,Yes,No,No,Yes,No,No,No,Yes,No,0.465,0.767,50.347546
4,Baby Ruth,Yes,No,Yes,Yes,Yes,No,No,Yes,No,0.604,0.767,56.914547


In [4]:
# plotting a scatterplot of sugar vs popularity

# labels
f_title = "Sugar content vs. Popularity"
ax_labels = { 'x' : "Sugar content (%)"
             ,'y' : "Popularity (%)"
            }

# make figure
fig = px.scatter(x=candy.sugarpercent*100
                ,y=candy.winpercent
                ,title=f_title
                ,labels=ax_labels
                )
# centre title
fig.update_layout(title_x=0.5, title_y=.85)

# plot
fig.show()

In [5]:
# plot popularity vs sugar content with regression

# labels
f_title = "Sugar content vs. Popularity"
ax_labels = { 'x' : "Sugar content (%)"
             ,'y' : "Popularity (%)"
            }

# make figure
fig = px.scatter(x=candy.sugarpercent
                ,y=candy.winpercent
                ,title=f_title
                ,labels=ax_labels
                ,trendline='ols')

# centre title
fig.update_layout(title_x=0.5, title_y=.85)

# plot
fig.show()

In [8]:
# print result information
fit_res = px.get_trendline_results(fig)
fit_res.query("0").px_fit_results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.044
Model:,OLS,Adj. R-squared:,0.016
Method:,Least Squares,F-statistic:,1.601
Date:,"Wed, 22 Jun 2022",Prob (F-statistic):,0.214
Time:,20:05:17,Log-Likelihood:,-145.53
No. Observations:,37,AIC:,295.1
Df Residuals:,35,BIC:,298.3
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,54.1732,5.727,9.459,0.000,42.546,65.800
x1,10.6750,8.436,1.265,0.214,-6.450,27.800

0,1,2,3
Omnibus:,0.431,Durbin-Watson:,1.613
Prob(Omnibus):,0.806,Jarque-Bera (JB):,0.565
Skew:,-0.031,Prob(JB):,0.754
Kurtosis:,2.398,Cond. No.,5.73


In [7]:
# plot popularity vs price with regression
# categorised by --> chocolate content

# labels
f_title = "Price vs. Popularity"
ax_labels = { 'pricepercent' : "Price Distribution (%)"
             ,'winpercent'   : "Popularity (%)"
            }

# make figure
fig = px.scatter(candy
                ,x='pricepercent'
                ,y='winpercent'
                ,color='chocolate'
                ,title=f_title
                ,labels=ax_labels
                ,trendline='ols'
                ,width=600
                ,height=400)


# plot
fig.show()