In [1]:
#pip install plotly

In [2]:
import plotly.express as px

# px.scatter

In [3]:
# Load the iris sample dataset bundled with Plotly Express.
df = px.data.iris()
# Preview five randomly selected rows.
df.sample(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
24,4.8,3.4,1.9,0.2,setosa,1
16,5.4,3.9,1.3,0.4,setosa,1
52,6.9,3.1,4.9,1.5,versicolor,2
37,4.9,3.1,1.5,0.1,setosa,1
118,7.7,2.6,6.9,2.3,virginica,3


# px.scatter – bubble chart

Setting size and color with column names
Scatter plots with variable-sized circular markers are often known as bubble charts. Note that color and size data are added to hover information. You can add other columns to hover data with the hover_data argument of px.scatter.

In [4]:
# Bubble chart: colour encodes species, marker size encodes petal length,
# and petal width is added to the hover tooltip.
fig = px.scatter(
    data_frame=df,
    x="sepal_width",
    y="sepal_length",
    color="species",
    size="petal_length",
    hover_data=["petal_width"],
)
fig.show()

# Discrete Color with Plotly Express

Most Plotly Express functions accept a color argument which automatically assigns data values to discrete colors if the data is non-numeric. If the data is numeric, the color will automatically be considered continuous. This means that numeric strings must be parsed to be used for continuous color, and conversely, numbers used as category codes must be converted to strings.

In [5]:
# Load the restaurant tips sample dataset bundled with Plotly Express.
tips = px.data.tips()
# Print column names, non-null counts and dtypes.
tips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   total_bill  244 non-null    float64
 1   tip         244 non-null    float64
 2   sex         244 non-null    object 
 3   smoker      244 non-null    object 
 4   day         244 non-null    object 
 5   time        244 non-null    object 
 6   size        244 non-null    int64  
dtypes: float64(2), int64(1), object(4)
memory usage: 13.5+ KB


In [6]:
# Tip versus party size; the day drives both colour and marker symbol,
# and the total bill drives marker size.
fig = px.scatter(
    data_frame=tips,
    x="size",
    y="tip",
    color="day",
    symbol="day",
    size="total_bill",
    title="tip vs size",
)
fig.show()

In [7]:

# "smoker" is a string column, so colours are assigned as discrete categories.
fig = px.scatter(
    data_frame=tips,
    x="total_bill",
    y="tip",
    color="smoker",
    title="String 'smoker' values mean discrete colors",
)
fig.show()

# Numeric value "size" means continuous color

In [8]:
# "size" is an integer column, so it is mapped onto a continuous colour scale.
fig = px.scatter(
    data_frame=tips,
    x="total_bill",
    y="tip",
    color="size",
    title="Numeric 'size' values mean continuous color",
)
fig.show()

# The symbol argument can be mapped to a column as well

In [9]:
# Reload iris and encode species with both colour and marker symbol.
df = px.data.iris()
fig = px.scatter(
    data_frame=df,
    x="sepal_width",
    y="sepal_length",
    color="species",
    symbol="species",
)
fig.show()

# Customized symbol with .update_traces( )

Styling Markers in Python
https://plotly.com/python/marker-style/

In [10]:
# Build the base scatter first ...
fig = px.scatter(
    data_frame=df,
    x="sepal_width",
    y="sepal_length",
    color="species",
    symbol="species",
)
# ... then restyle every marker-mode trace: larger markers with a dark outline.
fig.update_traces(
    marker=dict(
        size=12,
        line=dict(width=2, color='DarkSlateGrey'),
    ),
    selector=dict(mode='markers'),
)
fig.show()

# Linear Regression Line (OLS)

In [11]:
# Scatter of tip against total bill with an ordinary-least-squares trendline.
fig = px.scatter(
    data_frame=tips,
    x="total_bill",
    y="tip",
    trendline="ols",
)
fig.show()

In [12]:
import pandas as pd

# Read the gold/stock CSV, parse the day-first "Date" strings into datetimes,
# and use the parsed dates as the index.
df_gold = pd.read_csv("gold_goldstock.csv")
df_gold = df_gold.assign(
    Date=pd.to_datetime(df_gold["Date"], dayfirst=True)
).set_index("Date")

In [13]:
# Preview three randomly selected rows of the date-indexed frame.
df_gold.sample(3)

Unnamed: 0_level_0,GoldSpot,Zijin,Zhaojin
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-11-14,1188.75,2.19,4.0
2015-05-22,1206.21,2.99,5.54
2013-08-31,1395.27,1.95,7.35


In [14]:
# Plot both stock columns against the gold spot price, each with its own
# OLS trendline.
fig = px.scatter(
    data_frame=df_gold,
    x="GoldSpot",
    y=["Zijin", "Zhaojin"],
    trendline="ols",
)
fig.show()

# Facet Plots

In [15]:
# One facet column per value of "sex"; colour distinguishes smokers.
fig = px.scatter(
    data_frame=tips,
    x="total_bill",
    y="tip",
    color="smoker",
    facet_col="sex",
)
fig.show()

In [16]:
# Facet grid: columns split by "sex", rows split by "time".
fig = px.scatter(
    data_frame=tips,
    x="total_bill",
    y="tip",
    color="smoker",
    facet_col="sex",
    facet_row="time",
)
fig.show()

# px.box - Box plot

The exclusive algorithm uses the median to divide the ordered dataset into two halves. If the sample size is odd, the median is not included in either half. Q1 is then the median of the lower half and Q3 is the median of the upper half.

The inclusive algorithm also uses the median to divide the ordered dataset into two halves, but if the sample size is odd, the median is included in both halves. Q1 is then the median of the lower half and Q3 is the median of the upper half.

In [17]:
# Box plot of total bill per day, split by smoker status.
fig = px.box(
    data_frame=tips,
    x="day",
    y="total_bill",
    color="smoker",
)
# Quartile algorithm: "exclusive" here; alternatives are "inclusive" or the
# default "linear".
fig.update_traces(quartilemethod="exclusive")
fig.show()

# Facet plot

In [18]:
# Load the stocks sample dataset in its indexed form.
df_stock = px.data.stocks(indexed=True)
# Show the first three rows.
df_stock.head(n=3)

company,GOOG,AAPL,AMZN,FB,NFLX,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-01,1.0,1.0,1.0,1.0,1.0,1.0
2018-01-08,1.018172,1.011943,1.061881,0.959968,1.053526,1.015988
2018-01-15,1.032008,1.019771,1.05324,0.970243,1.04986,1.020524


In [19]:
# One line-chart facet per company, wrapping to a new row after three columns.
fig = px.line(
    data_frame=df_stock,
    facet_col="company",
    facet_col_wrap=3,
)
fig.show()

# Adding Lines and Rectangles to Facet Plots

It is possible to add labelled horizontal and vertical lines and rectangles to facet plots using .add_hline(), .add_vline(), .add_hrect() or .add_vrect(). The default row and col values are "all" but this can be overridden, as with the rectangle below, which only appears in the first column.

In [20]:
# Faceted line chart, two facet columns per row.
fig = px.line(
    data_frame=df_stock,
    facet_col="company",
    facet_col_wrap=2,
)
# Dotted baseline at y=1; no row/col given, so the default applies.
fig.add_hline(
    y=1,
    line_dash="dot",
    annotation_text="Jan 1, 2018 baseline",
    annotation_position="bottom right",
)
# Shaded date band restricted to facet column 1.
fig.add_vrect(
    x0="2018-09-24",
    x1="2018-12-18",
    col=1,
    annotation_text="decline",
    annotation_position="top left",
    fillcolor="green",
    opacity=0.25,
    line_width=0,
)
fig.show()

In [21]:
# Faceted line chart, two facet columns per row.
fig = px.line(
    data_frame=df_stock,
    facet_col="company",
    facet_col_wrap=2,
)
# Dotted baseline at y=1, restricted to facet column 2.
fig.add_hline(
    y=1,
    line_dash="dot",
    col=2,
    annotation_text="Jan 1, 2018 baseline",
    annotation_position="bottom right",
)
# Shaded date band; no row/col given, so the default applies.
fig.add_vrect(
    x0="2018-09-24",
    x1="2018-12-18",
    annotation_text="decline",
    annotation_position="top left",
    fillcolor="green",
    opacity=0.25,
    line_width=0,
)
fig.show()

# Line plot

Styling Markers in Python https://plotly.com/python/marker-style/

In [22]:
# Keep only the Oceania rows of the gapminder sample dataset.
df_life = px.data.gapminder().query("continent == 'Oceania'")
# Preview three randomly selected rows.
df_life.sample(n=3)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
1103,New Zealand,Oceania,2007,80.204,4115771,25185.00911,NZL,554
66,Australia,Oceania,1982,74.74,15184200,19477.00928,AUS,36
64,Australia,Oceania,1972,71.93,13177000,16788.62948,AUS,36


In [23]:
# Life expectancy over time, one line (with point markers) per country.
fig = px.line(
    data_frame=df_life,
    x='year',
    y='lifeExp',
    color='country',
    markers=True,
)
fig.show()

# Line plot with Datetime index

In [24]:
# Re-read the gold/stock CSV, parse the day-first "Date" strings into
# datetimes, and use them as the index.
df_gold = pd.read_csv("gold_goldstock.csv")
df_gold = df_gold.assign(
    Date=pd.to_datetime(df_gold["Date"], dayfirst=True)
).set_index("Date")

# Plot both stock columns against the datetime index.
fig = px.line(
    data_frame=df_gold,
    x=df_gold.index,
    y=["Zijin", "Zhaojin"],
    title="Gold Stock",
)
fig.show()

# Enable log scale for values spanning a large range

In [25]:
# All three series on a linear y-axis.
fig = px.line(
    data_frame=df_gold,
    x=df_gold.index,
    y=["GoldSpot", "Zijin", "Zhaojin"],
    title="Gold Stock",
)
fig.show()

In [26]:
# Same three series, this time with a logarithmic y-axis.
fig = px.line(
    data_frame=df_gold,
    x=df_gold.index,
    y=["GoldSpot", "Zijin", "Zhaojin"],
    log_y=True,
    title="Gold Stock",
)
fig.show()

# px.pie - Pie chart

In [27]:
# Pie chart: one sector per day, sized by the "tip" values.
fig = px.pie(
    data_frame=tips,
    values='tip',
    names='day',
)
fig.show()

# px.sunburst - Sunburst of a rectangular DF

In [28]:
# Sunburst with hierarchy day -> time -> sex, sectors sized by total bill.
fig = px.sunburst(
    data_frame=tips,
    path=['day', 'time', 'sex'],
    values='total_bill',
)
fig.show()

# px.funnel_area – Funnel plot

In [29]:
# Funnel-area chart built from literal stage names and their counts.
fig = px.funnel_area(
    names=[
        "Site Visitor",
        "Product View",
        "Wish List",
        "Shopping Cart",
        "Order Paid",
    ],
    values=[500, 200, 50, 35, 20],
)
fig.show()

# px.bar - Bar Chart

In [30]:
# European countries in 2007 with a population above two million.
df_pop = px.data.gapminder().query(
    "continent == 'Europe' and year == 2007 and pop > 2.e6"
)
# Bar per country; text_auto='.2s' labels each bar with its formatted value.
fig = px.bar(
    data_frame=df_pop,
    x='country',
    y='pop',
    text_auto='.2s',
    title="Default: various text sizes, positions and angles",
)
fig.show()

# 3D scatter plot

In [31]:
# Separate copy of the iris dataset for the 3D and parallel-coordinate plots.
df_iris = px.data.iris()
# 3D scatter over three measurements, coloured by species.
fig = px.scatter_3d(
    data_frame=df_iris,
    x='sepal_length',
    y='sepal_width',
    z='petal_width',
    color='species',
)
fig.show()

# px.scatter_geo - Geographical Scatter Plot

In [32]:
# Gapminder rows for 2007 only.
df_geo = px.data.gapminder().query("year == 2007")
fig = px.scatter_geo(
    data_frame=df_geo,
    locations="iso_alpha",
    color="continent",        # column that sets the marker colour
    hover_name="country",     # column shown in the hover information
    size="pop",               # column that sets the marker size
    projection="natural earth",
)
fig.show()

# px.treemap

If a color argument is passed, the color of a node is computed as the average of the color values of its children, weighted by their values.

In [33]:
import numpy as np

# Treemap world -> continent -> country, rectangles sized by population and
# coloured by life expectancy. The colour scale is centred on the
# population-weighted average life expectancy.
fig = px.treemap(
    data_frame=df_geo,
    path=[px.Constant("world"), 'continent', 'country'],
    values='pop',
    color='lifeExp',
    hover_data=['iso_alpha'],
    color_continuous_scale='RdBu',
    color_continuous_midpoint=np.average(
        df_geo['lifeExp'], weights=df_geo['pop']
    ),
)
# Tighten the figure margins.
fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
fig.show()

# px.parallel_coordinates

In [34]:
# Preview five randomly selected rows of the iris frame.
df_iris.sample(5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
88,5.6,3.0,4.1,1.3,versicolor,2
25,5.0,3.0,1.6,0.2,setosa,1
69,5.6,2.5,3.9,1.1,versicolor,2
89,5.5,2.5,4.0,1.3,versicolor,2
31,5.4,3.4,1.5,0.4,setosa,1


In [35]:
# Parallel-coordinates plot over the four measurements. Lines are coloured by
# the numeric species_id on a diverging scale centred at 2.
fig = px.parallel_coordinates(
    data_frame=df_iris,
    color="species_id",
    dimensions=[
        'sepal_width',
        'sepal_length',
        'petal_width',
        'petal_length',
    ],
    color_continuous_scale=px.colors.diverging.Tealrose,
    color_continuous_midpoint=2,
)
fig.show()

# px.choropleth

In a choropleth map, each row of data_frame is represented by a colored region mark on a map.
https://plotly.github.io/plotly.py-docs/generated/plotly.express.choropleth.html

scope (str, default 'world') – One of 'world', 'usa', 'europe', 'asia', 'africa', 'north america', or 'south america'. The default is 'world' unless projection is set to 'albers usa', which forces 'usa'.

locationmode (str) – One of 'ISO-3', 'USA-states', 'country names', or 'geojson-id'.

ISO-3 Alpha Location List: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3

In [36]:
# Gapminder rows for 2007 only.
df_exp = px.data.gapminder().query("year==2007")
# Preview five randomly selected rows.
df_exp.sample(n=5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
1355,Sierra Leone,Africa,2007,42.568,6144562,862.540756,SLE,694
1151,Norway,Europe,2007,80.196,4627926,49357.19017,NOR,578
1655,Vietnam,Asia,2007,74.249,85262356,2441.576404,VNM,704
155,Bosnia and Herzegovina,Europe,2007,74.852,4552198,7446.298803,BIH,70
1175,Pakistan,Asia,2007,65.483,169270617,2605.94758,PAK,586


In [37]:
# World choropleth keyed by the iso_alpha codes.
fig = px.choropleth(
    data_frame=df_exp,
    locations="iso_alpha",
    color="lifeExp",          # lifeExp is a column of gapminder
    hover_name="country",     # column to add to hover information
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig.show()

In [38]:
# Highlight a single location, matched by country name, on the Asia scope.
fig = px.choropleth(
    locations=["Hong Kong"],
    locationmode='country names',
    scope="asia",
)
fig.show()

In [39]:
# Three US states, matched by state abbreviation, coloured by literal values.
fig = px.choropleth(
    locations=["CA", "TX", "NY"],
    locationmode="USA-states",
    color=[1, 2, 3],
    scope="usa",
)
fig.show()

In [40]:
%pip install nbformat


Note: you may need to restart the kernel to use updated packages.


In [41]:
import plotly.express as px

In [42]:
import seaborn as sns

# Load iris through seaborn this time.
iris = sns.load_dataset('iris')
# Sepal scatter with a single OLS trendline over all rows.
fig = px.scatter(
    data_frame=iris,
    x="sepal_width",
    y="sepal_length",
    trendline='ols',
)
fig.show()

In [43]:
# %pip install nbformat

In [44]:
import plotly.express as px
import seaborn as sns

# Load iris through seaborn and draw a plain sepal scatter.
iris = sns.load_dataset('iris')
fig = px.scatter(
    data_frame=iris,
    x="sepal_width",
    y="sepal_length",
)
fig.show()

In [45]:
# Same sepal scatter, coloured by species.
fig = px.scatter(
    data_frame=iris,
    x="sepal_width",
    y="sepal_length",
    color="species",
)
fig.show()

In [46]:
# Sepal scatter coloured by species, with OLS trendlines.
fig = px.scatter(
    data_frame=iris,
    x="sepal_width",
    y="sepal_length",
    color='species',
    trendline="ols",
)
fig.show()

In [47]:
import pandas as pd
import numpy as np

# Read the health dataset once. Header names are stripped of surrounding
# whitespace so that a padded header such as " Max_Pulse" still matches.
# NOTE(review): the recorded run of this cell failed with
# KeyError: 'Max_Pulse'; padded headers are one possible cause. Confirm the
# real column names in data.csv.
full_health_data = pd.read_csv("data.csv", header=0, sep=",")
full_health_data.columns = full_health_data.columns.str.strip()

# Independent copy under the name the following cells use.
df = full_health_data.copy()

# Fail with an actionable message instead of a bare KeyError.
if "Max_Pulse" not in full_health_data.columns:
    raise KeyError(
        "'Max_Pulse' not found in data.csv; available columns: "
        f"{list(full_health_data.columns)}"
    )

# 10th percentile of the Max_Pulse column.
Max_Pulse = full_health_data["Max_Pulse"]
percentile10 = np.percentile(Max_Pulse, 10)

print(percentile10)


KeyError: 'Max_Pulse'

In [None]:
# Show the first three rows of df.
df.head(3)

In [None]:
# Standard deviation of df computed through NumPy.
# NOTE(review): np.std defaults to ddof=0 whereas DataFrame.std() defaults to
# ddof=1, so this result may differ from df.std() — confirm which is intended.
np.std(df)

In [None]:
# Standard deviation via pandas' own DataFrame.std().
df.std()

In [None]:
# Ratio of standard deviation to mean (coefficient of variation).
df.std()/df.mean()

In [None]:
# Variance via DataFrame.var().
df.var()

In [None]:
# Pairwise correlation matrix of df's columns.
df.corr()

In [None]:
%pip install scipy

In [None]:
from scipy import stats

# Simple linear regression of Calorie_Burnage on Max_Pulse.
coefficient = stats.linregress(
    x=df["Max_Pulse"],
    y=df["Calorie_Burnage"],
)
# Unpack the five result fields for use in later cells.
(slope, intercept, r_value, p_value, std_err) = coefficient
print(coefficient)

In [None]:
import statsmodels.formula.api as smf

# Specify an OLS model with a formula, fit it, and print the summary table.
model = smf.ols(
    formula='Calorie_Burnage ~ Max_Pulse',
    data=df,
)
results = model.fit()
print(results.summary())

In [None]:
# Square of r_value (the coefficient of determination, R^2).
r_value**2

In [None]:
%pip install scikit-learn

In [None]:
import numpy
from sklearn.metrics import r2_score

# Literal sample points.
x = [1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 21, 22]
y = [100, 90, 80, 60, 60, 55, 60, 65, 70, 70, 75, 76, 78, 79, 90, 99, 99, 100]

# Fit a degree-3 polynomial to the points and wrap it as a callable.
mymodel = numpy.poly1d(
    numpy.polyfit(x, y, 3)
)

# R^2 of the polynomial's predictions against the observed y values.
print(r2_score(y, mymodel(x)))

In [None]:
import pandas as pd
import numpy as np

# Rebind df to the second dataset; the first row of the file is the header.
df = pd.read_csv(
    "data2.csv",
    sep=",",
    header=0,
)

In [None]:
# Show the first three rows of the data2.csv frame.
df.head(3)

In [None]:
# Feature matrix (Weight, Volume) and target vector (CO2).
X, y = df[['Weight', 'Volume']], df['CO2']

In [None]:
# Display the feature frame.
X

In [None]:
# Display the target series.
y

In [None]:
from sklearn import linear_model

# Create a linear regression estimator and fit it on the features and target.
regr_model = linear_model.LinearRegression()
regr_model.fit(X=X, y=y)

In [None]:
# Predict CO2 for Weight=2300, Volume=1300.
# The model was fitted on a DataFrame, so the new observation is passed as a
# one-row DataFrame with the same column names. A bare nested list makes
# scikit-learn warn that X lacks valid feature names.
predictedCO2 = regr_model.predict(
    pd.DataFrame([[2300, 1300]], columns=X.columns)
)
print(predictedCO2)

In [None]:
# Print the fitted coefficients, one per feature column.
print(regr_model.coef_)

In [None]:
# Predict CO2 for Weight=3300, Volume=1300. The input is a one-row DataFrame
# whose columns match the training frame, which avoids scikit-learn's
# feature-name warning.
regr_model.predict(pd.DataFrame([[3300, 1300]], columns=X.columns))

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from X, then apply them to X.
scaler = StandardScaler()
scaledX = scaler.fit(X).transform(X)
print(scaledX)

In [None]:
import pandas
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler

# StandardScaler standardizes each feature column before the regression.
scale = StandardScaler()

df = pandas.read_csv("data2.csv")

X = df[['Weight', 'Volume']]
y = df['CO2']

# Fit the scaler on the training features and transform them in one step.
scaledX = scale.fit_transform(X)

regr = linear_model.LinearRegression()
regr.fit(scaledX, y)

# The new observation goes through the SAME fitted scaler. It is wrapped in a
# DataFrame with the training column names because the scaler was fitted on a
# DataFrame; a bare list triggers scikit-learn's feature-name warning.
# NOTE(review): the original passed Volume=1.3, but the earlier cells of this
# notebook predict with Volume=1300 against this same data2.csv. 1300 is used
# here for consistency — confirm the units of the Volume column.
new_car = pandas.DataFrame([[2300, 1300]], columns=X.columns)
scaled = scale.transform(new_car)

predictedCO2 = regr.predict([scaled[0]])
print(predictedCO2)