<a href="https://colab.research.google.com/github/krakowiakpawel9/plotly-course/blob/master/02_statistical.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q chart_studio
!pip install -q --upgrade plotly

[K     |████████████████████████████████| 81kB 3.1MB/s 
[K     |████████████████████████████████| 7.1MB 2.9MB/s 
[?25h

In [28]:
from seaborn import load_dataset
import plotly.express as px
import pandas as pd


df = load_dataset('diamonds')
df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [31]:
df.carat = pd.cut(df.carat, bins=10, labels=list('abcedfghij'))
df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,a,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,a,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,a,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,a,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,a,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [32]:
df.carat.value_counts()

a    25155
b    18626
c     7129
e     2349
d      614
f       53
g        6
h        5
i        2
j        1
Name: carat, dtype: int64

In [33]:
px.box(df, x='carat', y='price')

### Histogram

In [34]:
df = load_dataset('mpg')
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino


In [35]:
df.describe()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
count,398.0,398.0,398.0,392.0,398.0,398.0,398.0
mean,23.514573,5.454774,193.425879,104.469388,2970.424623,15.56809,76.01005
std,7.815984,1.701004,104.269838,38.49116,846.841774,2.757689,3.697627
min,9.0,3.0,68.0,46.0,1613.0,8.0,70.0
25%,17.5,4.0,104.25,75.0,2223.75,13.825,73.0
50%,23.0,4.0,148.5,93.5,2803.5,15.5,76.0
75%,29.0,8.0,262.0,126.0,3608.0,17.175,79.0
max,46.6,8.0,455.0,230.0,5140.0,24.8,82.0


In [36]:
df.origin.value_counts()

usa       249
japan      79
europe     70
Name: origin, dtype: int64

In [39]:
import plotly.express as px

px.histogram(df, x='mpg', nbins=50, title='Rozkład zmiennej mpg')

In [40]:
px.histogram(df, x='mpg', nbins=50, title='Rozkład zmiennej mpg', histnorm='probability density')

In [43]:
px.histogram(df, x='mpg', nbins=50, title='Rozkład zmiennej mpg', histnorm='probability density',
            facet_row='origin', color='origin')

In [48]:
dff = df[(df.origin == 'usa') | (df.origin == 'europe')]
px.histogram(dff, x='mpg', color='origin', marginal='violin', title='Rozkład zmiennej mpg (usa vs. europe)', opacity=0.7)

In [49]:
px.histogram(df, x='origin')

In [54]:
import plotly.figure_factory as ff

fig = ff.create_distplot([df.mpg], group_labels=['mpg'])
fig.show()

In [55]:
fig = ff.create_distplot([df.mpg, df.acceleration], group_labels=['mpg', 'Acceleration'])
fig.show()

In [60]:
import numpy as np

x1 = np.random.randn(1000)
x2 = np.random.randn(1000) + 2

ff.create_distplot([x1, x2], ['Rozkład Normalny ~ N(0, 1)', 'Rozkład Normalny ~ N(2, 1)'], curve_type='normal',
                  bin_size=0.2, colors=['slategray', 'magenta'])

### Histogram 2D

In [61]:
df = load_dataset('mpg')
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino


In [73]:
import plotly.graph_objects as go

fig = go.Figure(go.Histogram2d(
    x=df.mpg,
    y=df.acceleration,
    nbinsx=30,
    nbinsy=30
))

fig.add_trace(go.Scatter(
    x=df.mpg,
    y=df.acceleration,
    mode='markers',
    marker={
        'symbol': 'x',
        'opacity': 0.7,
        'color': 'white',
        'size': 8,
        'line': {
            'width': 1
        }
    }
))

fig.show()