In [57]:
import numpy as np
import pandas as pd
import altair as alt
from vega_datasets import data
from pathlib import Path
import seaborn as sns
import altair_catplot

#### Histogram

In [4]:
# Load the cars example dataset shipped with vega_datasets.
car_data = data.cars()

In [6]:
# Render the cars table through the notebook's rich display.
display(car_data)

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA
...,...,...,...,...,...,...,...,...,...
401,ford mustang gl,27.0,4,140.0,86.0,2790,15.6,1982-01-01,USA
402,vw pickup,44.0,4,97.0,52.0,2130,24.6,1982-01-01,Europe
403,dodge rampage,32.0,4,135.0,84.0,2295,11.6,1982-01-01,USA
404,ford ranger,28.0,4,120.0,79.0,2625,18.6,1982-01-01,USA


In [7]:
# List the column names available for use as chart encoding fields.
car_data.columns

Index(['Name', 'Miles_per_Gallon', 'Cylinders', 'Displacement', 'Horsepower',
       'Weight_in_lbs', 'Acceleration', 'Year', 'Origin'],
      dtype='object')

In [22]:
# Histogram of Acceleration, coloured by Origin.
# The x field has to be binned: counting an un-binned quantitative field
# draws one bar per distinct value instead of a histogram.
hist = (
    alt.Chart(data=car_data, width=600)
    .mark_bar(opacity=0.9)
    .encode(
        x=alt.X(shorthand="Acceleration", bin=True),
        y="count()",
        color=alt.Color(shorthand="Origin"),
    )
)
hist.show()


In [14]:
# Export the current histogram as a PNG. The target folder is created first
# so the save does not fail when "plotImages" is missing (e.g. fresh checkout).
output_dir = Path("plotImages")
output_dir.mkdir(parents=True, exist_ok=True)
hist.save(
    fp=output_dir.joinpath('hist1.png'),
    format="png")

In [17]:
# Same histogram with finer binning (up to 40 bins on Acceleration).
# alt.Bin is used for the bin definition, matching the other binned charts
# in this notebook.
hist = (
    alt.Chart(data=car_data, width=600)
    .mark_bar()
    .encode(
        x=alt.X(
            shorthand="Acceleration",
            bin=alt.Bin(maxbins=40),
        ),
        y="count()",
        color=alt.Color(shorthand="Origin"),
    )
)
hist.show()

#### Overlapping Histogram

In [23]:
# Two synthetic normal samples (means -1 and 0, unit scale, 1000 draws each).
# A seeded generator keeps the data, and every chart built from it,
# identical across kernel restarts.
rng = np.random.default_rng(seed=42)
df = pd.DataFrame(
    data={
        "Col A": rng.normal(loc=-1, scale=1, size=1000),
        'Col B': rng.normal(loc=0, scale=1, size=1000),
    }
)

In [24]:
# Preview the first rows of the two synthetic columns.
df.head()

Unnamed: 0,Col A,Col B
0,-1.191301,-1.891564
1,-1.211933,0.261723
2,-0.98167,0.140567
3,-1.839366,-0.916167
4,-0.294214,-0.862083


In [42]:
## Reshape to long format with DataFrame.melt
# Every column of df becomes (Columns, Values) pairs. id_vars is taken from
# df.index.name, which appears to be None for this default index, so no
# identifier columns are carried over.
dfMelt = df.melt(
    id_vars=df.index.name,
    value_vars=df.columns,
    var_name='Columns',
    value_name='Values',
)
dfMelt.sample(n=5)

Unnamed: 0,Columns,Values
118,Col A,-0.842295
36,Col A,-0.732013
736,Col A,-2.219883
864,Col A,-0.933011
576,Col A,-1.37054


In [51]:
# Overlapping histograms from the long-format frame: one semi-transparent
# step area per value of "Columns", with stacking disabled on the count axis.
hist = (
    alt.Chart(data=dfMelt)
    .mark_area(opacity=0.6, interpolate="step")
    .encode(
        x=alt.X(shorthand="Values", bin=alt.Bin(maxbins=30)),
        y=alt.Y(shorthand="count()", stack=None),
        color=alt.Color(shorthand="Columns"),
    )
)
hist.show()

In [70]:
## without df.melt
# The wide frame is folded into (Columns, Values) pairs inside the chart spec
# via transform_fold, so no pandas reshape is needed. Types are given
# explicitly (:Q / :N) because the folded fields do not exist as DataFrame
# columns. alt.Bin matches the bin definition used by the melt-based chart.
hist2 = (
    alt.Chart(data=df)
    .transform_fold(
        fold=['Col A', 'Col B'],
        as_=['Columns', 'Values'],
    )
    .mark_area(
        opacity=0.3,
        interpolate='step',
    )
    .encode(
        x=alt.X(
            shorthand="Values:Q",
            bin=alt.Bin(maxbins=30),
        ),
        y=alt.Y(
            shorthand="count()",
            stack=None,
        ),
        color=alt.Color(shorthand="Columns:N"),
    )
)
hist2.show()

In [55]:
# Export the fold-based overlapping histogram as a PNG. The target folder is
# created first so the save does not fail when "plotImages" is missing.
output_dir = Path("plotImages")
output_dir.mkdir(parents=True, exist_ok=True)
hist2.save(
    fp=output_dir.joinpath("histOverlapping2.png"),
    format="png"
)

#### Box Plot

In [58]:
# Load seaborn's "tips" example dataset.
tip = sns.load_dataset(name="tips")

In [60]:
# Preview the first rows of the tips data.
tip.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


#### Bubble Plot

In [71]:
# Load the cars example dataset for the bubble plot.
cars = data.cars()

In [72]:
# Preview the first rows of the cars data.
cars.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


In [78]:
# Bubble plot: Acceleration against Displacement, with marker size driven by
# the number of cylinders. Markers are unfilled grey outlines at 50% opacity.
# NOTE(review): Cylinders is typed as nominal (:N) on the size channel; an
# ordinal type (:O) may describe the ordering better - confirm intent.
bubble = (
    alt.Chart(data=cars, width=600, height=500)
    .mark_point(opacity=.5, filled=False, color='grey')
    .encode(
        x=alt.X(shorthand="Acceleration"),
        y=alt.Y(shorthand="Displacement"),
        size=alt.Size(shorthand="Cylinders:N"),
    )
)

bubble.show()