In [None]:
%%time
import pandas as pd
import altair as alt
from vega_datasets import data

# Load the Gapminder sample dataset shipped with vega_datasets. The cells below
# plot its country, cluster, year, fertility, life_expect and pop columns.
df = data.gapminder()


In [None]:
# Store the country column as a pandas categorical.
df = df.astype({'country': 'category'})

# Preview up to 40 distinct rows from cluster 0.
# - random_state pins the sample so Restart & Run All shows the same rows.
# - min() avoids the ValueError that sample() raises when asked for more rows
#   than are available.
cluster0_preview = df.query('cluster == 0').drop_duplicates()
cluster0_preview.sample(n=min(40, len(cluster0_preview)), random_state=42)

In [None]:
# Peek at the first five rows of the frame.
df.head(n=5)

In [None]:
# Year-2000 rows only: fertility (quantitative) on x, one ordinal row per country on y.
alt.Chart(df[df['year'] == 2000]).mark_point().encode(
    x='fertility:Q',
    y='country:O',
)

In [None]:
# Quantitative (Q) x against an ordinal (O) y.
# Alternative type codes for the y channel:
#   y='cluster:Q'  -> quantitative: adds a grid and zero-based scaling automatically
#   y='cluster:N'  -> nominal: treats the data as categorical and unordered
alt.Chart(df[df['year'] == 2000]).mark_point().encode(
    x='fertility:Q',
    y='cluster:O',  # ordinal: treats the data as categorical and ordered
)

In [None]:
# Q-vs-Q scatter plot; with no scale override both axes are zero-based.
alt.Chart(df[df['year'] == 2000]).mark_point().encode(
    x='fertility:Q',
    y='life_expect:Q',
)

In [None]:
# Q-vs-Q scatter plot with point size bound to population.
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df[df['year'] == 2000]).mark_point().encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size='pop:Q',
)

In [None]:
# Hollow bubbles: size follows population (explicit size range), color follows cluster.
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df[df['year'] == 2000]).mark_point(filled=False).encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size=alt.Size('pop:Q', scale=alt.Scale(range=[0, 1400])),
    color='cluster:N',
)

In [None]:
# Filled, half-transparent bubbles; cluster drives both color and shape,
# and hovering shows the country and its population.
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df[df['year'] == 2000]).mark_point(filled=True).encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size=alt.Size('pop:Q', scale=alt.Scale(range=[0, 1400])),
    color='cluster:N',
    opacity=alt.OpacityValue(0.5),
    shape='cluster:N',
    tooltip=['country', 'pop'],
)

In [None]:
# Same bubble chart, now with an order channel sorted by population (descending).
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df[df['year'] == 2000]).mark_point(filled=True).encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size=alt.Size('pop:Q', scale=alt.Scale(range=[0, 1400])),
    color='cluster:N',
    opacity=alt.OpacityValue(0.5),
    shape='cluster:N',
    order=alt.Order('pop:Q', sort='descending'),
    tooltip=['country', 'pop'],
)

In [None]:
# Bubble chart with explicit tooltip definitions so the hover labels get readable titles.
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df[df['year'] == 2000]).mark_point(filled=True).encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size=alt.Size('pop:Q', scale=alt.Scale(range=[0, 1400])),
    color='cluster:N',
    opacity=alt.OpacityValue(0.5),
    shape='cluster:N',
    order=alt.Order('pop:Q', sort='descending'),
    tooltip=[
        alt.Tooltip('country:N'),
        alt.Tooltip('pop:Q', title='population'),
        alt.Tooltip('life_expect:Q', title='life expectancy'),
    ],
)

In [None]:
%%time
# Baseline bubble chart before faceting: this cell has no column/row channel yet.
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df[df['year'] == 2000]).mark_point(filled=True).encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size=alt.Size('pop:Q', scale=alt.Scale(range=[0, 1400])),
    color='cluster:N',
    opacity=alt.OpacityValue(0.5),
    shape='cluster:N',
    order=alt.Order('pop:Q', sort='descending'),
    tooltip=[
        alt.Tooltip('country:N'),
        alt.Tooltip('pop:Q', title='population'),
        alt.Tooltip('life_expect:Q', title='life expectancy'),
    ],
)

In [None]:
# Column / row faceting: one 100x100 panel per cluster, laid out as columns.
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df[df['year'] == 2000]).mark_point(filled=True).encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size=alt.Size('pop:Q', scale=alt.Scale(range=[0, 1400])),
    color='cluster:N',
    opacity=alt.OpacityValue(0.8),
    # shape='cluster:N',
    order=alt.Order('pop:Q', sort='descending'),
    column='cluster:N',
    # row='cluster:N',
    tooltip='country:N',
).properties(width=100, height=100)

In [None]:
# Legend positioning: the size legend is oriented to the bottom with its title on the left,
# and the color legend is switched off (legend=None).
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df[df['year'] == 2000]).mark_point(filled=True).encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size=alt.Size(
        'pop:Q',
        scale=alt.Scale(range=[0, 1500]),
        legend=alt.Legend(orient='bottom', titleOrient='left'),
    ),
    color=alt.Color('cluster:N', legend=None),
    opacity=alt.OpacityValue(0.8),
    # shape='cluster:N',
    order=alt.Order('pop:Q', sort='descending'),
    column='cluster:N',
    # row='cluster:N',
    tooltip='country:N',
).properties(width=100, height=100)

In [None]:
# Facet on year instead of cluster. The data is still restricted to year 2000 here.
# Size legend at the bottom; color legend hidden.
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df[df['year'] == 2000]).mark_point(filled=True).encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size=alt.Size(
        'pop:Q',
        scale=alt.Scale(range=[0, 1500]),
        legend=alt.Legend(orient='bottom', titleOrient='left'),
    ),
    color=alt.Color('cluster:N', legend=None),
    opacity=alt.OpacityValue(0.8),
    # shape='cluster:N',
    order=alt.Order('pop:Q', sort='descending'),
    column='year:O',
    # row='cluster:N',
    tooltip='country:N',
).properties(width=100, height=100)

In [None]:
# Year facet on the unfiltered dataset, using smaller 80x80 panels.
# Size legend at the bottom; color legend hidden.
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df).mark_point(filled=True).encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size=alt.Size(
        'pop:Q',
        scale=alt.Scale(range=[0, 1500]),
        legend=alt.Legend(orient='bottom', titleOrient='left'),
    ),
    color=alt.Color('cluster:N', legend=None),
    opacity=alt.OpacityValue(0.8),
    # shape='cluster:N',
    order=alt.Order('pop:Q', sort='descending'),
    column='year:O',
    # row='cluster:N',
    tooltip='country:N',
).properties(width=80, height=80)

In [None]:
# Unfiltered dataset faceted by cluster; the tooltip lists both country and year.
# Size legend at the bottom; color legend hidden.
# zero=False on a scale removes the zero-based axis scaling.
alt.Chart(df).mark_point(filled=True).encode(
    x=alt.X('fertility:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    size=alt.Size(
        'pop:Q',
        scale=alt.Scale(range=[0, 1500]),
        legend=alt.Legend(orient='bottom', titleOrient='left'),
    ),
    color=alt.Color('cluster:N', legend=None),
    opacity=alt.OpacityValue(0.8),
    # shape='cluster:N',
    order=alt.Order('pop:Q', sort='descending'),
    column='cluster:N',
    # row='cluster:N',
    tooltip=['country:N', 'year:N'],
).properties(width=80, height=80)

In [None]:
# Working with a slider / selector.
# The selection is bound to a range input over the years 1955-2005 in steps of
# five and starts at 1955. Its name is a plain identifier ('select_year'):
# Vega-Lite asks that selection/parameter names be valid JavaScript
# identifiers, so the space in the previous name has been removed.
yearslider = alt.selection_single(
    name='select_year',
    fields=['year'],
    init={'year': 1955},
    bind=alt.binding_range(min=1955, max=2005, step=5)
)


# Bubble chart over the full dataset; transform_filter(yearslider) keeps only
# the rows whose year matches the slider's current value.
alt.Chart(df).mark_point(filled=True).encode(
    alt.X('fertility:Q', scale=alt.Scale(zero=False)),   # zero=False removes the zero based axis scaling
    alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
    alt.Size('pop:Q',
             scale=alt.Scale(range=[0, 1200]),
             legend=alt.Legend(orient='bottom', titleOrient='left')),
    alt.Color('cluster:N', legend=None),
    alt.OpacityValue(0.4),
    alt.Order('pop:Q', sort='descending'),
    alt.Tooltip('country:N'),

).add_selection(yearslider).transform_filter(yearslider)

In [None]:
# Playing with different mark types.
# A notebook cell only renders its last expression, so building three charts
# one after another would display just the final one. The variants are derived
# from one shared base chart and joined with "|" (horizontal concatenation) so
# that all three are shown.
mark_comparison_base = alt.Chart(df.query("year == 2000")).encode(
    alt.X('fertility:Q'),   # no scale override on x
    alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),   # zero=False removes the zero based axis scaling
    alt.Shape('cluster:N')
)

# NOTE(review): the shape channel is kept on the square variant exactly as in
# the original cell; confirm whether it has any visible effect with mark_square.
(
    mark_comparison_base.mark_point(filled=True, size=150)
    | mark_comparison_base.mark_point(filled=False, size=150)
    | mark_comparison_base.mark_square()
)

In [None]:
# Tick marks in magenta; only the y scale opts out of the zero baseline.
alt.Chart(df[df['year'] == 2000]).mark_tick(color='magenta').encode(
    x='fertility:Q',
    y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False)),
)

In [None]:
# Light-blue bars of population per country (x sorted ascending), drawn 900px wide.
alt.Chart(df[df['year'] == 2000]).mark_bar(color='lightblue').encode(
    x=alt.X('country:N', sort='ascending'),
    y='pop:Q',
).properties(width=900)

In [None]:
# Population bars per cluster, colored by country; hovering shows the country.
alt.Chart(df[df['year'] == 2000]).mark_bar().encode(
    x=alt.X('cluster:N', sort='ascending'),
    y='pop:Q',
    color='country:N',
    tooltip='country:N',
)

In [None]:
# Playing with a bar chart whose x scale does not have to start at zero (zero=False).
alt.Chart(df[df['year'] == 2000]).mark_bar().encode(
    x=alt.X('life_expect:Q', scale=alt.Scale(zero=False)),
    y='cluster:N',
)

In [None]:
# Range bars: x is the minimum and x2 the maximum life expectancy, one bar per cluster,
# so each bar starts at the minimum value rather than at zero.
alt.Chart(df[df['year'] == 2000]).mark_bar().encode(
    x='min(life_expect):Q',
    x2='max(life_expect):Q',
    y='cluster:N',
)

In [None]:
# Line mark over the whole dataset: year (ordinal) on x, fertility on y.
alt.Chart(df).mark_line().encode(
    x='year:O',
    y='fertility:Q',
)

In [None]:
# One line per country via the color channel; hovering shows the country. 500px wide.
alt.Chart(df).mark_line().encode(
    x='year:O',
    y='fertility:Q',
    color='country:N',
    tooltip='country:N',
).properties(width=500)

In [None]:
# Same per-country lines, drawn with a stroke width of 3.
alt.Chart(df).mark_line(strokeWidth=3).encode(
    x='year:O',
    y='fertility:Q',
    color='country:N',
    tooltip='country:N',
).properties(width=500)

In [None]:
# Smooth the lines by interpolating between data points (interpolate='monotone').
alt.Chart(df).mark_line(
    strokeWidth=3,
    interpolate='monotone',
).encode(
    x='year:O',
    y='fertility:Q',
    color='country:N',
    tooltip='country:N',
).properties(width=500)

In [None]:
# Compare only two years (1955 and 2005): one line per country between them.
alt.Chart(
    df[(df['year'] == 1955) | (df['year'] == 2005)]
).mark_line().encode(
    x='year:O',
    y='fertility:Q',
    color='country:N',
    tooltip='country:N',
).properties(width=400)

In [None]:
# Compare only two years (1955 and 2005), sizing the chart with a
# step-based width of 300 instead of a fixed total width.
alt.Chart(
    df[(df['year'] == 1955) | (df['year'] == 2005)]
).mark_line().encode(
    x='year:O',
    y='fertility:Q',
    color='country:N',
    tooltip='country:N',
).properties(width={'step': 300})

In [None]:
# Keep only Indonesia's rows, then show the (rows, columns) shape of that subset.
indonesia = df[df['country'] == "Indonesia"]
indonesia.shape

In [None]:
# Area chart of Indonesia's fertility by year.
# interpolate='monotone' smooths out the line on the chart.
alt.Chart(indonesia).mark_area(interpolate='monotone').encode(
    x='year:O',
    y='fertility:Q',
)

In [None]:
# Area chart over the whole dataset, one colored area per country.
# interpolate='monotone' smooths out the line on the chart.
alt.Chart(df).mark_area(interpolate='monotone').encode(
    x='year:O',
    y='fertility:Q',
    color='country:N',
)

In [None]:
# Restrict to cluster 4, then draw one colored area per country.
cluster4 = df[df['cluster'] == 4]

# interpolate='monotone' smooths out the line on the chart.
alt.Chart(cluster4).mark_area(interpolate='monotone').encode(
    x='year:O',
    y='fertility:Q',
    color='country:N',
)

In [None]:
# Cluster 4 again, this time with the y stack set to 'center'.
cluster4 = df[df['cluster'] == 4]

# interpolate='monotone' smooths out the line on the chart.
alt.Chart(cluster4).mark_area(interpolate='monotone').encode(
    x='year:O',
    y=alt.Y('fertility:Q', stack='center'),
    color='country:N',
)

In [None]:
# The data can be normalized inside the visualization: stack='normalize' on y.
cluster4 = df[df['cluster'] == 4]

# interpolate='monotone' smooths out the line on the chart.
alt.Chart(cluster4).mark_area(interpolate='monotone').encode(
    x='year:O',
    y=alt.Y('fertility:Q', stack='normalize'),
    color='country:N',
)

In [None]:
# Range band for cluster 4: the area spans from the minimum (y) to the
# maximum (y2) fertility for each year.
cluster4 = df[df['cluster'] == 4]

# interpolate='monotone' smooths out the line on the chart.
alt.Chart(cluster4).mark_area(interpolate='monotone').encode(
    x='year:O',
    y='min(fertility):Q',
    y2='max(fertility):Q',
)