# **1. Intro**

## 환경 설정, 데이터 불러오기



In [None]:
import altair as alt
import pandas as pd

In [None]:
# vega_datasets exposes the example datasets used throughout this notebook.
from vega_datasets import data

# Show the names of every dataset available through the loader.
data.list_datasets()

['7zip',
 'airports',
 'annual-precip',
 'anscombe',
 'barley',
 'birdstrikes',
 'budget',
 'budgets',
 'burtin',
 'cars',
 'climate',
 'co2-concentration',
 'countries',
 'crimea',
 'disasters',
 'driving',
 'earthquakes',
 'ffox',
 'flare',
 'flare-dependencies',
 'flights-10k',
 'flights-200k',
 'flights-20k',
 'flights-2k',
 'flights-3m',
 'flights-5k',
 'flights-airport',
 'gapminder',
 'gapminder-health-income',
 'gimp',
 'github',
 'graticule',
 'income',
 'iowa-electricity',
 'iris',
 'jobs',
 'la-riots',
 'londonBoroughs',
 'londonCentroids',
 'londonTubeLines',
 'lookup_groups',
 'lookup_people',
 'miserables',
 'monarchs',
 'movies',
 'normal-2d',
 'obesity',
 'ohlc',
 'points',
 'population',
 'population_engineers_hurricanes',
 'seattle-temps',
 'seattle-weather',
 'sf-temps',
 'sp500',
 'stocks',
 'udistrict',
 'unemployment',
 'unemployment-across-industries',
 'uniform-2d',
 'us-10m',
 'us-employment',
 'us-state-capitals',
 'volcano',
 'weather',
 'weball26',
 'wheat',

In [None]:
# Load the cars dataset and preview its first rows.
cars = data.cars()
cars.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


## Tidy Dataset

In [None]:
# Download the raw Pew CSV and display it as loaded.
# NOTE(review): presumably wide-form, with one column per income bracket
# next to a `religion` column -- confirm against the file.
pew_url = "https://raw.githubusercontent.com/nickhould/tidy-data-python/master/data/pew-raw.csv"
pew_df = pd.read_csv(pew_url)
pew_df

In [None]:
# Reshape the Pew table to long form: `religion` stays as the identifier,
# every other column is unpivoted into an (income, freq) pair, and the
# result is sorted by religion before previewing the first ten rows.
formatted_df = (
    pew_df
    .melt(id_vars=["religion"], var_name="income", value_name="freq")
    .sort_values(by="religion")
)
formatted_df.head(10)

In [None]:
# Tidy layout: each row is one (city, month) observation with its
# precipitation value.
weather_tidy = pd.DataFrame(
    [
        ('Seattle', 'Apr', 2.68),
        ('Seattle', 'Aug', 0.97),
        ('Seattle', 'Dec', 5.31),
        ('New York', 'Apr', 3.94),
        ('New York', 'Aug', 4.13),
        ('New York', 'Dec', 3.58),
        ('Chicago', 'Apr', 3.62),
        ('Chicago', 'Aug', 3.98),
        ('Chicago', 'Dec', 2.56),
    ],
    columns=['city', 'month', 'precip'],
)

weather_tidy

Unnamed: 0,city,month,precip
0,Seattle,Apr,2.68
1,Seattle,Aug,0.97
2,Seattle,Dec,5.31
3,New York,Apr,3.94
4,New York,Aug,4.13
5,New York,Dec,3.58
6,Chicago,Apr,3.62
7,Chicago,Aug,3.98
8,Chicago,Dec,2.56


In [None]:
# Non-tidy (wide) layout of the same measurements: one row per city and
# one precipitation column per month.
weather_no_tidy = pd.DataFrame(
    [
        ('Seattle', 2.68, 0.97, 5.31),
        ('New York', 3.94, 4.13, 3.58),
        ('Chicago', 3.62, 3.98, 2.56),
    ],
    columns=['city', 'Apr', 'Aug', 'Dec'],
)

weather_no_tidy

Unnamed: 0,city,Apr,Aug,Dec
0,Seattle,2.68,0.97,5.31
1,New York,3.94,4.13,3.58
2,Chicago,3.62,3.98,2.56


## Simple Example

### bar chart

In [None]:
# Bar-chart data: nine category labels 'A'..'I' in column `a`, each with a
# numeric value in column `b`. Every label is a string so that the nominal
# x encoding sees a homogeneous column (the same labels as `source2`).
source = pd.DataFrame({
    'a': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'],
    'b': [28, 55, 43, 91, 81, 53, 19, 87, 52]
})

# Bar chart: `a` on x with an explicit nominal type (`:N`); `b` on y with
# no type shorthand.
alt.Chart(source).mark_bar().encode(
    x='a:N',
    y='b'
)

In [None]:
# Same bar-chart data supplied as inline records through alt.Data rather
# than a DataFrame: one {'a': label, 'b': value} dict per bar.
source2 = alt.Data(values=[
    {'a': label, 'b': height}
    for label, height in zip('ABCDEFGHI', [28, 55, 43, 91, 81, 53, 19, 87, 52])
])

# TODO (37p)
# Both encodings spell out their types (ordinal x, quantitative y).
alt.Chart(source2).mark_bar().encode(x='a:O', y='b:Q')

### Scatterplot

In [None]:
# TODO (39p)

# Scatterplot driven by the dataset's URL instead of a DataFrame; both
# encodings carry an explicit quantitative type (`:Q`).
url = data.cars.url
alt.Chart(url).mark_point().encode(
    x='Horsepower:Q',
    y='Miles_per_Gallon:Q'
)

In [None]:
# The same scatterplot, this time built from the `cars` DataFrame.
alt.Chart(cars).mark_point().encode(
    x='Horsepower:Q',
    y='Miles_per_Gallon:Q'
)

## Wide-form vs Long-form

In [None]:
# Wide-form layout: one row per date and one price column per ticker.
# The Amazon column is named 'AMZN', the same ticker used in `long_form`.
wide_form = pd.DataFrame({'Date': ['2007-10-01', '2007-11-01', '2007-12-01'],
                          'AAPL': [189.95, 182.22, 198.08],
                          'AMZN': [89.15, 90.56, 92.64],
                          'GOOG': [707.00, 693.00, 691.48]
                          })
wide_form

Unnamed: 0,Date,AAPL,AAMZ,GOOG
0,2007-10-01,189.95,89.15,707.0
1,2007-11-01,182.22,90.56,693.0
2,2007-12-01,198.08,92.64,691.48


In [None]:
# Long-form layout: one row per (date, company) pair, with the price in a
# single column.
long_form = pd.DataFrame(
    [
        ('2007-10-01', 'AAPL', 190),
        ('2007-11-01', 'AAPL', 182),
        ('2007-12-01', 'AAPL', 198),
        ('2007-10-01', 'AMZN', 90),
        ('2007-11-01', 'AMZN', 91),
        ('2007-12-01', 'AMZN', 92),
        ('2007-10-01', 'GOOG', 707),
        ('2007-11-01', 'GOOG', 690),
        ('2007-12-01', 'GOOG', 691),
    ],
    columns=['Date', 'company', 'price'],
)

long_form

Unnamed: 0,Date,company,price
0,2007-10-01,AAPL,190
1,2007-11-01,AAPL,182
2,2007-12-01,AAPL,198
3,2007-10-01,AMZN,90
4,2007-11-01,AMZN,91
5,2007-12-01,AMZN,92
6,2007-10-01,GOOG,707
7,2007-11-01,GOOG,690
8,2007-12-01,GOOG,691


In [None]:
# TODO (44p)
# Line chart from the long-form table: `Date` as a temporal x axis,
# `price` on y, and one colored line per `company`.
alt.Chart(long_form).mark_line().encode(
    x='Date:T',
    y='price',
    color='company:N'
)

# **2. Encodings**

In [None]:
# Re-import and reload so this section can run on its own.
from vega_datasets import data

cars = data.cars()

# Scatterplot in which `Origin` drives both the color and the shape of the
# points; no type shorthand is given on any channel.
alt.Chart(cars).mark_point().encode(
    x='Horsepower',
    y='Miles_per_Gallon',
    color='Origin',
    shape='Origin'
)

## Short-form vs Long-form

In [None]:
# TODO (49p)
# Short form: each channel is a keyword argument whose string combines the
# field name with a one-letter type code (Q = quantitative, N = nominal).
alt.Chart(cars).mark_point().encode(
    x='Acceleration:Q',
    y='Miles_per_Gallon:Q',
    color='Origin:N'
)

In [None]:
# TODO (49p)
# Long form of the same chart: each channel is an explicit alt.X / alt.Y /
# alt.Color object with the type passed by its full name.
alt.Chart(cars).mark_point().encode(
    alt.X('Acceleration', type='quantitative'),
    alt.Y('Miles_per_Gallon', type='quantitative'),
    alt.Color('Origin', type='nominal')
)

In [None]:
# Load gapminder and keep only the rows whose `year` is 2000; the charts
# in the rest of this section all draw from `data2000`.
gapminder = data.gapminder()
data2000 = gapminder[gapminder['year'].eq(2000)]
data2000.head()

Unnamed: 0,year,country,cluster,pop,life_expect,fertility
9,2000,Afghanistan,0,23898198,42.129,7.4792
20,2000,Argentina,3,37497728,74.34,2.35
31,2000,Aruba,3,69539,73.451,2.124
42,2000,Australia,4,19164620,80.37,1.756
53,2000,Austria,1,8113413,78.98,1.382


## X, Y

In [None]:
# 51: only the x channel is encoded, so points are placed by `fertility`
# alone.
alt.Chart(data2000).mark_point().encode(
    alt.X('fertility:Q')
)

In [None]:
# 52: add a y channel that treats `cluster` as an ordinal field.
alt.Chart(data2000).mark_point().encode(
    alt.X('fertility:Q'),
    alt.Y('cluster:O')
)

### Data Types

In [None]:
# 53 : Data Types
pop = data.population()

# Shared 200x200 bar-chart base; only the x encoding differs between the
# two panels below.
# NOTE(review): the axis title says "total population" while the aggregate
# is mean(people) -- confirm which is intended.
base = alt.Chart(pop).mark_bar().encode(
    alt.Y("mean(people):Q", title="total population")
).properties(
    width=200,
    height=200
)

# Side by side: `year` encoded as quantitative vs. as ordinal.
alt.hconcat(
  base.encode(
      x='year:Q'
      ).properties(title='year:quantitative'),
  base.encode(
      x='year:O'
      ).properties(title='year:ordinal')
)

In [None]:
# 54: scatterplot of two quantitative fields, fertility vs. life expectancy.
alt.Chart(data2000).mark_point().encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q')
)

### Zero Baseline, Nice number

In [None]:
# 55: the same scatterplot with no scale options set, for comparison with
# the next cell.
alt.Chart(data2000).mark_point().encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q')
)

In [None]:
# 56: pass zero=False and nice=False to both axis scales, switching off
# the zero-baseline and nice-number options.
alt.Chart(data2000).mark_point().encode(
    alt.X('fertility:Q', scale=alt.Scale(zero=False, nice=False)),
    alt.Y('life_expect:Q', scale=alt.Scale(zero=False, nice=False))
)

## Size

In [None]:
# 57: add a size channel driven by the quantitative `pop` field.
alt.Chart(data2000).mark_point().encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q')
)


In [None]:
# 58: same chart, with the size scale's output range set to [0, 1000].
alt.Chart(data2000).mark_point().encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q', scale=alt.Scale(range=[0, 1000]))
)


## Color, Opacity, Shape

In [None]:
# 60
# Shared 150x150 scatterplot base; each panel below adds a color encoding
# of `Cylinders` under a different data type.
base = alt.Chart(cars).mark_point().encode(
    x='Horsepower:Q',
    y='Miles_per_Gallon:Q'
).properties(
    width=150,
    height=150
)

# Stack the three variants vertically.
alt.vconcat(
    base.encode(color='Cylinders:Q').properties(title='quantitative'),
    base.encode(color='Cylinders:O').properties(title='ordinal'),
    base.encode(color='Cylinders:N').properties(title='nominal')
)

In [None]:
# 61: the same three panels laid out horizontally. Uses the `base` chart
# defined in the previous cell.
alt.hconcat(
    base.encode(color='Cylinders:Q').properties(title='quantitative'),
    base.encode(color='Cylinders:O').properties(title='ordinal'),
    base.encode(color='Cylinders:N').properties(title='nominal')
)

In [None]:
# 62: color the points by `cluster`, treated as a nominal field.
alt.Chart(data2000).mark_point().encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q', scale=alt.Scale(range=[0, 1000])),
    alt.Color('cluster:N'),
)

In [None]:
# 63: same encodings, with the point mark created with filled=True.
alt.Chart(data2000).mark_point(filled=True).encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q', scale=alt.Scale(range=[0, 1000])),
    alt.Color('cluster:N'),
)

In [None]:
# 65: add a constant opacity of 0.5 (a fixed value, not a data field).
alt.Chart(data2000).mark_point(filled=True).encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q', scale=alt.Scale(range=[0, 1000])),
    alt.Color('cluster:N'),
    alt.OpacityValue(0.5),
)

In [None]:
# 67: additionally encode `cluster` as the point shape, so cluster drives
# both color and shape.
alt.Chart(data2000).mark_point(filled=True).encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q', scale=alt.Scale(range=[0, 1000])),
    alt.Color('cluster:N'),
    alt.OpacityValue(0.5),
    alt.Shape('cluster:N')
)

## Tooltip

In [None]:
# 69: attach a tooltip channel bound to the `country` field.
alt.Chart(data2000).mark_point(filled=True).encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q', scale=alt.Scale(range=[0, 1000])),
    alt.Color('cluster:N'),
    alt.OpacityValue(0.5),
    alt.Tooltip('country')
)

In [None]:
# 70: pass a list of Tooltip objects to the `tooltip` keyword so the
# tooltip carries three fields (country, fertility, life_expect).
alt.Chart(data2000).mark_point(filled=True).encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q', scale=alt.Scale(range=[0, 1000])),
    alt.Color('cluster:N'),
    alt.OpacityValue(0.5),
    tooltip = [
        alt.Tooltip('country:N'),
        alt.Tooltip('fertility:Q'),
        alt.Tooltip('life_expect:Q')
    ]
)

## Ordering

In [None]:
barley = data.barley()

# Bar chart of summed yield per variety, colored by site; the order
# channel sorts on `site` in descending order.
alt.Chart(barley).mark_bar().encode(
    x='variety:N',
    y='sum(yield):Q',
    color='site:N',
    order=alt.Order('site', sort='descending')
)

In [None]:
# Same chart with the order channel sorted on `site` in ascending order.
alt.Chart(barley).mark_bar().encode(
    x='variety:N',
    y='sum(yield):Q',
    color='site:N',
    order=alt.Order('site', sort='ascending')
)   


In [None]:
driving = data.driving()


# Line-with-points chart of gas vs. miles (both scales created with
# zero=False), with `year` in the tooltip. The order channel here is
# `miles`; the next cell uses `year` instead.
alt.Chart(driving).mark_line(point=True).encode(
    alt.X('miles', scale=alt.Scale(zero=False)),
    alt.Y('gas', scale=alt.Scale(zero=False)),
    alt.Tooltip('year'),
    order='miles'
    )

In [None]:
# Same chart with the order channel set to `year`.
alt.Chart(driving).mark_line(point=True).encode(
    alt.X('miles', scale=alt.Scale(zero=False)),
    alt.Y('gas', scale=alt.Scale(zero=False)),
    alt.Tooltip('year'),
    order='year'
)

## Column, Row Facets

In [None]:
# 75: facet the bubble chart into one column per `cluster`.
# The color legend is turned off (legend=None), and the order channel
# sorts on `pop` in descending order.
alt.Chart(data2000).mark_point(filled=True).encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q', scale=alt.Scale(range=[0, 1000])),
    alt.Color('cluster:N', legend=None),
    alt.OpacityValue(0.5),
    alt.Tooltip('country:N'),
    alt.Order('pop:Q', sort='descending'),
    alt.Column('cluster:N')
)

In [None]:
# Same faceted chart, with width and height set to 200 via properties().
alt.Chart(data2000).mark_point(filled=True).encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q', scale=alt.Scale(range=[0, 1000])),
    alt.Color('cluster:N', legend=None),
    alt.OpacityValue(0.5),
    alt.Tooltip('country:N'),
    alt.Order('pop:Q', sort='descending'),
    alt.Column('cluster:N')
).properties(width=200, height=200)