# **1. Intro**

## 환경 설정, 데이터 불러오기



In [None]:
import altair as alt
import pandas as pd

In [None]:
from vega_datasets import data

# Show the names of the datasets available through vega_datasets.
data.list_datasets()

In [None]:
# Load the cars dataset and preview its first rows.
cars = data.cars()
cars.head()

## Tidy Dataset

In [None]:
# Read the raw Pew CSV directly from GitHub (needs network access).
pew_url = "https://raw.githubusercontent.com/nickhould/tidy-data-python/master/data/pew-raw.csv"
pew_df = pd.read_csv(pew_url)
# Bare expression: displays the frame when run as a notebook cell.
pew_df

In [None]:
# Reshape the Pew table from wide to long: "religion" stays as the identifier
# column, every other column is unpivoted into an (income, freq) pair, and the
# result is ordered by religion before previewing the first ten rows.
formatted_df = pew_df.melt(
    id_vars=["religion"],
    var_name="income",
    value_name="freq",
).sort_values(by="religion")
formatted_df.head(10)

In [None]:
# Wide ("untidy") layout: one row per city, one precipitation column per month.
weather_no_tidy = pd.DataFrame(
    [
        ('Seattle', 2.68, 0.97, 5.31),
        ('New York', 3.94, 4.13, 3.58),
        ('Chicago', 3.62, 3.98, 2.56),
    ],
    columns=['city', 'Apr', 'Aug', 'Dec'],
)

weather_no_tidy

In [None]:
# Tidy (long) layout: one row per (city, month) pair with a single 'precip'
# value column.
weather_tidy = pd.DataFrame(
    [
        ('Seattle', 'Apr', 2.68),
        ('Seattle', 'Aug', 0.97),
        ('Seattle', 'Dec', 5.31),
        ('New York', 'Apr', 3.94),
        ('New York', 'Aug', 4.13),
        ('New York', 'Dec', 3.58),
        ('Chicago', 'Apr', 3.62),
        ('Chicago', 'Aug', 3.98),
        ('Chicago', 'Dec', 2.56),
    ],
    columns=['city', 'month', 'precip'],
)

weather_tidy

## Simple Example

### bar chart

In [None]:
# Nine labelled values (category 'a', number 'b') for the bar-chart example.
source = pd.DataFrame(
    list(zip('ABCDEFGHI', [28, 55, 43, 91, 81, 53, 19, 87, 52])),
    columns=['a', 'b'],
)

# TODO (36p)


In [None]:
# The nine (a, b) records supplied inline as an alt.Data object, i.e. a list
# of row dicts rather than a pandas DataFrame.
source2 = alt.Data(values=[
    {'a': label, 'b': value}
    for label, value in zip('ABCDEFGHI', [28, 55, 43, 91, 81, 53, 19, 87, 52])
])

# TODO (37p)


### Scatterplot

In [None]:
# Keep only the cars dataset's URL attribute; data.cars() is not called here.
url = data.cars.url
# TODO (39p)


In [None]:
# Load the cars dataset by calling data.cars().
cars = data.cars()
# TODO (39p)

## Wide-form vs Long-form

In [None]:
# Wide-form table: one row per date, one price column per ticker.
# The Amazon column is spelled 'AMZN' so it matches the 'company' values used
# by long_form.
wide_form = pd.DataFrame({
    'Date': ['2007-10-01', '2007-11-01', '2007-12-01'],
    'AAPL': [189.95, 182.22, 198.08],
    'AMZN': [89.15, 90.56, 92.64],
    'GOOG': [707.00, 693.00, 691.48],
})
wide_form

In [None]:
# Long-form table: one row per (Date, company) observation with a single
# 'price' column. Prices mirror the per-ticker columns of wide_form exactly,
# so both frames describe the same data.
long_form = pd.DataFrame({
    'Date': ['2007-10-01', '2007-11-01', '2007-12-01',
             '2007-10-01', '2007-11-01', '2007-12-01',
             '2007-10-01', '2007-11-01', '2007-12-01'],
    'company': ['AAPL', 'AAPL', 'AAPL',
                'AMZN', 'AMZN', 'AMZN',
                'GOOG', 'GOOG', 'GOOG'],
    'price': [189.95, 182.22, 198.08,
              89.15, 90.56, 92.64,
              707.00, 693.00, 691.48],
})


long_form

In [None]:
# TODO (44p)
# Exercise stub: line chart over long_form; encode() has no channels yet.
alt.Chart(long_form).mark_line().encode(
    
)

# **2. Encodings**

In [None]:
from vega_datasets import data

cars = data.cars()

# Scatter plot of Horsepower (x) against Miles_per_Gallon (y); Origin is mapped
# to both the color and the shape channel.
alt.Chart(cars).mark_point().encode(
    x='Horsepower',
    y='Miles_per_Gallon',
    color='Origin',
    shape='Origin'
)

## Short-form vs Long-form

In [None]:
# TODO (49p)
# Exercise stub: point chart over cars; encode() has no channels yet.
alt.Chart(cars).mark_point().encode(
    
)

In [None]:
# TODO (49p)
# Second exercise stub for the same point chart over cars; encode() is still empty.
alt.Chart(cars).mark_point().encode(
    
)

In [None]:
# Load gapminder and keep only the rows whose 'year' equals 2000.
gapminder = data.gapminder()
data2000 = gapminder.loc[gapminder['year'] == 2000]
data2000.head(5)

## X, Y

In [None]:
# TODO (51-52p)
# Exercise stub: point chart over data2000; the x/y channels are not set yet.
alt.Chart(data2000).mark_point().encode(
    
)

### Data Types

In [None]:
pop = data.population()


# Shared 200x200 bar-chart base; its own encode() call is still a TODO.
base = alt.Chart(pop).mark_bar().encode(
    # TODO (53p)
    
).properties(
    width=200,
    height=200
)

# Side-by-side comparison of the same base: x bound to 'year' as quantitative
# (:Q) in the first panel and as ordinal (:O) in the second.
alt.hconcat(
  base.encode(
      x='year:Q'
      ).properties(title='year:quantitative'),
  base.encode(
      x='year:O'
      ).properties(title='year:ordinal')
)

In [None]:
# TODO (54p)
# Exercise stub: point chart over data2000; encode() has no channels yet.
alt.Chart(data2000).mark_point().encode(
    
)

### Zero Baseline, Nice number

In [None]:
# TODO (55-56p)
# Exercise stub for the zero-baseline / nice-number section; encode() is empty.
alt.Chart(data2000).mark_point().encode(
    
)

## Size

In [None]:
# TODO (57-58p)
# Exercise stub for the size section; encode() has no channels yet.
alt.Chart(data2000).mark_point().encode(

)


## Color, Opacity, Shape

In [None]:
# TODO (60-61p)
# 150x150 point-chart base over cars; its encode() call is still empty.
base = alt.Chart(cars).mark_point().encode(
    
).properties(
    width=150,
    height=150
)



In [None]:
# TODO (62-67p)
# Exercise stub: point chart over data2000; encode() has no channels yet.
alt.Chart(data2000).mark_point().encode(
    
)

## Tooltip

In [None]:
# TODO (69-70p)
# Exercise stub: filled point marks over data2000; encode() has no channels yet.
alt.Chart(data2000).mark_point(filled=True).encode(
    
)

## Ordering

In [None]:
# Load the barley dataset for the ordering example.
barley = data.barley()

# TODO (71p)
# Exercise stub: bar chart over barley; encode() has no channels yet.
alt.Chart(barley).mark_bar().encode(
    
)

In [None]:
# TODO (72p)
# Load the driving dataset for the second ordering example.
driving = data.driving()


# Exercise stub: line chart with point markers enabled; encode() is empty.
alt.Chart(driving).mark_line(point=True).encode(
    )

## Column, Row Facets

In [None]:
# TODO (75-77p)
# Starting chart for the facet section: filled points with fertility on x and
# life_expect on y, sized by pop (size range 0-1000), colored by cluster with
# the legend hidden, constant 0.5 opacity, country as tooltip, and marks
# ordered by descending pop. No facet channel is present yet -- presumably
# that is what the TODO adds.
alt.Chart(data2000).mark_point(filled=True).encode(
    alt.X('fertility:Q'),
    alt.Y('life_expect:Q'),
    alt.Size('pop:Q', scale=alt.Scale(range=[0, 1000])),
    alt.Color('cluster:N', legend=None),
    alt.OpacityValue(0.5),
    alt.Tooltip('country:N'),
    alt.Order('pop:Q', sort='descending'),

)

## Binning, Aggregation

In [None]:
cars = data.cars()

# TODO (79-81p)
# Starting chart for the binning section: x is the plain Horsepower field
# (no bin option is set yet) and y is the record count.
alt.Chart(cars).mark_bar().encode(
    alt.X('Horsepower'),
    y='count()'
)

## Sorting

In [None]:
barley = data.barley()


# Shared 100x100 bar-chart base: mean yield drives both the y position and the
# color (legend hidden). No x channel is set here.
base = alt.Chart(barley).mark_bar().encode(
    y='mean(yield):Q',
    color=alt.Color('mean(yield):Q', legend=None)
).properties(width=100, height=100)

In [None]:
# TODO (82-83p)

# NOTE(review): ascending, descending, explicit and sortfield are not defined
# anywhere in the visible cells -- presumably the TODO above is meant to create
# them as charts, which this expression then combines with `|`. Confirm.
ascending | descending | explicit | sortfield

In [None]:
# Rect chart over barley: x is mean yield with ascending sort; the remaining
# channels are left as a TODO.
alt.Chart(barley).mark_rect().encode(
    alt.X('mean(yield):Q', sort='ascending'),
    # TODO (84p)
)

# Hint for the exercise: an explicit sort order given as a list of values.
# sort=['Morris', 'Duluth', 'Grand Rapids', 'University Farm', 'Waseca', 'Crookston']

# **3. Marks**

## Point

In [None]:
# Point marks: fertility on x, cluster (nominal) on y, and cluster also mapped
# to the shape channel.
alt.Chart(data2000).mark_point().encode(
    alt.X('fertility:Q'),
    alt.Y('cluster:N'),
    alt.Shape('cluster:N')
)

### Circle

In [None]:
# Stub for the Circle section: only the Chart is constructed; no mark or encoding yet.
alt.Chart(data2000)

### Square

In [None]:
# Stub for the Square section: only the Chart is constructed; no mark or encoding yet.
alt.Chart(data2000)

## Tick

In [None]:
# Stub for the Tick section: only the Chart is constructed; no mark or encoding yet.
alt.Chart(data2000)

## Bar

In [None]:
# Bar chart over data2000: country (nominal) on x, pop (quantitative) on y.
alt.Chart(data2000).mark_bar().encode(
    alt.X('country:N'),
    alt.Y('pop:Q')
)

## Line

In [None]:
# Stub for the Line section: Chart over the full gapminder frame; no mark or encoding yet.
alt.Chart(gapminder)

## Area

In [None]:
# Keep only the gapminder rows whose country is "United States".
dataUS = gapminder.loc[gapminder["country"] == "United States"]
dataUS.head()

In [None]:
# Exercise stub: area chart over dataUS; encode() has no channels yet.
alt.Chart(dataUS).mark_area().encode(
    
)

In [None]:
# Subset of gapminder limited to three countries: United States, Canada and
# Mexico.
dataNA = gapminder.loc[
    gapminder["country"].isin(["United States", "Canada", "Mexico"])
]


dataNA.head()

In [None]:
# Exercise stub: area chart over dataNA; encode() has no channels yet.
alt.Chart(dataNA).mark_area().encode(
    
)

## Text

In [None]:
# Text marks over data2000, left-aligned, vertically centered and shifted 6 px
# along x via dx; encode() is still empty (exercise stub).
texts = alt.Chart(data2000).mark_text(
    align='left',
    baseline='middle',
    dx=6
).encode(
    
)

texts

In [None]:
# Stub: bars is only a bare Chart over data2000 so far; no mark or encoding yet.
bars = alt.Chart(data2000)

bars

# **4. Data Transformation**

## Bin

In [None]:
# Load the movies dataset, print its (rows, columns) shape and preview it.
movies = data.movies()

print(movies.shape)
movies.head()

In [None]:
# Circle marks: Rotten_Tomatoes_Rating on x, IMDB_Rating on y. No bin option
# is applied to either channel yet.
alt.Chart(movies).mark_circle().encode(
    alt.X('Rotten_Tomatoes_Rating:Q'),
    alt.Y('IMDB_Rating'),
)

## Aggregate

In [None]:
# Bar chart: average Rotten_Tomatoes_Rating on x for each Major_Genre on y.
alt.Chart(movies).mark_bar().encode(
    alt.X('average(Rotten_Tomatoes_Rating):Q'),
    alt.Y('Major_Genre:N')
)

## Calculate

In [None]:
# TODO (38p)
# Stub for the Calculate section: bare Chart over movies; no mark or encoding yet.
alt.Chart(movies)

## Filter

In [None]:
# Circle marks: Rotten_Tomatoes_Rating on x, IMDB_Rating on y. No filter
# transform is applied yet.
alt.Chart(movies).mark_circle().encode(
    alt.X('Rotten_Tomatoes_Rating:Q'),
    alt.Y('IMDB_Rating:Q')
)

## 응용

In [None]:
# Single selection keyed on the Major_Genre field.
# NOTE(review): alt.selection_single and properties(selection=...) look like
# the Altair 4 selection API; newer Altair releases document selection_point /
# add_params instead -- confirm against the installed version.
selection = alt.selection_single(fields=['Major_Genre'])

# Top panel (600x200): rating scatter, filtered by the selection.
top=alt.Chart().mark_circle().encode(
    x='Rotten_Tomatoes_Rating:Q',
    y='IMDB_Rating:Q'
).properties(
    width=600, height=200
).transform_filter(
    selection
)

# Bottom panel (600x100): record count per Major_Genre. Bars that satisfy the
# selection are steelblue, the others lightgray; the selection itself is
# attached to this chart.
bottom = alt.Chart().mark_bar().encode(
    x='Major_Genre:O',
    y='count()',
    color=alt.condition(selection, alt.value('steelblue'), alt.value('lightgray'))
).properties(
    width=600, height=100,
    selection=selection
)

# Stack the two panels vertically; the movies data is supplied once at the
# concat level because both charts were created without data.
alt.vconcat(
    top, 
    bottom,
    data=movies
)