# Quiz1.

In [1]:
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import HoverTool
from bokeh.models import ColumnDataSource
from bokeh.models import CategoricalColorMapper
from bokeh.palettes import Spectral6
from bokeh.models.widgets import Panel, Tabs

In [2]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [3]:
# Read the Gapminder table and narrow it to the rows whose Year is 1970 in one expression.
life = pd.read_csv('../data/gapminder.csv').loc[lambda frame: frame['Year'] == 1970]
life.head()

Unnamed: 0,Country,Year,fertility,life,population,child_mortality,gdp,region
6,Afghanistan,1970,7.671,36.663,11964906.0,307.8,1174.0,South Asia
56,Albania,1970,5.05,66.948,2135599.0,107.98,3712.0,Europe & Central Asia
106,Algeria,1970,7.641,50.366,13746185.0,242.2,7227.0,Middle East & North Africa
156,Angola,1970,7.301,37.032,5926333.0,262.29,5397.0,Sub-Saharan Africa
206,Antigua and Barbuda,1970,3.684,65.898,65587.0,55.87,5807.0,America


In [4]:
source = ColumnDataSource(life)
# Map every distinct region to one palette colour. Spectral6 is used unmodified, so this
# presumably expects no more than six distinct regions — TODO confirm against the data.
mapper = CategoricalColorMapper(factors=list(life['region'].unique()), palette=Spectral6)


def _life_expectancy_scatter(tools, **glyph_options):
    """Return the fertility vs. life-expectancy scatter shared by both tabs.

    Parameters
    ----------
    tools
        Passed straight through to ``figure(tools=...)``.
    **glyph_options
        Extra keyword arguments forwarded to ``circle`` (e.g. selection styling).

    Returns
    -------
    The configured figure, with its legend placed at the bottom left.
    """
    fig = figure(plot_width=700, plot_height=400, tools=tools,
                 x_axis_label='Fertility', y_axis_label='Life Expectancy',
                 title='Life Expectancy in 1970')
    # legend_field replaces the deprecated legend= keyword: the legend entries are
    # grouped by the values of the 'region' column of the data source.
    fig.circle('fertility', 'life', color={'field': 'region', 'transform': mapper},
               legend_field='region', source=source, size=5, **glyph_options)
    fig.legend.location = 'bottom_left'
    return fig


# Hover plot
hover = HoverTool(tooltips=[('Country', '@Country'), ('Fertility', '@fertility'), ('Life Expectancy', '@life'), ('GDP', '@gdp')])
p1 = _life_expectancy_scatter([hover])

# Box-select plot: selected points turn red, the rest are faded to grey.
p2 = _life_expectancy_scatter('box_select, reset', selection_color='red',
                              nonselection_fill_alpha=0.3, nonselection_fill_color='grey')

tab1 = Panel(child=p1, title='Hover')
tab2 = Panel(child=p2, title='Box Select')

tabs = Tabs(tabs=[tab1, tab2])
show(tabs)

# Quiz2.

In [19]:
from bokeh.io import push_notebook
import ipywidgets as widgets
from ipywidgets import interact
from ipywidgets.embed import embed_minimal_html
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
# Load the movie table and preview its first five rows.
movie = pd.read_csv(filepath_or_buffer='../data/movies.csv')
movie.head(n=5)

Unnamed: 0,title,distributor,genre,release_time,time,screening_rat,director,dir_prev_bfnum,dir_prev_num,num_staff,num_actor,box_off_num
0,개들의 전쟁,롯데엔터테인먼트,액션,2012-11-22,96,청소년 관람불가,조병옥,,0,91,2,23398
1,내부자들,(주)쇼박스,느와르,2015-11-19,130,청소년 관람불가,우민호,1161602.5,2,387,3,7072501
2,은밀하게 위대하게,(주)쇼박스,액션,2013-06-05,123,15세 관람가,장철수,220775.25,4,343,4,6959083
3,나는 공무원이다,(주)NEW,코미디,2012-07-12,101,전체 관람가,구자홍,23894.0,2,20,6,217866
4,불량남녀,쇼박스(주)미디어플렉스,코미디,2010-11-04,108,15세 관람가,신근호,1.0,1,251,2,483387


In [34]:
@interact(xx=widgets.IntText(value=1000000), yy=widgets.IntText(value=1000000))
def update(xx, yy):
    """Draw a joint plot of ``box_off_num`` against ``dir_prev_bfnum``.

    Only rows with ``box_off_num`` below ``xx`` and ``dir_prev_bfnum`` below ``yy``
    are plotted. ``interact`` supplies both thresholds from the IntText widgets.
    """
    below_box_office_cap = movie['box_off_num'] < xx
    below_director_cap = movie['dir_prev_bfnum'] < yy
    subset = movie.loc[below_box_office_cap & below_director_cap]
    grid = sns.jointplot(data=subset, x='box_off_num', y='dir_prev_bfnum')
    grid.fig.suptitle('Joint Plot')
    plt.show()

interactive(children=(IntText(value=1000000, description='xx'), IntText(value=1000000, description='yy'), Outp…

----------

# 개인실습과제

BigContest NsShop 홈쇼핑 데이터 활용

### 1. 라이브러리 불러오기

In [174]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.layouts import row
import ipywidgets as widgets
from ipywidgets import interact
import pandas as pd
import warnings
# Switch matplotlib to the NanumGothic font (presumably so Korean labels render — confirm
# the font is installed). plt is the pyplot alias imported in the earlier Quiz2 import cell.
plt.rcParams.update({'font.family': 'NanumGothic'})
# Silence every warning category from here on.
warnings.simplefilter('ignore')

### 2. 데이터 불러오기

In [77]:
# Read the CP949-encoded retail CSV and discard its first column in the same expression.
retail = pd.read_csv('../data/retail.csv', encoding='CP949').iloc[:, 1:]
retail.head()

Unnamed: 0,방송일시,노출(분),마더코드,상품코드,상품명,상품군,판매단가,취급액,판매량,date,month,day,time,hour,minute,계절
0,2019-01-01 6:00,20,100346,201072,테이트 남성 셀린니트3종,의류,39900,2099000.0,52.606516,2019-01-01,1,1,6:00:00,6,0,겨울
1,2019-01-01 6:00,20,100346,201079,테이트 여성 셀린니트3종,의류,39900,4371000.0,109.548872,2019-01-01,1,1,6:00:00,6,0,겨울
2,2019-01-01 6:20,20,100346,201072,테이트 남성 셀린니트3종,의류,39900,3262000.0,81.754386,2019-01-01,1,1,6:20:00,6,20,겨울
3,2019-01-01 6:20,20,100346,201079,테이트 여성 셀린니트3종,의류,39900,6955000.0,174.310777,2019-01-01,1,1,6:20:00,6,20,겨울
4,2019-01-01 6:40,20,100346,201072,테이트 남성 셀린니트3종,의류,39900,6672000.0,167.218045,2019-01-01,1,1,6:40:00,6,40,겨울


### 3. 데이터 전처리

In [None]:
# Load the original 2019 results workbook; header=1 takes the column names from the second row.
raw_retail = pd.read_excel(
    io='../../4. 데이터분석 프로젝트/data/01_제공데이터/2020 빅콘테스트 데이터분석분야-챔피언리그_2019년 실적데이터.xlsx',
    header=1,
)

In [None]:
# Important (per the original author's note): this is the condition that keeps only one of
# the broadcasts sharing the same time slot. True where '노출(분)' is present in raw_retail.
notNA = ~raw_retail['노출(분)'].isna()

In [152]:
# Rows of the '가구' product group among the rows kept by the notNA mask.
# NOTE(review): notNA is built from raw_retail, a different DataFrame, so this relies on
# raw_retail and retail sharing the same row index — confirm.
furniture = retail[notNA].groupby('상품군').get_group('가구')
# Keep only the two plotted columns. Direct column selection replaces the index self-merge,
# which would multiply rows if the index ever contained duplicates.
furniture = furniture[['취급액', '판매단가']]

In [153]:
# Rows of the '침구' product group among the rows kept by the notNA mask.
# NOTE(review): notNA is built from raw_retail, a different DataFrame, so this relies on
# raw_retail and retail sharing the same row index — confirm.
bedding = retail[notNA].groupby('상품군').get_group('침구')
# Keep only the two plotted columns. Direct column selection replaces the index self-merge,
# which would multiply rows if the index ever contained duplicates.
bedding = bedding[['취급액', '판매단가']]

### 4. bokeh로 그래프 그리기

#### 1) 가구와 침구의 판매단가&취급액 상관관계

In [165]:
def _price_vs_revenue_scatter(frame, title, color):
    """Return a 400x400 scatter of 판매단가 against 취급액, both divided by 10000.

    ``frame`` must provide the '판매단가' and '취급액' columns; ``title`` and ``color``
    set the figure title and the marker colour.
    """
    fig = figure(plot_width=400, plot_height=400, x_axis_label='판매단가(만원)', y_axis_label='취급액(만원)',
                 title=title, tools='box_select, reset')
    fig.circle(frame['판매단가']/10000, frame['취급액']/10000, color=color, size=5)
    return fig


plot1 = _price_vs_revenue_scatter(furniture, '[가구] 판매단가&취급액', 'orange')
plot2 = _price_vs_revenue_scatter(bedding, '[침구] 판매단가&취급액', 'blue')

# Put the two figures next to each other and render them.
layout = row(plot1, plot2)
show(layout)

#### 2) 상품군별 판매단가/취급액/판매량 상관관계 분석

In [190]:
# Product groups offered in the dropdown. '무형' is left out; filtering (instead of
# list.remove) means a dataset without that group no longer raises ValueError.
cate = [group for group in retail['상품군'].unique() if group != '무형']


@interact(category=cate)
def update(category):
    """Show the correlation heatmap of 판매단가, 취급액 and 판매량 for one product group.

    Parameters
    ----------
    category
        Value of the '상품군' column to filter on; ``interact`` supplies it from the
        dropdown built from ``cate``.
    """
    retail2 = retail[retail['상품군'] == category]
    sns.heatmap(retail2[['판매단가', '취급액', '판매량']].corr(), annot=True, linewidths=1, cmap='RdYlGn_r')
    plt.title('{} 상관관계 분석'.format(category))
    plt.show()

interactive(children=(Dropdown(description='category', options=('의류', '속옷', '주방', '농수축', '이미용', '가전', '생활용품', …