In [5]:
import sys
import plotly
import cufflinks as cf
import pandas as pd
import numpy as np

### 버전확인

In [6]:
print(sys.version)
print(plotly.__version__)
print(cf.__version__)
print(pd.__version__)
print(np.__version__)

3.8.8 (default, Apr 13 2021, 15:08:03) [MSC v.1916 64 bit (AMD64)]
5.3.1
0.17.3
1.2.4
1.20.1


#### 오프라인에서도 실행 가능하도록 하는 명령

In [7]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()

In [8]:
# 100 행 4 열 안에 무작위 숫자 배열
dat = np.random.rand(100,4)
df = pd.DataFrame(dat, columns="A B C D".split())
df.head()

Unnamed: 0,A,B,C,D
0,0.54002,0.728768,0.194657,0.371933
1,0.357738,0.940789,0.759235,0.590612
2,0.623739,0.020384,0.192998,0.708336
3,0.003818,0.385919,0.081156,0.115424
4,0.558601,0.610242,0.556231,0.765279


In [9]:
# A열의 데이터로 선형 그래프 그리기(# 설명 보기 : SHIFT + TAB)
df['A'].iplot(kind='line', xTitle='데이터 idx', yTitle='y축 - 값', title='100개의 값의 선 그래프')

#### 1-7 (추가) A, C열 그려보기

In [10]:
df[ ["A", "C"] ].iplot(kind='line')

In [11]:
# 산점도 그래프(x, y 값이 수치형)
df.iplot(kind='scatter', x='A', y="B", mode='markers+text', size=10)

In [12]:
# 색깔 설정
df.iplot(kind='scatter', x='A', y="B", mode='markers+text', size=10, color='#3f9142')

In [13]:
# 심볼 설정
df.iplot(kind='scatter', x='A', y="B", mode='markers+text', size=10, color='#3f9142', symbol=15)

In [14]:
# 바 그래프 그리기
df2 = pd.DataFrame({'items':['bag','apple','cap'],
                    'Values':[32,43,50,]})
df2

Unnamed: 0,items,Values
0,bag,32
1,apple,43
2,cap,50


In [15]:
df2.iplot(kind='bar', x='items', y='Values')

In [16]:
df = pd.DataFrame(np.random.rand(10,3),
                  columns=['A', 'B', 'C'])
df.head()

Unnamed: 0,A,B,C
0,0.9559,0.385905,0.108132
1,0.458329,0.198966,0.914003
2,0.249452,0.214694,0.374568
3,0.496157,0.902554,0.478305
4,0.797645,0.513212,0.260111


In [17]:
# 0,1,2,3,4에 대한 A,B,C의 값
df.iplot(kind='bar')

In [18]:
df.iplot(kind='barh', barmode='stack')

In [19]:
df.iplot(kind='box')

In [20]:
# 3D Surface Plot
df3 = pd.DataFrame({'x':[1,2,3,4,5],
 'y':[10,20,30,40,60],
 'z':[5,4,3,2,1]})
df3

Unnamed: 0,x,y,z
0,1,10,5
1,2,20,4
2,3,30,3
3,4,40,2
4,5,60,1


In [21]:
df3.iplot(kind='surface', colorscale='rdylbu')

In [22]:
# 선 그래프
df = cf.datagen.lines()  
df.shape

(100, 5)

In [23]:
df.iplot(kind='line')

### 테마 설정

In [24]:
themes = cf.getThemes()
themes

['ggplot', 'pearl', 'solar', 'space', 'white', 'polar', 'henanigans']

In [25]:
data = pd.Series(range(10))
for theme in themes:
 data.iplot(kind='bar', theme=theme, title=theme)

### 타이타닉 데이터 가져오기

In [26]:
import seaborn as sns

In [27]:
titanic = sns.load_dataset('titanic')

In [28]:
titanic.describe()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
count,891.0,891.0,714.0,891.0,891.0,891.0
mean,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,0.0,1.0,0.42,0.0,0.0,0.0
25%,0.0,2.0,20.125,0.0,0.0,7.9104
50%,0.0,3.0,28.0,0.0,0.0,14.4542
75%,1.0,3.0,38.0,1.0,0.0,31.0
max,1.0,3.0,80.0,8.0,6.0,512.3292


In [29]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [30]:
# class별 요금
fare_by_class = titanic[['pclass', 'fare']]
fare_by_class

Unnamed: 0,pclass,fare
0,3,7.2500
1,1,71.2833
2,3,7.9250
3,1,53.1000
4,3,8.0500
...,...,...
886,2,13.0000
887,1,30.0000
888,3,23.4500
889,1,30.0000


In [31]:
# 산점도로 클래스 별 요금 그래프 그리기
fare_by_class.iplot(kind='scatter',x='pclass', y='fare', mode='markers', size=10)

In [32]:
# 나이
age = titanic[['age']]
age

Unnamed: 0,age
0,22.0
1,38.0
2,26.0
3,35.0
4,35.0
...,...
886,27.0
887,19.0
888,
889,26.0


In [33]:
age.iplot(kind='histogram')

## Plotly express 기본

In [34]:
# 라이브러리 로드
import plotly.express as px

In [35]:
# iris에 대한 설명
print(px.data.iris.__doc__)


    Each row represents a flower.

    https://en.wikipedia.org/wiki/Iris_flower_data_set

    Returns:
        A `pandas.DataFrame` with 150 rows and the following columns:
        `['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species', 'species_id']`.


In [36]:
iris = px.data.iris()
print(iris.shape)
iris.head()

(150, 6)


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [41]:
fig = px.scatter(iris, x='sepal_width', y='sepal_length', color="species", 
                 marginal_y='violin', marginal_x='box', trendline='ols')
fig.show()

In [44]:
fig = px.scatter_matrix(iris, dimensions=["sepal_width",
                                         "sepal_length",
                                        "petal_width",
                                        "petal_length"],
                                        color="species")
fig.show()

## 다차원 범주형 데이터 시각화

In [45]:
import seaborn as sns

In [46]:
tips = px.data.tips()
tips['size'].unique() # 중복 값을 제외한 유일한 값들을 보여준다.

array([2, 3, 4, 1, 6, 5], dtype=int64)

In [48]:
tips['size'].value_counts() # 각 값의 개수

2    156
3     38
4     37
5      5
1      4
6      4
Name: size, dtype: int64

In [50]:
# 범주형 데이터 시각화
fig = px.parallel_categories(tips, color='size')
fig.show()

## gapminder 데이터 셋 시각화

* gapminder 데이터 셋 ?
    * 국가별 경제 수준과 의료 동향 수준을 정리한 DataSet이다.

In [51]:
df = px.data.gapminder()
print(df.shape)
print(df.info())
print(px.data.gapminder.__doc__)
# Dtype이 object형은 범주형 데이터가 많다.

(1704, 8)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    1704 non-null   object 
 1   continent  1704 non-null   object 
 2   year       1704 non-null   int64  
 3   lifeExp    1704 non-null   float64
 4   pop        1704 non-null   int64  
 5   gdpPercap  1704 non-null   float64
 6   iso_alpha  1704 non-null   object 
 7   iso_num    1704 non-null   int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 106.6+ KB
None

    Each row represents a country on a given year.

    https://www.gapminder.org/data/

    Returns:
        A `pandas.DataFrame` with 1704 rows and the following columns:
        `['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
        'iso_alpha', 'iso_num']`.
        If `datetimes` is True, the 'year' column will be a datetime column
        If `centroids` is True, two new columns are added:

### 데이터 설명
* continent : 대륙
* country : 나라
* gdpPercap : 1인당 국민소득
* pop : 인구
* lifeExp : 기대수명

In [54]:
gapminder = px.data.gapminder()

fig = px.scatter(gapminder, x='gdpPercap', y='lifeExp')
fig.show()

In [55]:
fig = px.scatter(gapminder, x='gdpPercap', y='lifeExp', marginal_y='violin', marginal_x='box', trendline='ols')
fig.show()

In [56]:
gapminder.corr() # 상관관계 (-1 ~ 1)

Unnamed: 0,year,lifeExp,pop,gdpPercap,iso_num
year,1.0,0.435611,0.082308,0.227318,1.82477e-14
lifeExp,0.4356112,1.0,0.064955,0.583706,-0.006534901
pop,0.08230808,0.064955,1.0,-0.0256,-0.05980741
gdpPercap,0.2273181,0.583706,-0.0256,1.0,0.008441696
iso_num,1.82477e-14,-0.006535,-0.059807,0.008442,1.0


## 다중 그래프 그리기

* plotly은 다음과 같음.
    * 01 make_subplots 이용하여 틀(레이아웃 만들기)
    * 02 add_trace 을 이용하여 레이아웃 위에 그래프 그리기
    * 03 updte_layout을 이용하여 축제목 등을 추가하기

In [57]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [74]:
fig = make_subplots(rows=2, cols=2, shared_yaxes=True)
fig

In [75]:
gap_1952 = gapminder.query('year==1952')
gap_2007 = gapminder.query('year==2007')

gap_1952_line = go.Scatter(x=gap_1952["gdpPercap"], 
                        y=gapminder["lifeExp"], 
                        mode='markers')

gap_2007_line = go.Scatter(x=gap_2007["gdpPercap"], 
                        y=gap_2007["lifeExp"], 
                        mode='markers')

In [76]:
fig.add_trace(gap_1952_line, row=1, col=1)
fig.add_trace(gap_2007_line, row=2, col=1)

In [78]:
# 그래프 업데이트 하기
layout = {
    "xaxis": {
        "title": "1인당 국민소득",
    },
    "yaxis": {
        "title": "기대수명"
    },
    
    "xaxis2": {
        "title": "1인당 국민소득",
    },
    "yaxis2": {
        "title": "기대수명"
    }
}

fig.update_layout(layout)

In [62]:
gapminder = px.data.gapminder()
fig = px.scatter(gapminder.query("year==2007"),
         x="gdpPercap",
         y="lifeExp",
         size="pop",
         color="continent",     # 색 구분 : 대륙
         hover_name="country",  # 마우스 클릭 나라명
         log_x=True, size_max=60)
fig.show()

In [63]:
fig = px.scatter(gapminder,
                animation_frame='year',
                animation_group='country',
                x='gdpPercap',
                y='lifeExp',
                size='pop',
                color='continent',
                hover_name='country', log_x=True, size_max=60
                )
fig

In [64]:
import plotly.express as px
px.defaults.width = 700
px.defaults.height = 600

fig = px.scatter(gapminder,
         animation_frame="year",
         animation_group="country",
         x="gdpPercap",
         y="lifeExp",
         size="pop",
         color="continent",
         hover_name="country", 
         facet_col="continent",
         log_x=True, size_max=60)

fig

In [66]:
fig = px.scatter(gapminder, x="gdpPercap", y="lifeExp",
         animation_frame="year",
         animation_group="country",
         size="pop",
         color="continent",
         hover_name="country",
         facet_col="continent",
         log_x=True, size_max=45, range_x=[100,100000], range_y=[20,90])
fig.show()

## 막대 그래프

In [65]:
import plotly.express as px
tips = px.data.tips()
fig = px.bar(tips, x="sex", y="total_bill", color="smoker", barmode="group")
fig.show()

In [67]:
df = px.data.election()
print(df.shape)
print(df.head())
print(df.columns)
print(px.data.election.__doc__)

(58, 8)
                district  Coderre  Bergeron  Joly  total    winner     result  \
0     101-Bois-de-Liesse     2481      1829  3024   7334      Joly  plurality   
1  102-Cap-Saint-Jacques     2525      1163  2675   6363      Joly  plurality   
2   11-Sault-au-Récollet     3348      2770  2532   8650   Coderre  plurality   
3           111-Mile-End     1734      4782  2514   9030  Bergeron   majority   
4         112-DeLorimier     1770      5933  3044  10747  Bergeron   majority   

   district_id  
0          101  
1          102  
2           11  
3          111  
4          112  
Index(['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result',
       'district_id'],
      dtype='object')

    Each row represents voting results for an electoral district in the 2013 Montreal
    mayoral election.

    Returns:
        A `pandas.DataFrame` with 58 rows and the following columns:
        `['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result', 'dist

In [68]:
import plotly.express as px
election = px.data.election()
fig = px.line_3d(election, 
                 x="Joly", y="Coderre", z="Bergeron", 
                 color="winner", line_dash="winner")
fig.show()

## Map

In [69]:
fig = px.scatter_geo(gapminder,
             locations="iso_alpha",
             color="continent",
             hover_name="country",
             size="pop",
            animation_frame="year", projection="natural earth")
fig.show()

In [70]:
import plotly.express as px
df = px.data.gapminder().query("year == 2007")
fig = px.line_geo(df, locations="iso_alpha",
                  color="continent", # "continent" is one of the columns of gapminder
                  projection="orthographic")
fig.show()