In [1]:
import plotly.plotly as py 
# from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Offline plotting: uncomment the import above to render figures without uploading them.
import plotly.graph_objs as go

import pandas as pd
import numpy as np

In [4]:
'''
Load the data with pandas and plot "Life Expectancy v. Per Capita GDP, 2007"
for two continents: the Americas and Europe.
'''
df = pd.read_csv('https://raw.githubusercontent.com/yankev/test/master/life-expectancy-per-GDP-2007.csv')

americas = df[(df.continent=='Americas')]
europe = df[(df.continent=='Europe')]


def _continent_trace(frame, name, color):
    """Build the marker trace for one continent.

    Args:
        frame: rows of ``df`` for a single continent; must provide the
            ``gdp_percap``, ``life_exp`` and ``country`` columns.
        name: legend label of the trace.
        color: marker fill colour.

    Returns:
        A ``go.Scatter`` with GDP per capita on x, life expectancy on y and
        the country name as hover text.
    """
    return go.Scatter(
        x=frame.gdp_percap,
        y=frame.life_exp,
        mode='markers',
        marker=dict(size=12,
                    line=dict(width=1),
                    color=color
                ),
        name=name,
        text=frame.country,
    )


# Both traces share every setting except the rows, the label and the colour.
trace_comp0 = _continent_trace(americas, 'Americas', 'navy')
trace_comp1 = _continent_trace(europe, 'Europe', 'red')

data = [trace_comp0, trace_comp1]
layout = go.Layout(
    title='Life Expectancy v. Per Capita GDP,2007',
    hovermode='closest',
    xaxis=dict(
        title='GDP per capita (2000 dollars)',
        ticklen=5,
        zeroline=False,
        gridwidth=2,
    ),
    yaxis=dict(
        title='Life Expectancy (years)',
        ticklen=5,
        gridwidth=2,
    ),
)
fig = go.Figure(data=data, layout=layout)
# Keep the plot call as the cell's last expression so the cell result is the
# figure, not a dump of the raw trace objects.
py.iplot(fig, filename='life_expectancy_per_GDP_2007')

In [45]:
'''Plot values generated with numpy.'''
# 11 evenly spaced points from 1 to 3 inclusive. linspace fixes both the number
# of points and the end value, whereas arange with a fractional step leaves
# them to floating-point rounding.
x = np.linspace(1, 3, 11)
y = 6*np.sin(x)
y

array([5.04882591, 5.59223452, 5.91269838, 5.99744162, 5.84308579,
       5.45578456, 4.85097842, 4.05277908, 3.09300823, 2.0099289 ,
       0.84672005])

In [47]:
# First trace: three hand-written points with default styling.
trace1 = go.Scatter(name='1st', x=[1, 2, 3], y=[4, 5, 6])

# Second trace: the numpy-generated points, drawn as blue star markers.
trace2 = go.Scatter(
    name='2nd',
    mode='markers',
    marker=dict(color='blue', symbol='star', size=10),
    x=x,
    y=y,
)

data2 = [trace1, trace2]
layout2 = go.Layout(
    title='numpy',
    xaxis=dict(title='x1'),
    yaxis=dict(title='x2'),
)
py.iplot(go.Figure(data=data2, layout=layout2), filename='py3')


In [48]:
'''Use annotations to label the point (3, 6) as "Highest point".'''
highest_point = go.layout.Annotation(x=3, y=6, text='Highest point')
# layout2 is updated in place, so the annotation stays on it for later cells.
layout2.update({'annotations': [highest_point]})
py.iplot(go.Figure(data=data2, layout=layout2), filename='py4')

In [8]:
'''Highlight a region of the chart.'''
# Borderless, translucent green rectangle in data coordinates (xref/yref are
# the x and y axes). Coordinates may be strings or numbers: '1' or 1 both work.
highlight_rect = dict(
    type='rect',
    xref='x',
    yref='y',
    x0='1',
    x1='2',
    y0='5',
    y1=7,
    fillcolor='green',
    opacity=0.2,
    line=dict(width=0),
)
layout2.update({'shapes': [highlight_rect]})

# NOTE(review): this reuses filename 'py4' from the annotation cell, so it
# presumably replaces that hosted plot -- confirm this is intended.
answer = py.iplot(go.Figure(data=data2, layout=layout2), filename='py4')
answer

In [2]:
'''Start practising with cufflinks.'''
import cufflinks as cf
# Print the installed cufflinks version.
print(cf.__version__)

0.15


In [3]:
# Demo DataFrame produced by cufflinks' built-in data generator.
df = cf.datagen.lines()

# The plain plotly way of plotting it, kept for comparison:
# py.iplot([{
#     'x': df.index,
#     'y': df[col],
#     'name': col
# }   for col in df.columns], filename='cufflink line')

# With cufflinks the DataFrame can be plotted directly.
df.iplot(
    kind='scatter',
    filename='cufflink line',
)


In [4]:
# Offline mode: figures are not uploaded to the plotly cloud.
# (cf.go_online() switches back to uploading, which is the default.)
cf.go_offline()
df.iplot(
    filename='cufflink line2',
    kind='scatter',
)


In [16]:
# Two random walks: cumulative sums of 1000 standard-normal steps per column.
# A local, seeded generator makes the chart identical on every run without
# touching numpy's global random state.
rng = np.random.RandomState(42)
df = pd.DataFrame(rng.randn(1000, 2), columns=['A', 'B']).cumsum()
df.iplot(filename='cufflinks line chart')

In [17]:
# Use column 'A' as x and column 'B' as y.
df.iplot(x='A', y='B', filename='x-A y-B')

In [21]:
'''Load a CSV file and prepare the data for a bar chart.'''
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/widgets/master/ipython-examples/311_150k.csv',
    index_col=1,
    parse_dates=True,
)

# Count the rows per complaint type and keep the first 20 entries of the result.
complaint_counts = df['Complaint Type'].value_counts()
series = complaint_counts[:20]
series.head(3)  # series.head() without an argument shows the first five rows

HEAT/HOT WATER            32202
Street Light Condition     7558
Blocked Driveway           6997
Name: Complaint Type, dtype: int64

In [24]:
# Bar chart of the complaint counts held in `series`.
series.iplot(
    kind='bar',
    title='NYC 311 Complaints',
    yTitle='Number of Complaint',
    filename='cufflinks bar chart',
)

In [160]:
'''Select rows with iloc.'''
# np.random-style rand(10, 4) gives a 10x4 array, so each of A, B, C and D has
# 10 values. The seeded local generator keeps the chart reproducible.
rng = np.random.RandomState(42)
df = pd.DataFrame(rng.rand(10, 4), columns=['A', 'B', 'C', 'D'])

# A single row by position: the sixth row. With this default integer index,
# df.loc[5] selects the same row by label.
sixth_row = df.iloc[5]

# The first three rows by position. The label-based equivalent is df.loc[0:2]:
# .loc slices include the end label, so df.loc[0:3] would return four rows.
row = df.iloc[0:3]

row.iplot(kind='bar', filename='cufflinks row bar chart')

In [161]:
'''Stacked bar chart.'''
# NOTE(review): the filename says "grouped" while barmode is 'stack' -- confirm
# which one was intended.
df.iplot(
    kind='bar',
    barmode='stack',
    filename='cufflinks grouped bar chart',
)

In [162]:
'''Grouped horizontal bar chart.'''
df.iplot(
    kind='barh',
    barmode='group',
    filename='cufflinks barh',
)

In [108]:
cf.getThemes()  # list the available themes

['polar', 'space', 'ggplot', 'pearl', 'solar', 'white', 'henanigans']

In [5]:
# NOTE(review): presumably this writes the theme to cufflinks' config file, so
# it would persist beyond this notebook session -- confirm.
cf.set_config_file(theme='pearl')  # use the 'pearl' theme

In [6]:
'''Data for a stacked histogram chart with 10 bars (bins=10).'''
# Three columns of 1000 standard-normal samples, shifted by +1, 0 and -1.
# The seeded local generator makes the data identical on every run without
# touching numpy's global random state.
rng = np.random.RandomState(42)
df = pd.DataFrame({
    'a': rng.randn(1000) + 1,
    'b': rng.randn(1000),
    'c': rng.randn(1000) - 1,
})

In [193]:
# Stacked histogram of the DataFrame's columns using 10 bins.
# Other options to try: histnorm='density', histfunc='max'.
df.iplot(
    kind='histogram',
    barmode='stack',
    bins=10,
    filename='cufflinks histogram',
)

In [7]:
'''Box chart.'''
# 10 uniform random values for each of the five columns. The seeded local
# generator keeps the chart reproducible between runs.
rng = np.random.RandomState(42)
df = pd.DataFrame(rng.rand(10, 5),
                  columns=['A', 'B', 'C', 'D', 'E'])
df.iplot(kind='box', filename='cufflinks box')

In [8]:
# !!! The plotly.tools.make_subplots call used by cufflinks has a bug; if this
# cell does not work, look at cufflinks/tools in the open-source repository.
'''Area chart drawn as subplots.'''
df.iplot(
    kind='area',
    subplots=True,
    shape=(5, 1),
    fill=True,
    filename='cufflinks area',
)

In [19]:
'''A drawback of cufflinks.'''
df = pd.read_csv(
    'http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt',
    sep='\t',
)

# One frame per year of interest.
df2007 = df[df['year'] == 2007]
df1952 = df[df['year'] == 1952]

df2007.iplot(
    kind='scatter',
    mode='markers',
    x='gdpPercap',
    y='lifeExp',
    filename='cufflinks scatter',
)
# cufflinks is not well suited to multi-column scatter plots.

In [26]:
'''Bubble chart.'''
# GDP per capita against life expectancy for 2007; the 'pop' column is passed
# as the size and the 'country' column as the text.
df2007.iplot(
    kind='bubble',
    x='gdpPercap',
    y='lifeExp',
    size='pop',
    text='country',
    xTitle='GDP',
    yTitle='Life Expectancy',
    filename='cufflinks bubble',
)

In [12]:
'''Generate four line series and draw them as subplots that share the x axis.'''
df = cf.datagen.lines(4)
df.iplot(
    subplots=True,
    shape=(4, 1),
    shared_xaxes=True,
    subplot_titles=True,
    fill=True,
    legend=False,
    filename='cufflinks subplots',
)

In [14]:
# Draw a scatter matrix of df via the scatter_matrix method.
df.scatter_matrix(filename='cufflinks scatter matrix')

In [17]:
'''Generate data and plot it straight away as a heatmap chart.'''
heatmap_df = cf.datagen.heatmap(20, 20)
heatmap_df.iplot(
    kind='heatmap',
    colorscale='spectral',
    filename='cufflinks heatmap',
)

In [59]:
'''Draw one vertical line and one horizontal line on the chart.'''
# A single green horizontal line spec (y0=2) and a single vertical line dated
# 2015-02-10.
green_hline = {'y0': 2, 'color': 'green'}
cf.datagen.lines(3, columns=['a', 'b', 'c']).iplot(
    hline=green_hline,
    vline=['2015-02-10'],
)

In [55]:
'''Draw a vertical shaded span and horizontal shaded spans on the chart.'''
# Two horizontal bands given as (low, high) pairs.
horizontal_bands = [(-1, 1), (2, 4)]
# One vertical band between two dates, with its own colour, fill and opacity.
vertical_band = {
    'x0': '2015-2-15',
    'x1': '2015-3-15',
    'color': 'rgba(0,40,40,0.3)',
    'fill': True,
    'opacity': .4,
}
cf.datagen.lines(3, columns=['a', 'b', 'c']).iplot(
    hspan=horizontal_bands,
    vspan=vertical_band,
    filename='cufflinks shaded regions',
)


<module 'cufflinks' from '/Users/simon/Virtualenvs/plotly3.10/lib/python2.7/site-packages/cufflinks/__init__.pyc'>


In [41]:
'''Use asFigure=True to turn a chart made by cufflinks into a figure that can be modified.'''
df = cf.datagen.lines(4)
# With asFigure=True the call hands back the figure object.
fig = df.iplot(
    kind='scatter',
    asFigure=True,
    filename='cufflinks scatter for asFigure',
)
print(fig)

Figure({
    'data': [{'line': {'color': 'rgba(255, 153, 51, 1.0)', 'dash': 'solid', 'shape': 'linear', 'width': 1.3},
              'mode': 'lines',
              'name': 'OUM.IM',
              'text': '',
              'type': 'scatter',
              'uid': '7fa88f86-b576-4c0c-82de-90642edc2b23',
              'x': [2015-01-01, 2015-01-02, 2015-01-03, 2015-01-04, 2015-01-05,
                    2015-01-06, 2015-01-07, 2015-01-08, 2015-01-09, 2015-01-10,
                    2015-01-11, 2015-01-12, 2015-01-13, 2015-01-14, 2015-01-15,
                    2015-01-16, 2015-01-17, 2015-01-18, 2015-01-19, 2015-01-20,
                    2015-01-21, 2015-01-22, 2015-01-23, 2015-01-24, 2015-01-25,
                    2015-01-26, 2015-01-27, 2015-01-28, 2015-01-29, 2015-01-30,
                    2015-01-31, 2015-02-01, 2015-02-02, 2015-02-03, 2015-02-04,
                    2015-02-05, 2015-02-06, 2015-02-07, 2015-02-08, 2015-02-09,
                    2015-02-10, 2015-02-11, 2015-02-12, 20

In [42]:
# Title the y axis 'Price' and prefix every tick label with a dollar sign.
fig['layout']['yaxis'].update(title='Price', tickprefix='$')

# Rename every trace to a generic, index-based name.
for position, series_trace in enumerate(fig['data']):
    series_trace['name'] = 'Trace {}'.format(position)

py.iplot(fig, filename='cufflinks customized')

In [43]:
# Show the signature and documentation of the iplot method that cufflinks adds to DataFrames.
help(df.iplot)

Help on method _iplot in module cufflinks.plotlytools:

_iplot(self, kind='scatter', data=None, layout=None, filename='', sharing=None, title='', xTitle='', yTitle='', zTitle='', theme=None, colors=None, colorscale=None, fill=False, width=None, dash='solid', mode='', interpolation='linear', symbol='circle', size=12, barmode='', sortbars=False, bargap=None, bargroupgap=None, bins=None, histnorm='', histfunc='count', orientation='v', boxpoints=False, annotations=None, keys=False, bestfit=False, bestfit_colors=None, mean=False, mean_colors=None, categories='', x='', y='', z='', text='', gridcolor=None, zerolinecolor=None, margin=None, labels=None, values=None, secondary_y='', secondary_y_title='', subplots=False, shape=None, error_x=None, error_y=None, error_type='data', locations=None, lon=None, lat=None, asFrame=False, asDates=False, asFigure=False, asImage=False, dimensions=None, asPlot=False, asUrl=False, online=None, **kwargs) method of pandas.core.frame.DataFrame instance
          