In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Build a Series from a plain list; no index is given, so pandas supplies one.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [3]:
# A Series with one missing entry (np.nan) among the integers.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

Series包含了一系列值和一系列索引，我們可以使用values和index屬性來訪問它們。

In [5]:
# The values of the Series as a NumPy array.
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [6]:
# The values of the Series as a NumPy array; the missing entry shows up as nan.
s.values

array([ 1.,  3.,  5., nan,  6.,  8.])

In [7]:
# The index object that labels the values of `data`.
data.index

RangeIndex(start=0, stop=4, step=1)

In [8]:
# The index object that labels the values of `s`.
s.index

RangeIndex(start=0, stop=6, step=1)

In [9]:
# Rebind `data` to the same values, this time labelled with explicit string keys.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=['a', 'b', 'c', 'd'],
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [10]:
# Look up a single value by its index label.
data['b']

0.5

In [11]:
# State name -> population, listed as (key, value) pairs.
population_dict = dict([
    ('California', 38332521),
    ('Texas', 26448193),
    ('New York', 19651127),
    ('Florida', 19552860),
    ('Illinois', 12882135),
])
# A Series is a structure that maps typed keys to a set of typed values.
population = pd.Series(data=population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [12]:
# Dictionary-style lookup of one entry by its label.
population['California']

38332521

In [13]:
# Label-based slice; the end label 'Florida' is included in the result.
population['California':'Florida']

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
dtype: int64

In [14]:
# For example, the data can be a list or NumPy array, in which case the index defaults to an integer sequence:
pd.Series([2, 4, 6])

0    2
1    4
2    6
dtype: int64

In [15]:
# The data can be a scalar, which is repeated to fill the specified index:
pd.Series(5, index=[100, 200, 300])

100    5
200    5
300    5
dtype: int64

In [16]:
# The data can be a dictionary; the index then defaults to the dictionary keys.
# NOTE: the keys are kept in insertion order (2, 1, 3 here), not sorted.
pd.Series({2:'a', 1:'b', 3:'c'})

2    a
1    b
3    c
dtype: object

In [17]:
# In each case the index can be set explicitly if a different result is preferred;
# only the listed keys are kept, in the order given:
pd.Series({2:'a', 1:'b', 3:'c'}, index=[3, 2])

3    c
2    a
dtype: object

In [18]:
# State name -> area, one entry per line.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
# Wrap the mapping in a Series so the state names become the index.
area = pd.Series(data=area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [19]:
# Combine the two Series into one DataFrame: each Series becomes a column,
# and the rows are aligned on the shared state-name index.
states = pd.DataFrame(data=dict(population=population, area=area))
states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [20]:
# Row labels of the DataFrame.
states.index

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

In [21]:
# Select a single column by name; the result is a Series named 'area'.
states['area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [22]:
# Demo frame whose columns come from differently-typed inputs. The scalar
# entries ('A', 'B', 'F') are repeated for every row; 'C', 'D' and 'E' each
# supply four values.
df2 = pd.DataFrame(
    data={
        'A': 1.,
        'B': pd.Timestamp('20170101'),
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)
print(df2)

     A          B    C  D      E    F
0  1.0 2017-01-01  1.0  3   test  foo
1  1.0 2017-01-01  1.0  3  train  foo
2  1.0 2017-01-01  1.0  3   test  foo
3  1.0 2017-01-01  1.0  3  train  foo


In [24]:
# Data type of each column.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [25]:
# Row labels of the frame.
df2.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [27]:
# Column labels of the frame.
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [28]:
# The frame's contents as a NumPy array; with mixed column types the array has dtype=object.
df2.values

array([[1.0, Timestamp('2017-01-01 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2017-01-01 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2017-01-01 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2017-01-01 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [29]:
# Transpose: rows become columns and columns become rows.
df2.T

Unnamed: 0,0,1,2,3
A,1,1,1,1
B,2017-01-01 00:00:00,2017-01-01 00:00:00,2017-01-01 00:00:00,2017-01-01 00:00:00
C,1,1,1,1
D,3,3,3,3
E,test,train,test,train
F,foo,foo,foo,foo


In [30]:
# Sort by labels: axis=1 orders the column labels; ascending=False reverses them (F ... A).
print(df2.sort_index(axis=1, ascending=False))

     F      E  D    C          B    A
0  foo   test  3  1.0 2017-01-01  1.0
1  foo  train  3  1.0 2017-01-01  1.0
2  foo   test  3  1.0 2017-01-01  1.0
3  foo  train  3  1.0 2017-01-01  1.0


In [31]:
# Sort by labels: axis=0 orders the row index labels; ascending=False gives descending order (3 ... 0).
print(df2.sort_index(axis=0, ascending=False))

     A          B    C  D      E    F
3  1.0 2017-01-01  1.0  3  train  foo
2  1.0 2017-01-01  1.0  3   test  foo
1  1.0 2017-01-01  1.0  3  train  foo
0  1.0 2017-01-01  1.0  3   test  foo


In [32]:
# sort_values orders the rows by the values of a single column, here 'E'.
print(df2.sort_values(by='E'))

     A          B    C  D      E    F
0  1.0 2017-01-01  1.0  3   test  foo
2  1.0 2017-01-01  1.0  3   test  foo
1  1.0 2017-01-01  1.0  3  train  foo
3  1.0 2017-01-01  1.0  3  train  foo


In [34]:
from pandas_datareader import data as web

In [35]:
all_data={} # empty dict that will hold the downloaded data, keyed by ticker symbol

In [36]:
# Fetch the Yahoo price history for each symbol over the requested date range
# and store the result in all_data under that symbol.
for ticker in ('AAPL', 'IBM', 'MSFT', 'GOOG'):
    all_data[ticker] = web.get_data_yahoo(ticker, start='1/1/2010', end='1/1/2017')

In [37]:
all_data #dict object, key:value (each value is a DataFrame)

{'AAPL':                   High         Low        Open       Close       Volume  \
 Date                                                                      
 2009-12-31   30.478571   30.080000   30.447144   30.104286   88102700.0   
 2010-01-04   30.642857   30.340000   30.490000   30.572857  123432400.0   
 2010-01-05   30.798571   30.464285   30.657143   30.625713  150476200.0   
 2010-01-06   30.747143   30.107143   30.625713   30.138571  138040000.0   
 2010-01-07   30.285715   29.864286   30.250000   30.082857  119282800.0   
 2010-01-08   30.285715   29.865715   30.042856   30.282858  111902700.0   
 2010-01-11   30.428572   29.778572   30.400000   30.015715  115557400.0   
 2010-01-12   29.967142   29.488571   29.884285   29.674286  148614900.0   
 2010-01-13   30.132856   29.157143   29.695715   30.092857  151473000.0   
 2010-01-14   30.065714   29.860001   30.015715   29.918571  108223500.0   
 2010-01-15   30.228571   29.410000   30.132856   29.418571  148516900.0   
 201

In [38]:
# The DataFrame stored under the 'AAPL' key.
all_data['AAPL']

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31,30.478571,30.080000,30.447144,30.104286,88102700.0,20.159719
2010-01-04,30.642857,30.340000,30.490000,30.572857,123432400.0,20.473503
2010-01-05,30.798571,30.464285,30.657143,30.625713,150476200.0,20.508902
2010-01-06,30.747143,30.107143,30.625713,30.138571,138040000.0,20.182680
2010-01-07,30.285715,29.864286,30.250000,30.082857,119282800.0,20.145369
2010-01-08,30.285715,29.865715,30.042856,30.282858,111902700.0,20.279305
2010-01-11,30.428572,29.778572,30.400000,30.015715,115557400.0,20.100410
2010-01-12,29.967142,29.488571,29.884285,29.674286,148614900.0,19.871763
2010-01-13,30.132856,29.157143,29.695715,30.092857,151473000.0,20.152065
2010-01-14,30.065714,29.860001,30.015715,29.918571,108223500.0,20.035355


In [39]:
# The DataFrame stored under the 'GOOG' key.
all_data['GOOG']

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31,310.679321,307.986847,310.356445,307.986847,2455400.0,307.986847
2010-01-04,312.721039,310.103088,311.449310,311.349976,3937800.0,311.349976
2010-01-05,311.891449,308.761810,311.563568,309.978882,6048500.0,309.978882
2010-01-06,310.907837,301.220856,310.907837,302.164703,8009000.0,302.164703
2010-01-07,303.029083,294.410156,302.731018,295.130463,12912000.0,295.130463
2010-01-08,299.675903,292.651581,294.087250,299.064880,9509900.0,299.064880
2010-01-11,300.276978,295.100647,300.276978,298.612823,14519600.0,298.612823
2010-01-12,297.147339,292.100159,296.893982,293.332153,9769600.0,293.332153
2010-01-13,292.288940,285.095734,286.382355,291.648102,13077600.0,291.648102
2010-01-14,295.180145,289.521942,290.063416,293.019196,8535300.0,293.019196
