In [49]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [50]:
# Build a Series from a plain list; the np.nan entry is a missing value and
# the resulting dtype is float64 with a default 0..5 integer index.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s


0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [51]:
# Six consecutive calendar days beginning 2013-01-01 (daily frequency),
# used as the row index of `df`.
dates = pd.date_range(start='20130101', periods=6)
dates


DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [52]:
# 6x4 frame of standard-normal draws, rows labelled by `dates`, columns A-D.
# NOTE(review): no RNG seed is set in the visible cells, so these values
# presumably differ on every fresh run — confirm whether an earlier cell seeds NumPy.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df


Unnamed: 0,A,B,C,D
2013-01-01,0.840045,0.445744,-0.826137,-1.569876
2013-01-02,-0.001062,-2.391426,0.740173,-1.771201
2013-01-03,1.345284,-1.140696,0.166721,-1.604203
2013-01-04,1.177324,1.756869,0.229433,-1.500779
2013-01-05,0.493914,-0.177929,-0.827754,0.346665
2013-01-06,0.510942,-0.237856,0.033164,-1.363723


In [53]:
# A frame built from a mapping of column name -> values, one column per dtype.
# Scalars (A, B, F) are broadcast to the length of the array-like columns (4 rows).
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp('20130102'),
        C=pd.Series(1, index=list(range(4)), dtype='float32'),
        D=np.full(4, 3, dtype='int32'),
        E=pd.Categorical(['test', 'train', 'test', 'train']),
        F='foo',
    )
)

df2


Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [54]:
# Per-column dtypes of df2; each column keeps its own dtype.
df2.dtypes


A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [55]:
# List every attribute name available on df2 (column names appear first).
# NOTE(review): the rendered output is very long and is cut off mid-list in
# this export; consider clearing or filtering it before sharing the notebook.
dir(df2)


['A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_REVERSED',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdiv

In [56]:
# First rows of df; with no argument, five rows are shown.
df.head()


Unnamed: 0,A,B,C,D
2013-01-01,0.840045,0.445744,-0.826137,-1.569876
2013-01-02,-0.001062,-2.391426,0.740173,-1.771201
2013-01-03,1.345284,-1.140696,0.166721,-1.604203
2013-01-04,1.177324,1.756869,0.229433,-1.500779
2013-01-05,0.493914,-0.177929,-0.827754,0.346665


In [57]:
# Last rows of df; with no argument, five rows are shown.
df.tail()


Unnamed: 0,A,B,C,D
2013-01-02,-0.001062,-2.391426,0.740173,-1.771201
2013-01-03,1.345284,-1.140696,0.166721,-1.604203
2013-01-04,1.177324,1.756869,0.229433,-1.500779
2013-01-05,0.493914,-0.177929,-0.827754,0.346665
2013-01-06,0.510942,-0.237856,0.033164,-1.363723


In [58]:
# Only the final three rows of df.
df.tail(n=3)


Unnamed: 0,A,B,C,D
2013-01-04,1.177324,1.756869,0.229433,-1.500779
2013-01-05,0.493914,-0.177929,-0.827754,0.346665
2013-01-06,0.510942,-0.237856,0.033164,-1.363723


In [59]:
# Row labels of df: the DatetimeIndex that was passed in as `dates`.
df.index


DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [60]:
# Column labels of df.
df.columns


Index(['A', 'B', 'C', 'D'], dtype='object')

In [61]:
# Underlying data as a NumPy array (index and column labels are dropped).
# `DataFrame.to_numpy()` is the accessor the pandas documentation recommends
# over the legacy `.values` attribute; for this all-float64 frame the returned
# array is the same.
df.to_numpy()


array([[ 8.40045274e-01,  4.45744138e-01, -8.26136574e-01,
        -1.56987601e+00],
       [-1.06184397e-03, -2.39142575e+00,  7.40173066e-01,
        -1.77120144e+00],
       [ 1.34528396e+00, -1.14069591e+00,  1.66721351e-01,
        -1.60420345e+00],
       [ 1.17732400e+00,  1.75686916e+00,  2.29433391e-01,
        -1.50077863e+00],
       [ 4.93914034e-01, -1.77928842e-01, -8.27753687e-01,
         3.46665172e-01],
       [ 5.10942466e-01, -2.37855847e-01,  3.31635389e-02,
        -1.36372288e+00]])

In [62]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
df.describe()


Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.727741,-0.290882,-0.080733,-1.243853
std,0.495767,1.406125,0.625774,0.790489
min,-0.001062,-2.391426,-0.827754,-1.771201
25%,0.498171,-0.914986,-0.611312,-1.595622
50%,0.675494,-0.207892,0.099942,-1.535327
75%,1.093004,0.289826,0.213755,-1.397987
max,1.345284,1.756869,0.740173,0.346665


In [63]:
# Transpose: columns A-D become the rows and the dates become the columns.
df.T


Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.840045,-0.001062,1.345284,1.177324,0.493914,0.510942
B,0.445744,-2.391426,-1.140696,1.756869,-0.177929,-0.237856
C,-0.826137,0.740173,0.166721,0.229433,-0.827754,0.033164
D,-1.569876,-1.771201,-1.604203,-1.500779,0.346665,-1.363723


In [64]:
# Reorder the columns by their labels, descending (D, C, B, A); rows are untouched.
df.sort_index(axis='columns', ascending=False)


Unnamed: 0,D,C,B,A
2013-01-01,-1.569876,-0.826137,0.445744,0.840045
2013-01-02,-1.771201,0.740173,-2.391426,-0.001062
2013-01-03,-1.604203,0.166721,-1.140696,1.345284
2013-01-04,-1.500779,0.229433,1.756869,1.177324
2013-01-05,0.346665,-0.827754,-0.177929,0.493914
2013-01-06,-1.363723,0.033164,-0.237856,0.510942


In [65]:
# Reorder the columns by their labels in the default ascending order (A, B, C, D).
df.sort_index(axis='columns')


Unnamed: 0,A,B,C,D
2013-01-01,0.840045,0.445744,-0.826137,-1.569876
2013-01-02,-0.001062,-2.391426,0.740173,-1.771201
2013-01-03,1.345284,-1.140696,0.166721,-1.604203
2013-01-04,1.177324,1.756869,0.229433,-1.500779
2013-01-05,0.493914,-0.177929,-0.827754,0.346665
2013-01-06,0.510942,-0.237856,0.033164,-1.363723


In [66]:
# Reorder the rows by the values in column 'B', largest first.
df.sort_values('B', ascending=False)


Unnamed: 0,A,B,C,D
2013-01-04,1.177324,1.756869,0.229433,-1.500779
2013-01-01,0.840045,0.445744,-0.826137,-1.569876
2013-01-05,0.493914,-0.177929,-0.827754,0.346665
2013-01-06,0.510942,-0.237856,0.033164,-1.363723
2013-01-03,1.345284,-1.140696,0.166721,-1.604203
2013-01-02,-0.001062,-2.391426,0.740173,-1.771201


In [67]:
# 3. Selecting data (Selection)
# Bracket access with a single column label returns that column as a Series.
df['A']

2013-01-01    0.840045
2013-01-02   -0.001062
2013-01-03    1.345284
2013-01-04    1.177324
2013-01-05    0.493914
2013-01-06    0.510942
Freq: D, Name: A, dtype: float64

In [68]:
# Attribute-style access to column 'A'; the result matches df['A'].
df.A

2013-01-01    0.840045
2013-01-02   -0.001062
2013-01-03    1.345284
2013-01-04    1.177324
2013-01-05    0.493914
2013-01-06    0.510942
Freq: D, Name: A, dtype: float64

In [69]:
# A single selected column is a pandas Series, not a DataFrame.
type(df['A'])

pandas.core.series.Series

In [70]:
# Slicing with [] and integers selects rows by position: here the first three rows.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.840045,0.445744,-0.826137,-1.569876
2013-01-02,-0.001062,-2.391426,0.740173,-1.771201
2013-01-03,1.345284,-1.140696,0.166721,-1.604203


In [71]:
# Slicing with [] and date strings selects rows by label; both endpoints
# (2013-01-02 and 2013-01-04) are included in the result.
df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,-0.001062,-2.391426,0.740173,-1.771201
2013-01-03,1.345284,-1.140696,0.166721,-1.604203
2013-01-04,1.177324,1.756869,0.229433,-1.500779


In [75]:
# Label-based lookup of the row whose index label is dates[0] (2013-01-01);
# the row comes back as a Series keyed by column name.
df.loc[dates[0]]

A    0.840045
B    0.445744
C   -0.826137
D   -1.569876
Name: 2013-01-01 00:00:00, dtype: float64

In [78]:
# Same label-based lookup for the second date, dates[1] (2013-01-02).
df.loc[dates[1]]

A   -0.001062
B   -2.391426
C    0.740173
D   -1.771201
Name: 2013-01-02 00:00:00, dtype: float64

In [81]:
# Row lookup by date string on the DatetimeIndex: the compact ('20130101') and
# ISO ('2013-01-01') spellings address the same row. Only a cell's last
# expression is rendered, so a bare first lookup would be computed and silently
# discarded; asserting the equivalence makes that line do real work.
assert df.loc['20130101'].equals(df.loc['2013-01-01'])
df.loc['2013-01-01']

A    0.840045
B    0.445744
C   -0.826137
D   -1.569876
Name: 2013-01-01 00:00:00, dtype: float64

In [85]:
# Label-based selection on both axes: every row (`:`), columns 'A' and 'B' only.
df.loc[:,['A', 'B']]

Unnamed: 0,A,B
2013-01-01,0.840045,0.445744
2013-01-02,-0.001062,-2.391426
2013-01-03,1.345284,-1.140696
2013-01-04,1.177324,1.756869
2013-01-05,0.493914,-0.177929
2013-01-06,0.510942,-0.237856
