In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [6]:
# A Series built from a plain list; the np.nan entry marks a missing value
# and forces the whole Series to float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [7]:
# Six consecutive calendar days starting 2013-01-01 (daily frequency).
dates = pd.date_range(start='20130101', periods=6, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [8]:
# 6x4 frame of standard-normal draws, indexed by `dates`, with columns A-D.
# NOTE(review): no seed is set in the visible cells, so the numbers shown
# below will differ on a fresh run — consider seeding the generator.
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.338434,-0.660705,-0.309726,-2.025774
2013-01-02,0.302906,0.418324,1.266007,-0.31836
2013-01-03,0.407828,-0.424464,-0.213497,1.361696
2013-01-04,0.529709,2.033023,1.346366,-1.196953
2013-01-05,0.769554,-1.234981,0.409763,0.933051
2013-01-06,2.027231,1.284101,-1.490457,0.616624


In [9]:
# Frame built from a dict of columns: the scalar entries ('A', 'B', 'F') are
# broadcast to the length set by the array-like entries ('C', 'D', 'E').
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3, 3, 3, 3], dtype='int32'),
    'E': pd.Categorical(["test", "train"] * 2),
    'F': 'foo',
})
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [20]:
# Per-column dtypes of df2: each column keeps its own type.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [11]:
# Peek at the top of the frame; with no argument head() returns the first five rows.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-0.338434,-0.660705,-0.309726,-2.025774
2013-01-02,0.302906,0.418324,1.266007,-0.31836
2013-01-03,0.407828,-0.424464,-0.213497,1.361696
2013-01-04,0.529709,2.033023,1.346366,-1.196953
2013-01-05,0.769554,-1.234981,0.409763,0.933051


In [12]:
# Row labels of df (the DatetimeIndex it was built with).
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [13]:
# Column labels of df.
df.columns

Index([u'A', u'B', u'C', u'D'], dtype='object')

In [14]:
# Underlying data as a plain NumPy array, without the row/column labels.
df.values

array([[-0.33843446, -0.66070537, -0.30972623, -2.02577354],
       [ 0.30290646,  0.41832414,  1.26600661, -0.31835973],
       [ 0.40782829, -0.42446425, -0.2134968 ,  1.36169555],
       [ 0.52970859,  2.03302329,  1.34636559, -1.19695344],
       [ 0.76955411, -1.23498064,  0.4097629 ,  0.93305106],
       [ 2.02723149,  1.28410063, -1.49045651,  0.61662433]])

In [15]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.616466,0.235883,0.168076,-0.104953
std,0.784207,1.246204,1.074882,1.317326
min,-0.338434,-1.234981,-1.490457,-2.025774
25%,0.329137,-0.601645,-0.285669,-0.977305
50%,0.468768,-0.00307,0.098133,0.149132
75%,0.709593,1.067657,1.051946,0.853944
max,2.027231,2.033023,1.346366,1.361696


In [16]:
# Swap the axes: the dates become column labels and A-D become row labels.
df.transpose()

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.338434,0.302906,0.407828,0.529709,0.769554,2.027231
B,-0.660705,0.418324,-0.424464,2.033023,-1.234981,1.284101
C,-0.309726,1.266007,-0.213497,1.346366,0.409763,-1.490457
D,-2.025774,-0.31836,1.361696,-1.196953,0.933051,0.616624


In [17]:
# Reorder the columns by label, descending (D, C, B, A); row order is unchanged.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-2.025774,-0.309726,-0.660705,-0.338434
2013-01-02,-0.31836,1.266007,0.418324,0.302906
2013-01-03,1.361696,-0.213497,-0.424464,0.407828
2013-01-04,-1.196953,1.346366,2.033023,0.529709
2013-01-05,0.933051,0.409763,-1.234981,0.769554
2013-01-06,0.616624,-1.490457,1.284101,2.027231


In [18]:
# Sort the rows by the values in column 'B', smallest first.
df.sort_values(by='B', ascending=True)

Unnamed: 0,A,B,C,D
2013-01-05,0.769554,-1.234981,0.409763,0.933051
2013-01-01,-0.338434,-0.660705,-0.309726,-2.025774
2013-01-03,0.407828,-0.424464,-0.213497,1.361696
2013-01-02,0.302906,0.418324,1.266007,-0.31836
2013-01-06,2.027231,1.284101,-1.490457,0.616624
2013-01-04,0.529709,2.033023,1.346366,-1.196953


In [19]:
# Selecting a single column by name returns it as a Series.
df['A']

2013-01-01   -0.338434
2013-01-02    0.302906
2013-01-03    0.407828
2013-01-04    0.529709
2013-01-05    0.769554
2013-01-06    2.027231
Freq: D, Name: A, dtype: float64

In [21]:
# First three rows, selected by position.
df.iloc[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.338434,-0.660705,-0.309726,-2.025774
2013-01-02,0.302906,0.418324,1.266007,-0.31836
2013-01-03,0.407828,-0.424464,-0.213497,1.361696


In [22]:
# Rows from Jan 2 through Jan 4 by label; both endpoints are included.
df.loc['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,0.302906,0.418324,1.266007,-0.31836
2013-01-03,0.407828,-0.424464,-0.213497,1.361696
2013-01-04,0.529709,2.033023,1.346366,-1.196953


In [23]:
# Select one row by its label (the first date); the result is a Series keyed by column name.
df.loc[dates[0]]

A   -0.338434
B   -0.660705
C   -0.309726
D   -2.025774
Name: 2013-01-01 00:00:00, dtype: float64

In [24]:
# All rows, restricted to columns A and B.
df[['A', 'B']]

Unnamed: 0,A,B
2013-01-01,-0.338434,-0.660705
2013-01-02,0.302906,0.418324
2013-01-03,0.407828,-0.424464
2013-01-04,0.529709,2.033023
2013-01-05,0.769554,-1.234981
2013-01-06,2.027231,1.284101


In [25]:
# Label selection on both axes: rows for Jan 2-4 (inclusive), columns A and B.
df.loc['20130102':'20130104',['A','B']]

Unnamed: 0,A,B
2013-01-02,0.302906,0.418324
2013-01-03,0.407828,-0.424464
2013-01-04,0.529709,2.033023


In [26]:
# A single row label plus a column list collapses the result to a Series.
df.loc['20130102',['A','B']]

A    0.302906
B    0.418324
Name: 2013-01-02 00:00:00, dtype: float64

In [27]:
# Row label + column label yields a single scalar value.
df.loc[dates[0],'A']

-0.33843445927953592

In [28]:
# Same scalar lookup through .at, the accessor for single-cell access.
df.at[dates[0],'A']

-0.33843445927953592

In [29]:
# Positional row lookup: the row at position 3 (the fourth row), returned as a Series.
df.iloc[3]

A    0.529709
B    2.033023
C    1.346366
D   -1.196953
Name: 2013-01-04 00:00:00, dtype: float64

In [30]:
# Positional column slice: all rows, columns at positions 1 and 2 (B and C).
df.iloc[:,1:3]

Unnamed: 0,B,C
2013-01-01,-0.660705,-0.309726
2013-01-02,0.418324,1.266007
2013-01-03,-0.424464,-0.213497
2013-01-04,2.033023,1.346366
2013-01-05,-1.234981,0.409763
2013-01-06,1.284101,-1.490457


In [31]:
# Boolean mask: keep only the rows where column A is positive.
df[df['A'] > 0]

Unnamed: 0,A,B,C,D
2013-01-02,0.302906,0.418324,1.266007,-0.31836
2013-01-03,0.407828,-0.424464,-0.213497,1.361696
2013-01-04,0.529709,2.033023,1.346366,-1.196953
2013-01-05,0.769554,-1.234981,0.409763,0.933051
2013-01-06,2.027231,1.284101,-1.490457,0.616624


In [35]:
# New frame: a copy of df plus a string column 'E'; df itself is left untouched.
# NOTE: this rebinds df2, replacing the mixed-dtype frame built earlier.
df2 = df.assign(E=['one', 'one', 'two', 'three', 'four', 'three'])
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,-0.338434,-0.660705,-0.309726,-2.025774,one
2013-01-02,0.302906,0.418324,1.266007,-0.31836,one
2013-01-03,0.407828,-0.424464,-0.213497,1.361696,two
2013-01-04,0.529709,2.033023,1.346366,-1.196953,three
2013-01-05,0.769554,-1.234981,0.409763,0.933051,four
2013-01-06,2.027231,1.284101,-1.490457,0.616624,three


In [36]:
# Keep the rows whose 'E' value is one of the listed labels.
df2[df2['E'].isin(['two','four'])]

Unnamed: 0,A,B,C,D,E
2013-01-03,0.407828,-0.424464,-0.213497,1.361696,two
2013-01-05,0.769554,-1.234981,0.409763,0.933051,four


In [37]:
# Integers 1-6 on a daily index that starts one day later than `dates`
# (2013-01-02 through 2013-01-07).
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range(start='20130102', periods=6),
)
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [40]:
# Reindex df to the first four dates and add a new column 'E', which starts
# out entirely missing because df has no such column.
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
# Label-based slice: both endpoints (dates[0] and dates[1]) are included, so
# the first two rows get E = 1 and the remaining rows stay NaN.
df1.loc[dates[0]:dates[1],'E'] = 1
df1

Unnamed: 0,A,B,C,D,E
2013-01-01,-0.338434,-0.660705,-0.309726,-2.025774,1.0
2013-01-02,0.302906,0.418324,1.266007,-0.31836,1.0
2013-01-03,0.407828,-0.424464,-0.213497,1.361696,
2013-01-04,0.529709,2.033023,1.346366,-1.196953,


In [41]:
# Drop every row that has at least one missing value; df1 itself is not modified.
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,E
2013-01-01,-0.338434,-0.660705,-0.309726,-2.025774,1.0
2013-01-02,0.302906,0.418324,1.266007,-0.31836,1.0


In [44]:
# Return a copy with missing values replaced by 5; df1 itself is not modified.
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,E
2013-01-01,-0.338434,-0.660705,-0.309726,-2.025774,1.0
2013-01-02,0.302906,0.418324,1.266007,-0.31836,1.0
2013-01-03,0.407828,-0.424464,-0.213497,1.361696,5.0
2013-01-04,0.529709,2.033023,1.346366,-1.196953,5.0


In [45]:
# Same fill with a different replacement value (2); df1 itself is not modified.
df1.fillna(value=2)

Unnamed: 0,A,B,C,D,E
2013-01-01,-0.338434,-0.660705,-0.309726,-2.025774,1.0
2013-01-02,0.302906,0.418324,1.266007,-0.31836,1.0
2013-01-03,0.407828,-0.424464,-0.213497,1.361696,2.0
2013-01-04,0.529709,2.033023,1.346366,-1.196953,2.0


In [46]:
# Boolean mask of the same shape as df1: True wherever a value is missing.
df1.isnull()

Unnamed: 0,A,B,C,D,E
2013-01-01,False,False,False,False,False
2013-01-02,False,False,False,False,False
2013-01-03,False,False,False,False,True
2013-01-04,False,False,False,False,True


In [47]:
# Column-wise mean: one value per column.
df.mean()

A    0.616466
B    0.235883
C    0.168076
D   -0.104953
dtype: float64

In [48]:
# Row-wise mean: one value per date, averaging across columns A-D.
df.mean(axis=1)

2013-01-01   -0.833660
2013-01-02    0.417219
2013-01-03    0.282891
2013-01-04    0.678036
2013-01-05    0.219347
2013-01-06    0.609375
Freq: D, dtype: float64

In [51]:
# Series aligned on `dates`, then shifted down two positions: the first two
# slots become NaN and the last two values (6, 8) fall off the end.
# NOTE: this rebinds `s`, replacing the Series created earlier.
s = pd.Series([1,2,5,np.nan,6,8], index=dates).shift(2)
s

2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    2.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64

In [52]:
# Subtract s from every column, matching on the row index; rows where s is
# NaN come out as all-NaN.
df.sub(s, axis='index')

Unnamed: 0,A,B,C,D
2013-01-01,,,,
2013-01-02,,,,
2013-01-03,-0.592172,-1.424464,-1.213497,0.361696
2013-01-04,-1.470291,0.033023,-0.653634,-3.196953
2013-01-05,-4.230446,-6.234981,-4.590237,-4.066949
2013-01-06,,,,


In [53]:
# Running total down each column. DataFrame.cumsum is the built-in form of
# this operation, so no per-column apply is needed.
df.cumsum()

Unnamed: 0,A,B,C,D
2013-01-01,-0.338434,-0.660705,-0.309726,-2.025774
2013-01-02,-0.035528,-0.242381,0.95628,-2.344133
2013-01-03,0.3723,-0.666845,0.742784,-0.982438
2013-01-04,0.902009,1.366178,2.089149,-2.179391
2013-01-05,1.671563,0.131197,2.498912,-1.24634
2013-01-06,3.698794,1.415298,1.008456,-0.629716


In [55]:
# Range (max - min) of each column, computed with two vectorised reductions
# rather than a per-column lambda.
df.max() - df.min()

A    2.365666
B    3.268004
C    2.836822
D    3.387469
dtype: float64

In [56]:
# Ten integers drawn from [0, 7) — randint's upper bound is exclusive.
# NOTE(review): the draw is unseeded, so these values change between runs;
# this also rebinds `s`.
s = pd.Series(np.random.randint(0, 7, size=10))
s

0    6
1    2
2    4
3    6
4    4
5    5
6    1
7    0
8    3
9    2
dtype: int32

In [59]:
# Frequency of each distinct value, most common first.
s.value_counts()

6    2
4    2
2    2
5    1
3    1
1    1
0    1
dtype: int64

In [60]:
# Relative frequencies rather than raw counts. `normalize` is a boolean flag
# and is passed by keyword: a bare positional number in this slot is easy to
# misread as a count or a bin size.
s.value_counts(normalize=True)

6    0.2
4    0.2
2    0.2
5    0.1
3    0.1
1    0.1
0    0.1
dtype: float64

In [61]:
# Mixed-case strings with one missing value; the .str accessor lower-cases
# each element and passes the NaN through untouched.
# NOTE: this rebinds `s` once more.
s = pd.Series(
    data=['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']
)
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

In [63]:
# Fresh 10x4 frame of standard-normal draws with default integer labels.
# NOTE(review): this rebinds `df`, so the date-indexed frame used by the
# cells above is replaced from here on; the draw is also unseeded.
df = pd.DataFrame(np.random.randn(10,4))
df

Unnamed: 0,0,1,2,3
0,0.9989,-1.241259,-0.180902,0.336967
1,-0.553085,-0.395712,-1.131826,-0.092154
2,1.521868,-0.888964,-1.633286,-1.228839
3,-0.596553,0.502804,0.175623,-1.123661
4,0.56022,0.124113,1.554745,0.925993
5,-0.597479,1.678972,0.130115,0.588691
6,-0.466069,-0.007517,0.894773,0.783417
7,-0.435793,0.682738,1.371607,-0.643155
8,-0.622282,0.051984,0.060105,0.116271
9,-0.828534,-0.423109,-0.55084,-0.595978


In [64]:
# Split the frame into three row-wise chunks (positions 0-2, 3-6, 7-9) and
# stitch them back together in the same order.
pieces = [df.iloc[:3], df.iloc[3:7], df.iloc[7:]]
pd.concat(pieces, axis=0)

Unnamed: 0,0,1,2,3
0,0.9989,-1.241259,-0.180902,0.336967
1,-0.553085,-0.395712,-1.131826,-0.092154
2,1.521868,-0.888964,-1.633286,-1.228839
3,-0.596553,0.502804,0.175623,-1.123661
4,0.56022,0.124113,1.554745,0.925993
5,-0.597479,1.678972,0.130115,0.588691
6,-0.466069,-0.007517,0.894773,0.783417
7,-0.435793,0.682738,1.371607,-0.643155
8,-0.622282,0.051984,0.060105,0.116271
9,-0.828534,-0.423109,-0.55084,-0.595978
