In [21]:
import numpy as np
import pandas as pd

In [4]:
# Build a Series from a plain list; one entry is deliberately missing (NaN).
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [5]:
# Six consecutive calendar days starting 2013-01-01, used below as a row index.
dates = pd.date_range(start='20130101', periods=6)
print(dates)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')


In [6]:
# 6x4 frame of standard-normal draws: rows labelled by `dates`, columns A-D.
# NOTE: no random seed is set, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
print(df)

                   A         B         C         D
2013-01-01  0.505171  0.475401 -0.678363 -1.928800
2013-01-02 -0.950898 -0.030158  0.868578 -0.971927
2013-01-03 -0.073831  0.233819 -0.636073 -0.871967
2013-01-04 -1.172567 -0.714168  1.361316  0.268124
2013-01-05 -0.155747  0.055817 -1.283415  0.745360
2013-01-06  0.623192  0.438010 -0.220281 -0.980431


Creating a DataFrame by passing a dict of objects that can each be converted to a series-like structure:

In [8]:
# One dict entry per column; scalar values are repeated to match the
# four-row length of the Series / array / Categorical entries.
df2 = pd.DataFrame(
    {
        'A': 1.,                                                  # float scalar
        'B': pd.Timestamp('20130102'),                            # timestamp scalar
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',                                               # string scalar
    }
)
print(df2)

     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo


The columns of the resulting DataFrame have different dtypes.

In [9]:
# One dtype per column of df2, returned as a Series keyed by column name.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [10]:
# Top of the frame: at most the first five rows (df2 only has four).
df2.head(n=5)

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [17]:
# Bottom of the frame: the last three rows.
df2.tail(n=3)

Unnamed: 0,A,B,C,D,E,F
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [15]:
# Row labels of df: the DatetimeIndex that was passed as `index=dates`.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [18]:
# Column labels of df2, in the order of the dict keys used to build it.
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

`describe()` shows a quick statistical summary of your data:

In [23]:
# Per-column count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.204113,0.076453,-0.09804,-0.623273
std,0.735176,0.436239,1.010978,0.967793
min,-1.172567,-0.714168,-1.283415,-1.9288
25%,-0.752111,-0.008665,-0.667791,-0.978305
50%,-0.114789,0.144818,-0.428177,-0.921947
75%,0.360421,0.386962,0.596363,-0.016899
max,0.623192,0.475401,1.361316,0.74536


Transposing your data:

In [24]:
# Swap rows and columns: dates become the column labels, A-D the row labels.
df.transpose()

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,0.505171,-0.950898,-0.073831,-1.172567,-0.155747,0.623192
B,0.475401,-0.030158,0.233819,-0.714168,0.055817,0.43801
C,-0.678363,0.868578,-0.636073,1.361316,-1.283415,-0.220281
D,-1.9288,-0.971927,-0.871967,0.268124,0.74536,-0.980431


Sorting by an axis:

In [25]:
# Order the column labels in descending order (D, C, B, A); rows are untouched.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-1.9288,-0.678363,0.475401,0.505171
2013-01-02,-0.971927,0.868578,-0.030158,-0.950898
2013-01-03,-0.871967,-0.636073,0.233819,-0.073831
2013-01-04,0.268124,1.361316,-0.714168,-1.172567
2013-01-05,0.74536,-1.283415,0.055817,-0.155747
2013-01-06,-0.980431,-0.220281,0.43801,0.623192


Sorting by values:

In [26]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values(by='B', ascending=True)

Unnamed: 0,A,B,C,D
2013-01-04,-1.172567,-0.714168,1.361316,0.268124
2013-01-02,-0.950898,-0.030158,0.868578,-0.971927
2013-01-05,-0.155747,0.055817,-1.283415,0.74536
2013-01-03,-0.073831,0.233819,-0.636073,-0.871967
2013-01-06,0.623192,0.43801,-0.220281,-0.980431
2013-01-01,0.505171,0.475401,-0.678363,-1.9288


In [27]:
# Selecting a single column with [] yields a Series named after that column.
df['A']

2013-01-01    0.505171
2013-01-02   -0.950898
2013-01-03   -0.073831
2013-01-04   -1.172567
2013-01-05   -0.155747
2013-01-06    0.623192
Freq: D, Name: A, dtype: float64

In [30]:
# Slicing with plain [] selects rows; here, the first three.
df[:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.505171,0.475401,-0.678363,-1.9288
2013-01-02,-0.950898,-0.030158,0.868578,-0.971927
2013-01-03,-0.073831,0.233819,-0.636073,-0.871967


For getting a cross section using a label:

In [31]:
# Label-based lookup of the row for the first date; the result is a Series
# indexed by the column names.
df.loc[dates[0]]

A    0.505171
B    0.475401
C   -0.678363
D   -1.928800
Name: 2013-01-01 00:00:00, dtype: float64

In [32]:
# Label-based selection on both axes: every row, columns A and B only.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2013-01-01,0.505171,0.475401
2013-01-02,-0.950898,-0.030158
2013-01-03,-0.073831,0.233819
2013-01-04,-1.172567,-0.714168
2013-01-05,-0.155747,0.055817
2013-01-06,0.623192,0.43801


# Selection by Position
## Select via the position of the passed integers:

In [34]:
# Integer position 3 is the fourth row (2013-01-04), returned as a Series.
df.iloc[3]

A   -1.172567
B   -0.714168
C    1.361316
D    0.268124
Name: 2013-01-04 00:00:00, dtype: float64

By integer slices, acting similarly to NumPy/Python slicing:

In [35]:
# Positional slices on both axes: rows 3-4 and the first two columns.
# As with Python slices, the end position is excluded.
df.iloc[3:5, :2]

Unnamed: 0,A,B
2013-01-04,-1.172567,-0.714168
2013-01-05,-0.155747,0.055817
