Code run-through of the pandas tutorial "10 Minutes to pandas", located at: http://pandas.pydata.org/pandas-docs/stable/10min.html#min

In [2]:
# Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Object Creation

In [5]:
# A Series built from a plain list gets the default integer index (0..5);
# the np.nan entry marks a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [9]:
# Six consecutive calendar days, starting on 2017-02-01.
dates = pd.date_range(start='20170201', periods=6)
dates

DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', '2017-02-04',
               '2017-02-05', '2017-02-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Draw the 6x4 standard-normal sample from a dedicated, seeded RandomState so
# that "Restart Kernel -> Run All" reproduces the same numbers every time.
# (A bare np.random.randn call is unseeded and yields a different table on
# each run; outputs recorded from such a run will differ until re-executed.)
x = np.random.RandomState(42).randn(6, 4)
print(x.shape)
print(x)

# Wrap the array in a DataFrame: one row per date, columns labelled A-D.
df = pd.DataFrame(x, index=dates, columns=list('ABCD'))
df

(6, 4)
[[-0.68748146  1.5624717  -1.18655232 -0.351009  ]
 [ 2.2283084   0.59969524 -1.10108088 -0.0092997 ]
 [ 0.43225646  1.14100044 -1.04140552 -1.17444282]
 [ 0.48063333  1.85161253 -0.89420039  0.7938157 ]
 [ 0.14387154  0.35515524 -0.43534071  0.79838422]
 [-0.82151108  2.63205374  0.04865438 -0.43489395]]


Unnamed: 0,A,B,C,D
2017-02-01,-0.687481,1.562472,-1.186552,-0.351009
2017-02-02,2.228308,0.599695,-1.101081,-0.0093
2017-02-03,0.432256,1.141,-1.041406,-1.174443
2017-02-04,0.480633,1.851613,-0.8942,0.793816
2017-02-05,0.143872,0.355155,-0.435341,0.798384
2017-02-06,-0.821511,2.632054,0.048654,-0.434894


In [21]:
# Build a DataFrame from a dict whose values can each be converted to a
# Series-like column; scalar values are repeated for every row.
df2 = pd.DataFrame(
    data={
        'A': 1.,
        'B': pd.Timestamp('20170201'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2017-02-01,1.0,3,test,foo
1,1.0,2017-02-01,1.0,3,train,foo
2,1.0,2017-02-01,1.0,3,test,foo
3,1.0,2017-02-01,1.0,3,train,foo


In [22]:
# Show the dtype of each column in df2 (one entry per column).
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# Viewing Data

In [23]:
# First five rows (what head() returns when no count is given).
df.head()

Unnamed: 0,A,B,C,D
2017-02-01,-0.687481,1.562472,-1.186552,-0.351009
2017-02-02,2.228308,0.599695,-1.101081,-0.0093
2017-02-03,0.432256,1.141,-1.041406,-1.174443
2017-02-04,0.480633,1.851613,-0.8942,0.793816
2017-02-05,0.143872,0.355155,-0.435341,0.798384


In [24]:
# Last five rows (what tail() returns when no count is given).
df.tail()

Unnamed: 0,A,B,C,D
2017-02-02,2.228308,0.599695,-1.101081,-0.0093
2017-02-03,0.432256,1.141,-1.041406,-1.174443
2017-02-04,0.480633,1.851613,-0.8942,0.793816
2017-02-05,0.143872,0.355155,-0.435341,0.798384
2017-02-06,-0.821511,2.632054,0.048654,-0.434894


In [25]:
# Only the last row.
df.tail(1)

Unnamed: 0,A,B,C,D
2017-02-06,-0.821511,2.632054,0.048654,-0.434894


In [26]:
# Only the first row.
df.head(1)

Unnamed: 0,A,B,C,D
2017-02-01,-0.687481,1.562472,-1.186552,-0.351009


In [27]:
# Row labels: the DatetimeIndex that was passed as `index` when df was built.
df.index

DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', '2017-02-04',
               '2017-02-05', '2017-02-06'],
              dtype='datetime64[ns]', freq='D')

In [28]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [29]:
# The underlying data as a NumPy array; the index and column labels are dropped.
# NOTE(review): newer pandas documentation recommends DataFrame.to_numpy()
# over .values -- consider switching if the installed pandas provides it.
df.values

array([[-0.68748146,  1.5624717 , -1.18655232, -0.351009  ],
       [ 2.2283084 ,  0.59969524, -1.10108088, -0.0092997 ],
       [ 0.43225646,  1.14100044, -1.04140552, -1.17444282],
       [ 0.48063333,  1.85161253, -0.89420039,  0.7938157 ],
       [ 0.14387154,  0.35515524, -0.43534071,  0.79838422],
       [-0.82151108,  2.63205374,  0.04865438, -0.43489395]])

In [30]:
# Quick per-column summary statistics: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.296013,1.356998,-0.768321,-0.062908
std,1.097787,0.841009,0.480498,0.766232
min,-0.821511,0.355155,-1.186552,-1.174443
25%,-0.479643,0.735022,-1.086162,-0.413923
50%,0.288064,1.351736,-0.967803,-0.180154
75%,0.468539,1.779327,-0.550056,0.593037
max,2.228308,2.632054,0.048654,0.798384


In [31]:
df.T # Transpose: rows and columns are swapped, so the dates become the column labels.

Unnamed: 0,2017-02-01 00:00:00,2017-02-02 00:00:00,2017-02-03 00:00:00,2017-02-04 00:00:00,2017-02-05 00:00:00,2017-02-06 00:00:00
A,-0.687481,2.228308,0.432256,0.480633,0.143872,-0.821511
B,1.562472,0.599695,1.141,1.851613,0.355155,2.632054
C,-1.186552,-1.101081,-1.041406,-0.8942,-0.435341,0.048654
D,-0.351009,-0.0093,-1.174443,0.793816,0.798384,-0.434894


In [36]:
# Sort along axis 1, i.e. by the column labels (not by the dates), in
# descending order -- the columns come out as D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2017-02-01,-0.351009,-1.186552,1.562472,-0.687481
2017-02-02,-0.0093,-1.101081,0.599695,2.228308
2017-02-03,-1.174443,-1.041406,1.141,0.432256
2017-02-04,0.793816,-0.8942,1.851613,0.480633
2017-02-05,0.798384,-0.435341,0.355155,0.143872
2017-02-06,-0.434894,0.048654,2.632054,-0.821511


In [37]:
# Sort the rows by the values in column C.
df.sort_values(by='C')

Unnamed: 0,A,B,C,D
2017-02-01,-0.687481,1.562472,-1.186552,-0.351009
2017-02-02,2.228308,0.599695,-1.101081,-0.0093
2017-02-03,0.432256,1.141,-1.041406,-1.174443
2017-02-04,0.480633,1.851613,-0.8942,0.793816
2017-02-05,0.143872,0.355155,-0.435341,0.798384
2017-02-06,-0.821511,2.632054,0.048654,-0.434894


# Selection

### Getting

In [40]:
# Select column "A". Only the last expression of a cell is rendered, so this
# result does not appear in the cell output.
df['A']
# Slice rows by position: 0:3 returns rows 0, 1 and 2 (the end is exclusive).
df[0:3]

Unnamed: 0,A,B,C,D
2017-02-01,-0.687481,1.562472,-1.186552,-0.351009
2017-02-02,2.228308,0.599695,-1.101081,-0.0093
2017-02-03,0.432256,1.141,-1.041406,-1.174443


In [41]:
# Slice rows by index label: both endpoints (Feb 1 and Feb 2) are included.
df['20170201':'20170202']

Unnamed: 0,A,B,C,D
2017-02-01,-0.687481,1.562472,-1.186552,-0.351009
2017-02-02,2.228308,0.599695,-1.101081,-0.0093


### Selection by Label