In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Display the result of every top-level expression in a cell, not only the
# last one. Later cells rely on this to show several outputs from a single
# cell (e.g. both `df.head()` and `df.tail()`).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [8]:
# Build a Series from a plain list. The NaN entry marks a missing value and
# the resulting dtype is float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [7]:
# Daily DatetimeIndex covering six consecutive days from 2013-01-01
dates = pd.date_range(start='20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Create a 6x4 dataframe of standard-normal draws, indexed by `dates` and
# with columns A-D.
# The global NumPy generator is seeded first so that a fresh
# "Restart Kernel -> Run All" reproduces the same numbers in this cell and in
# every later cell that displays `df`.
# NOTE: outputs saved from earlier, unseeded runs will differ after a re-run.
np.random.seed(42)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,1.109741,-0.199941,0.75937,-1.813542
2013-01-02,-0.336738,0.163098,1.158359,-2.177592
2013-01-03,1.492281,0.833172,-0.922144,0.241526
2013-01-04,0.218803,-0.528804,0.305116,-1.134102
2013-01-05,-2.032722,-0.242343,0.435498,-1.610317
2013-01-06,1.103152,-1.347008,-0.576044,-0.586502


In [19]:
# Build a dataframe from a dict of column specifications to show how each
# kind of input maps to a column dtype. Scalar entries ('A', 'B', 'F') are
# repeated for every row; the array-like entries supply the 4 rows.
# The dict literal is already inside brackets, so no backslash line
# continuations are needed.
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(['test', 'train', 'test', 'train']),
        'F': 'foo',
    }
)
# Show the frame itself, then the dtype pandas chose for each column
df2
df2.dtypes

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [21]:
# Preview the first five and the last five rows of the frame
df.head(n=5)
df.tail(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,1.109741,-0.199941,0.75937,-1.813542
2013-01-02,-0.336738,0.163098,1.158359,-2.177592
2013-01-03,1.492281,0.833172,-0.922144,0.241526
2013-01-04,0.218803,-0.528804,0.305116,-1.134102
2013-01-05,-2.032722,-0.242343,0.435498,-1.610317


Unnamed: 0,A,B,C,D
2013-01-02,-0.336738,0.163098,1.158359,-2.177592
2013-01-03,1.492281,0.833172,-0.922144,0.241526
2013-01-04,0.218803,-0.528804,0.305116,-1.134102
2013-01-05,-2.032722,-0.242343,0.435498,-1.610317
2013-01-06,1.103152,-1.347008,-0.576044,-0.586502


In [22]:
# Inspect the three building blocks of the dataframe:
# the row labels (a DatetimeIndex here) ...
df.index
# ... the column labels ...
df.columns
# ... and the underlying data as a 2-D NumPy array.
# NOTE(review): newer pandas documentation recommends `df.to_numpy()` over
# `.values` - confirm the pandas version in use before switching.
df.values

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

Index(['A', 'B', 'C', 'D'], dtype='object')

array([[ 1.10974143, -0.19994053,  0.75937037, -1.81354217],
       [-0.33673782,  0.16309795,  1.15835945, -2.17759175],
       [ 1.49228078,  0.83317248, -0.92214418,  0.24152591],
       [ 0.21880322, -0.52880353,  0.30511556, -1.13410166],
       [-2.03272211, -0.24234273,  0.43549823, -1.61031684],
       [ 1.10315197, -1.34700835, -0.57604414, -0.58650199]])

In [23]:
# Summary statistics per column: count, mean, std, min, quartiles and max
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.259086,-0.220304,0.193359,-1.180088
std,1.308881,0.724202,0.794864,0.889586
min,-2.032722,-1.347008,-0.922144,-2.177592
25%,-0.197853,-0.457188,-0.355754,-1.762736
50%,0.660978,-0.221142,0.370307,-1.372209
75%,1.108094,0.072338,0.678402,-0.723402
max,1.492281,0.833172,1.158359,0.241526


In [24]:
# Transpose the frame: columns A-D become rows and the dates become columns
df.transpose()

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,1.109741,-0.336738,1.492281,0.218803,-2.032722,1.103152
B,-0.199941,0.163098,0.833172,-0.528804,-0.242343,-1.347008
C,0.75937,1.158359,-0.922144,0.305116,0.435498,-0.576044
D,-1.813542,-2.177592,0.241526,-1.134102,-1.610317,-0.586502


In [30]:
# Sort along the column axis: reorder the columns by their labels in
# descending order (D, C, B, A). The rows keep their original order.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-1.813542,0.75937,-0.199941,1.109741
2013-01-02,-2.177592,1.158359,0.163098,-0.336738
2013-01-03,0.241526,-0.922144,0.833172,1.492281
2013-01-04,-1.134102,0.305116,-0.528804,0.218803
2013-01-05,-1.610317,0.435498,-0.242343,-2.032722
2013-01-06,-0.586502,-0.576044,-1.347008,1.103152


In [32]:
# Reorder the rows by the values in column B, smallest first
df.sort_values(by='B', ascending=True)

Unnamed: 0,A,B,C,D
2013-01-06,1.103152,-1.347008,-0.576044,-0.586502
2013-01-04,0.218803,-0.528804,0.305116,-1.134102
2013-01-05,-2.032722,-0.242343,0.435498,-1.610317
2013-01-01,1.109741,-0.199941,0.75937,-1.813542
2013-01-02,-0.336738,0.163098,1.158359,-2.177592
2013-01-03,1.492281,0.833172,-0.922144,0.241526


In [34]:
# Select a single column; the result is a Series that keeps the date index
df.loc[:, 'A']

2013-01-01    1.109741
2013-01-02   -0.336738
2013-01-03    1.492281
2013-01-04    0.218803
2013-01-05   -2.032722
2013-01-06    1.103152
Freq: D, Name: A, dtype: float64

In [37]:
# Select rows by position. Positions are 0-based and the stop is exclusive,
# so 1:3 returns the second and third rows (positions 1 and 2).
df.iloc[1:3]
# Select rows by label. Unlike positional slicing, a label slice includes
# BOTH endpoints, so this returns 2013-01-02 through 2013-01-04 (three rows).
df.loc['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,-0.336738,0.163098,1.158359,-2.177592
2013-01-03,1.492281,0.833172,-0.922144,0.241526


Unnamed: 0,A,B,C,D
2013-01-02,-0.336738,0.163098,1.158359,-2.177592
2013-01-03,1.492281,0.833172,-0.922144,0.241526
2013-01-04,0.218803,-0.528804,0.305116,-1.134102
