In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# A Series is built as pd.Series(data, index=index), where `data` may be an
# ndarray, a dictionary, or a scalar value.

# With ndarray data the index needs exactly one label per value. The values
# are drawn by np.random.randn, so they change between runs unless a seed
# has been set.
s = pd.Series(
    data=np.random.randn(5),
    index=['a', 'b', 'c', 'd', 'e'],
)

In [4]:
# Display the Series: index labels on the left, values on the right, dtype last
s

a   -1.033516
b   -1.693762
c   -0.904835
d    0.394013
e   -0.275437
dtype: float64

In [5]:
# The index holds the labels that were passed when the Series was constructed
s.index

Index([u'a', u'b', u'c', u'd', u'e'], dtype='object')

In [6]:
# Without an explicit index, default integer labels 0..n-1 are assigned
pd.Series(np.random.randn(5))

0   -0.689219
1   -0.164055
2    0.382208
3    0.455655
4   -0.432455
dtype: float64

In [7]:
# Dictionary input for the Series examples that follow
d = {
    'a': 0,
    'b': 1.0,
    'c': 2.0,
}

In [8]:
# Dict keys become the index labels and dict values become the data
pd.Series(d)

a    0
b    1
c    2
dtype: float64

In [9]:
# An explicit index selects and orders the dict entries; a label that is not
# a key of the dict ('d' here) gets NaN
pd.Series(d, index=['b', 'c', 'd', 'a'])

b     1
c     2
d   NaN
a     0
dtype: float64

# Object Creation

In [10]:
# First ingredient for a DataFrame built from a NumPy array with a datetime
# index and labeled columns: six consecutive daily timestamps for the index
dates = pd.date_range(start='20130101', periods=6, freq='D')

In [11]:
# Inspect the generated DatetimeIndex (six daily timestamps, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# 6x4 frame of np.random.randn draws: rows labelled by `dates`, columns 'A'..'D'
dataframe = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)

In [13]:
# Display the whole frame (only 6 rows x 4 columns, so no truncation needed)
dataframe

Unnamed: 0,A,B,C,D
2013-01-01,0.382312,0.33295,1.417078,0.122972
2013-01-02,-0.244801,0.711601,-0.690342,-1.085262
2013-01-03,-0.82944,1.201025,-1.329416,-0.781519
2013-01-04,-0.165716,0.23345,0.669627,-0.251336
2013-01-05,-1.356043,-0.834144,0.503567,0.819814
2013-01-06,-1.152413,-0.215945,-0.942161,1.460893


In [14]:
# Create a DataFrame by passing a dict of objects that can be converted to Series-like structures

In [15]:
# Each dict entry becomes one column. The scalar entries ('A', 'B', 'F') are
# repeated on every row; the array-like entries ('C', 'D', 'E') each supply
# four values and carry their own dtype.
dataframe2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

In [16]:
# Display the frame: the scalar inputs ('A', 'B', 'F') appear on every row
dataframe2

Unnamed: 0,A,B,C,D,E,F
0,1,2013-01-02,1,3,test,foo
1,1,2013-01-02,1,3,train,foo
2,1,2013-01-02,1,3,test,foo
3,1,2013-01-02,1,3,train,foo


In [17]:
# Each column has its own specific dtype
dataframe2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [18]:
# Attribute-style column access; the result is a Series named after the column
dataframe2.A

0    1
1    1
2    1
3    1
Name: A, dtype: float64

In [19]:
# Viewing data
# head() with no argument shows the first five rows of the frame
dataframe.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.382312,0.33295,1.417078,0.122972
2013-01-02,-0.244801,0.711601,-0.690342,-1.085262
2013-01-03,-0.82944,1.201025,-1.329416,-0.781519
2013-01-04,-0.165716,0.23345,0.669627,-0.251336
2013-01-05,-1.356043,-0.834144,0.503567,0.819814


In [22]:
# tail(3) shows the last three rows of the frame
dataframe.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.165716,0.23345,0.669627,-0.251336
2013-01-05,-1.356043,-0.834144,0.503567,0.819814
2013-01-06,-1.152413,-0.215945,-0.942161,1.460893


In [23]:
# Row labels of the frame: the DatetimeIndex supplied at construction
dataframe.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [24]:
# Column labels of the frame
dataframe.columns

Index([u'A', u'B', u'C', u'D'], dtype='object')

In [25]:
# The underlying data as a NumPy array, without the index or column labels
dataframe.values

array([[ 0.3823124 ,  0.33294959,  1.41707793,  0.12297234],
       [-0.24480139,  0.71160145, -0.69034169, -1.08526236],
       [-0.82943963,  1.20102454, -1.32941646, -0.78151924],
       [-0.16571646,  0.2334502 ,  0.66962651, -0.25133642],
       [-1.35604324, -0.83414385,  0.50356727,  0.81981361],
       [-1.15241284, -0.21594533, -0.94216109,  1.46089259]])

In [26]:
# Per-column summary statistics: count, mean, std, min, quartiles and max
dataframe.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.561017,0.238156,-0.061941,0.047593
std,0.663324,0.70941,1.078764,0.965357
min,-1.356043,-0.834144,-1.329416,-1.085262
25%,-1.07167,-0.103596,-0.879206,-0.648974
50%,-0.537121,0.2832,-0.093387,-0.064182
75%,-0.185488,0.616938,0.628112,0.645603
max,0.382312,1.201025,1.417078,1.460893


In [27]:
# Transpose: the columns become rows and the dates become the column labels
dataframe.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,0.382312,-0.244801,-0.82944,-0.165716,-1.356043,-1.152413
B,0.33295,0.711601,1.201025,0.23345,-0.834144,-0.215945
C,1.417078,-0.690342,-1.329416,0.669627,0.503567,-0.942161
D,0.122972,-1.085262,-0.781519,-0.251336,0.819814,1.460893


In [28]:
# Sort along axis 1 (the column labels) in descending order: D, C, B, A
dataframe.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.122972,1.417078,0.33295,0.382312
2013-01-02,-1.085262,-0.690342,0.711601,-0.244801
2013-01-03,-0.781519,-1.329416,1.201025,-0.82944
2013-01-04,-0.251336,0.669627,0.23345,-0.165716
2013-01-05,0.819814,0.503567,-0.834144,-1.356043
2013-01-06,1.460893,-0.942161,-0.215945,-1.152413


In [31]:
# Sort the rows by the values in column 'B' (ascending, as the output shows)
dataframe.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-05,-1.356043,-0.834144,0.503567,0.819814
2013-01-06,-1.152413,-0.215945,-0.942161,1.460893
2013-01-04,-0.165716,0.23345,0.669627,-0.251336
2013-01-01,0.382312,0.33295,1.417078,0.122972
2013-01-02,-0.244801,0.711601,-0.690342,-1.085262
2013-01-03,-0.82944,1.201025,-1.329416,-0.781519
