In [2]:
%pylab inline
import pandas as pd
import numpy as np

Populating the interactive namespace from numpy and matplotlib


## Index
* [Create](#create)
    * [create by passing values and indices](#create_normal)
    * [create from a dictionary](#create_from_dict)
        * [transform back to dict](#to_dict)
        * [use 'get' to avoid KeyError](#safe_get)
    * [create with Hierarchical Index](#create_hierarchical)
* [Index and Slice](#index_slice)
* [Arithmetic](#arithmetic)

<a id="create"></a>
## Create

<a id="create_normal"></a>
### create by passing values and indices

In [3]:
 # NOTE: nothing is sorted here -- values and labels stay in exactly the order they were passed in
s = pd.Series(
    data=[6, 8, 9, 1],
    index=["d", "a", "c", "b"],
)
s

d    6
a    8
c    9
b    1
dtype: int64

<a id="create_from_dict"></a>
### create from a dictionary

In [4]:
# Older pandas (< 0.23) sorted the dict keys automatically; newer releases keep the
# dict's insertion order, so sort the index explicitly to get the same result everywhere.
s = pd.Series({"y": 9.9, "x": 6.8, "a": 1, "b": 3.14}).sort_index()
s  # the index is in alphabetical order, not the order the keys were written in

a    1.00
b    3.14
x    6.80
y    9.90
dtype: float64

In [5]:
# An explicit index picks out just those labels from the dictionary, in the given order;
# a label that is not a key of the dictionary ("z" here) gets NaN as its value.
s = pd.Series(
    {"y": 9.9, "x": 6.8, "a": 1, "b": 3.14},
    index=["a", "x", "z"],
)
s

a    1.0
x    6.8
z    NaN
dtype: float64

<a id= "to_dict"></a>
### transform back to a dictionary

In [6]:
# Convert the Series back into a plain dict that maps each index label to its value
# (missing values stay NaN in the resulting dict).
s.to_dict()

{'a': 1.0, 'x': 6.7999999999999998, 'z': nan}

<a id= "safe_get"></a>
### use get to avoid KeyError

In [12]:
# "xxx" is not one of the index labels, so get() returns the supplied default (NaN)
# where s["xxx"] would raise KeyError.
s.get("xxx",np.nan)

nan

<a id="create_hierarchical"></a>
### create series with Hierarchical Index

In [7]:
# Passing a list of two label arrays as the index builds a two-level (hierarchical) index:
# the first array supplies the outer labels, the second one the inner labels.
pd.Series(
    data=np.arange(1, 11),
    index=[
        ["a", "a", "a", "b", "b", "b", "c", "c", "d", "d"],
        ["one", "two", "three", "one", "two", "three", "one", "two", "two", "three"],
    ],
)

a  one       1
   two       2
   three     3
b  one       4
   two       5
   three     6
c  one       7
   two       8
d  two       9
   three    10
dtype: int32

<a id="index_slice"></a>
## Index and Slice

In [8]:
# sort_index() keeps the labels in alphabetical order on every pandas version
# (newer releases would otherwise keep the dict's insertion order).
s = pd.Series({"y": 9.9, "x": 6.8, "a": 1, "b": 3.14}).sort_index()
# `in` checks membership against the index labels
print("x" in s)
# two ways to access an element: by label with [] or as an attribute;
# a single pre-formatted argument prints the same text on Python 2 and Python 3
print("%s %s" % (s["a"], s.y))
# s["NoSuchKey"] would raise KeyError: there is no such label in the index

True
1.0 9.9


In [9]:
# One argument per print() call: this prints identical text on Python 2 and Python 3
# (a multi-argument print() on Python 3 would insert a space after the newline).
print("************** whole:")
print(s)
# .iloc makes it explicit that the slice is positional: the first two entries
print("************** subset by slice:")
print(s.iloc[:2])

************** whole:
a    1.00
b    3.14
x    6.80
y    9.90
dtype: float64
************** subset by slice:
a    1.00
b    3.14
dtype: float64


<a id="arithmetic"></a>
## Arithmetic

In [10]:
# Arithmetic between two Series first aligns them on their index labels, much like a join.
# By default this is an outer join: the result's index is the union of both indices,
# and a label present in only one operand ("a" and "e" here) yields NaN.
s1 = pd.Series(data=[0, 1, 2, 3], index=["a", "b", "c", "d"])
s2 = pd.Series(data=[1, 2, 3, 4], index=["b", "c", "d", "e"])
s1 + s2

a   NaN
b     2
c     4
d     6
e   NaN
dtype: float64