In [11]:
# http://pandas-docs.github.io/pandas-docs-travis/dsintro.html#dsintro
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt

In [13]:
# A Series is a one-dimensional labelled array capable of holding any data type.
# The index holds the 'axis labels', i.e. the name attached to each row.
# The basic way to create a Series is to call:
# s = pd.Series(data, index=index)

In [15]:
# Seed NumPy's global RNG so the random values are identical on every
# "Restart & Run All"; without a seed each run displays different numbers.
np.random.seed(0)
# Seven random floats, labelled 'a'..'g'.
s = pd.Series(np.random.randn(7), index=list('abcdefg'))

In [16]:
# Display the Series: index labels on the left, values on the right.
s

a    1.072706
b   -0.085702
c   -1.937969
d   -0.010064
e   -0.727305
f   -0.059714
g   -0.237136
dtype: float64

In [17]:
# The index attribute holds the labels that were passed at construction.
s.index

Index([u'a', u'b', u'c', u'd', u'e', u'f', u'g'], dtype='object')

In [16]:
# No index passed: a default integer index 0..len(data)-1 is used.
pd.Series(np.random.randn(10))

0   -0.566130
1    0.314816
2   -1.508451
3   -1.626938
4   -1.318560
5    0.272385
6    0.504731
7   -0.450023
8    1.065213
9    0.013970
dtype: float64

In [23]:
# When data is a dict and no index is given, the index is built from the
# dict's keys (insertion order on pandas >= 0.23 with Python >= 3.6; older
# versions used the sorted keys).
d = dict(a=0.0, b=1.0, c=2.0)
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [25]:
# With an explicit index, values are looked up in the dict by label, in the
# requested order; a label that is not a key (here 'd') becomes NaN.
pd.Series(d, index=list('bcda'))

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [None]:
# A scalar needs an explicit index; the value is repeated once per label.
pd.Series(5.0, index=list('abdr'))

In [29]:
# Selecting from a Series by position. Use .iloc for integer positions:
# on a label-indexed Series, plain s[0] relies on a positional fallback
# that is deprecated in pandas >= 2.1.
s.iloc[0]

-0.32767361088692037

In [32]:
# Fifth element by position; .iloc avoids the deprecated integer-key
# fallback of s[4] on a string-labelled index.
s.iloc[4]

0.90153888395861015

In [37]:
# First three entries, selected by position.
s.iloc[:3]

a   -0.327674
b   -0.296510
c   -0.820204
dtype: float64

In [39]:
# Boolean-mask selection: keep only the entries strictly above the median.
s.loc[s.gt(s.median())]

b   -0.296510
e    0.901539
g    2.105469
dtype: float64

In [41]:
# Median of the values; this is the threshold used in the mask s > s.median().
s.median()

-0.3276736108869204

In [20]:
# Arithmetic mean of the values, returned as a scalar.
s.mean()

-0.28359783282535056

In [21]:
# Pick several entries by position, in the order given. .iloc makes the
# positional intent explicit; a list of integers passed to s[...] on a
# string-labelled index relies on a deprecated fallback.
s.iloc[[4, 3, 1]]

e   -0.727305
d   -0.010064
b   -0.085702
dtype: float64

In [22]:
# NumPy ufuncs accept a Series and return a Series with the same index.
np.exp(s)

a    2.923278
b    0.917868
c    0.143996
d    0.989986
e    0.483210
f    0.942034
g    0.788884
dtype: float64

In [23]:
# A Series is dict-like: a value can be looked up by its index label.
s.loc['a']

1.0727055810594777

In [24]:
# Label-based assignment updates the entry in place; redisplay to confirm.
s.loc['e'] = 12
s

a     1.072706
b    -0.085702
c    -1.937969
d    -0.010064
e    12.000000
f    -0.059714
g    -0.237136
dtype: float64

In [25]:
# Membership is tested against the index labels, like keys of a dict.
'e' in s

True

In [26]:
# 'f' is one of the index labels.
'f' in s

True

In [27]:
# 'k' is not one of the index labels.
'k' in s

False

In [None]:
# Using the get method, a missing
# label will return None or the specified default:

In [32]:
# 'a' is an existing label, so get() returns its stored value.
s.get('a')

-0.059714325547745639

In [34]:
# 'f' is present in the index, so the np.nan default is not used here.
s.get('f', np.nan)

-0.059714325547745639

In [36]:
# Redisplay the Series to check its current contents.
s

a     1.072706
b    -0.085702
c    -1.937969
d    -0.010064
e    12.000000
f    -0.059714
g    -0.237136
dtype: float64

In [None]:
# Look up label 'a' with get().
s.get('a')

In [None]:
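# Vectorized operations: arithmetic and NumPy functions are applied
# element-wise to the whole Series, with no explicit loop.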

In [None]:
# Element-wise addition of the Series with itself.
s + s

In [44]:
# Multiply every value by a scalar.
s.mul(3)

a     3.218117
b    -0.257106
c    -5.813908
d    -0.030193
e    36.000000
f    -0.179143
g    -0.711407
dtype: float64

In [46]:
# Square every value.
s.pow(2)

a      1.150697
b      0.007345
c      3.755725
d      0.000101
e    144.000000
f      0.003566
g      0.056233
dtype: float64

In [None]:
# In Python `^` is bitwise XOR, not exponentiation, and it raises a
# TypeError on a float64 Series. Use `**` (or Series.pow) to square.
s ** 2

In [52]:
# Element-wise exponential; the index labels are preserved in the result.
np.exp(s)

a         2.923278
b         0.917868
c         0.143996
d         0.989986
e    162754.791419
f         0.942034
g         0.788884
dtype: float64

In [None]:
# A key difference between Series and ndarray is that operations between
# Series automatically align the data based on label. Thus, you can write
# computations without giving consideration to whether the Series involved
# have the same labels.

In [57]:
# Everything except the first entry (positional slice).
s.iloc[1:]

b    -0.085702
c    -1.937969
d    -0.010064
e    12.000000
f    -0.059714
g    -0.237136
dtype: float64

In [59]:
# Display the full Series for comparison with the slices.
s

a     1.072706
b    -0.085702
c    -1.937969
d    -0.010064
e    12.000000
f    -0.059714
g    -0.237136
dtype: float64

In [63]:
# Overwrite the value stored under label 'c' (modifies s in place).
s.loc['c'] = 6

In [66]:
# Positional slice that drops the first entry.
s.iloc[1:]

b    -0.085702
c     6.000000
d    -0.010064
e    12.000000
f    -0.059714
g    -0.237136
dtype: float64

In [69]:
# Positional slice that drops the last entry.
s.iloc[:-1]

a     1.072706
b    -0.085702
c     6.000000
d    -0.010064
e    12.000000
f    -0.059714
dtype: float64

In [72]:
# The two slices are aligned on label before adding: labels present in only
# one operand ('a' and 'g') yield NaN in the result.
s.iloc[1:] + s.iloc[:-1]

a          NaN
b    -0.171404
c    12.000000
d    -0.020129
e    24.000000
f    -0.119429
g          NaN
dtype: float64

In [75]:
# A Series may carry a name, supplied here through the `name` argument.
s1 = pd.Series(data=np.random.randn(5), name='something')

In [None]:
# Display the named Series.
s1

In [None]:
# The name given at construction is exposed as the .name attribute.
s1.name

In [None]:
# Rename: calling rename() with a scalar sets the Series name; the result is
# bound to s2 and displayed.
s2 = s1.rename('different')
s2