In [1]:
import numpy as np
import pandas as pd
# Show pandas objects as plain text instead of HTML tables, and cap the
# number of columns and rows printed so the outputs stay short.
pd.set_option('display.notebook_repr_html',False)
pd.set_option('display.max_columns',10)
pd.set_option('display.max_rows',10)

In [2]:
# A lone scalar yields a one-element Series labelled by the default integer index.
s1 = pd.Series(data=2)
s1

0    2
dtype: int64

In [3]:
# Fetch the single element through its index label 0.
s1[0]

2

In [4]:
# A list gives one element per item, labelled 0, 1, 2, ... by default.
pd.Series([1,2,3])

0    1
1    2
2    3
dtype: int64

In [5]:
# .values exposes the data as a NumPy array.
s1.values

array([2], dtype=int64)

In [6]:
# .index exposes the labels; here it is the automatically generated integer index.
s1.index

Int64Index([0], dtype='int64')

In [7]:
# Supply explicit string labels instead of the default integer index.
s3 = pd.Series(data=[5, 6, 7], index=list('abc'))
s3

a    5
b    6
c    7
dtype: int64

In [8]:
# With string labels the index is a generic object-dtype Index.
s3.index

Index(['a', 'b', 'c'], dtype='object')

In [9]:
# Look up a single element by its label.
s3['c']

7

In [10]:
# A scalar value is repeated for every label of the supplied index.
s4=pd.Series(2, index=s3.index)
s4

a    2
b    2
c    2
dtype: int64

In [11]:
# Build a Series from a mapping: keys become the labels, values the data.
# The NaN entry makes the whole Series float64.
s5 = pd.Series(dict(a=1, b=np.nan))
s5

a     1
b   NaN
dtype: float64

In [12]:
# len() and .size count every element (NaN included); .count() counts only
# the non-NaN ones; .unique() returns the distinct values as an array.
len(s5),s5.size,s5.count(),s4.unique()

(2, 2, 1, array([2], dtype=int64))

In [13]:
s3.head()  # without an argument, head() returns at most the first 5 elements

a    5
b    6
c    7
dtype: int64

In [14]:
# Only the first two elements.
s3.head(2)

a    5
b    6
dtype: int64

In [15]:
s3.take([1,2])  # arguments are 0-based integer positions, not labels

b    6
c    7
dtype: int64

In [16]:
# A single label returns the scalar stored under it.
s3['a']

5

In [17]:
# A list of labels returns a Series holding just those elements.
s3[['a','b']]

a    5
b    6
dtype: int64

In [18]:
# Three ways to reach the same element of an integer-labelled Series:
#   .loc[11]  -> lookup by label
#   [11]      -> lookup by label as well, because the index holds integers
#   .iloc[1]  -> lookup by 0-based position
s6 = pd.Series([1, 2, 3], index=[10, 11, 12])
s6, s6.loc[11], s6[11], s6.iloc[1]

(10    1
 11    2
 12    3
 dtype: int64, 2, 2, 2)

In [19]:
# A list of positions selects several elements at once (first and third here).
s6.iloc[[0,2]]

10    1
12    3
dtype: int64

In [20]:
# When the index holds integers, integer keys are matched against the labels
# instead of being used as positions, so 0 and 1 (absent labels) come back as NaN.
# The .ix indexer was deprecated in pandas 0.20 and removed in 1.0; reindex() is
# the label-based replacement that still returns NaN for missing labels.
s6.reindex([11,12,0,1])

11     2
12     3
0    NaN
1    NaN
dtype: float64

In [21]:
# Label-based selection on a string-labelled Series.
# .loc replaces the .ix indexer, which was removed in pandas 1.0.
s3.loc[['a','b']]

a    5
b    6
dtype: int64

In [22]:
# Position-based selection on a string-labelled Series.
# .iloc replaces the .ix indexer, which was removed in pandas 1.0.
s3.iloc[[0,1]]

a    5
b    6
dtype: int64

In [23]:
# Series addition differs from NumPy array addition: the operands are first
# aligned on their index labels and then added label by label, so the
# reversed order of s7 does not matter.
s7=pd.Series([7,6,5],index=['c','b','a'])
s3+s7

a    10
b    12
c    14
dtype: int64

In [24]:
s7*s3  # multiplication is aligned on index labels too: a=5*5, b=6*6, c=7*7

a    25
b    36
c    49
dtype: int64

In [25]:
# Alignment with duplicate and non-matching labels: each 'c' in s8 is paired
# with the single 'c' in s7, while labels present in only one operand
# ('a' and 'b') produce NaN.
s8 = pd.Series([7,6,5],index=['c','c','c'])
s7+s8

a   NaN
b   NaN
c    14
c    13
c    12
dtype: float64

In [26]:
# NaN handling in NumPy versus pandas: a plain ndarray mean propagates NaN.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
nda=np.array([1,2,3,np.nan])
nda.mean()

nan

In [27]:
# The NaN is still stored in the Series; it is Series.mean() that skips it.
s9=pd.Series(nda)
s9.mean()

2.0

In [28]:
# Pass skipna=False explicitly to let NaN propagate, matching the ndarray result.
s9.mean(skipna=False)

nan

In [29]:
# Comparing a Series with a scalar is evaluated element by element and
# gives a boolean Series carrying the same index.
s10 = pd.Series(data=np.arange(10))
s10 > 5

0    False
1    False
2    False
3    False
4    False
5    False
6     True
7     True
8     True
9     True
dtype: bool

In [30]:
# A boolean Series used as the key keeps only the elements where it is True.
s10[s10>5]

6    6
7    7
8    8
9    9
dtype: int32

In [31]:
# Combine element-wise conditions with & and |, each wrapped in parentheses;
# the Python keywords `and` / `or` cannot be used here.
# No value is both > 5 and < 2, so the selection is empty.
s10[(s10>5) & (s10<2)]

Series([], dtype: int32)

In [32]:
# .all() / .any() reduce to a single boolean; .sum() on a boolean Series
# counts the True entries. The second item calls .any() on an empty selection.
(s10>=0).all(),s10[s10<0].any(),(s10<3).sum(),(s10>2).any()

(True, False, 3, True)

In [33]:
# reindex builds a new Series in the requested label order; labels that do
# not exist in s10 ('a', 'b') are filled with NaN, which makes the result float64.
s11=s10.reindex([2,1,'a','b'])
s11

2     2
1     1
a   NaN
b   NaN
dtype: float64

In [34]:
# Labels of different types never match during alignment: the integers
# 0, 1, 2 and the strings '0', '1', '2' are distinct labels, so the sum
# has six rows and every one of them is NaN.
s21 = pd.Series([0, 1, 2], index=[0, 1, 2])
s22 = pd.Series([0, 1, 2], index=['0', '1', '2'])
s21 + s22

0   NaN
1   NaN
2   NaN
0   NaN
1   NaN
2   NaN
dtype: float64

In [35]:
# Convert s22's string labels to integers so they match s21's labels;
# this replaces s22's index in place.
s22.index=s22.index.values.astype(int)
s21+s22

0    0
1    2
2    4
dtype: int64

In [36]:
# Take a copy of s21 to experiment on in the following cells.
s23=s21.copy()
s23

0    0
1    1
2    2
dtype: int64

In [37]:
# reindex returns a new Series and leaves s23 unchanged; labels missing
# from s23 take fill_value instead of NaN.
s23.reindex(['a','b'],fill_value=3)

a    3
b    3
dtype: int64

In [38]:
# Item assignment and deletion modify s23 itself.
s23['d'] = 3  # assigning to a label that does not exist appends a new element
del s23[1]    # remove the element stored under label 1
s23

0    0
2    2
d    3
dtype: int64

In [42]:
# Values 0..9 stored under the integer labels 10..19.
s23 = pd.Series(np.arange(10), index=np.arange(10, 20))
# s23[1] fails because 1 is not one of the labels, whereas an integer slice
# inside [] is taken by position (here positions 1 and 2).
s23[1:3]

11    1
12    2
dtype: int32

In [43]:
# .iloc makes positional selection explicit (third and fifth elements here).
s23.iloc[[2,4]]

12    2
14    4
dtype: int32

In [46]:
# Keep the positional slice (elements at positions 1 and 2) under its own name.
s24=s23[1:3]
s24

11    1
12    2
dtype: int32

In [47]:
s3['a':'c']  # label-based slice: the end label 'c' is included

a    5
b    6
c    7
dtype: int64