In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

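# Much of pandas' coding style mirrors NumPy's.
# The biggest difference is that pandas is designed for tabular, heterogeneous data, whereas NumPy is best suited to homogeneous numerical array data.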

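# A Series is a one-dimensional, array-like object made up of a sequence of values (of any NumPy dtype) and an associated set of data labels, called its index.
# A Series can also be thought of as a fixed-length, ordered dict: a mapping from index labels to data values.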

In [2]:
# Build a Series from a plain list of values.
obj = pd.Series([4, 7, -5, 3])  # with no index given, a default index 0 .. len-1 is generated
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [3]:
# .values exposes the Series' data as a NumPy array (labels are not included).
obj.values

array([ 4,  7, -5,  3])

In [4]:
# .index exposes the labels; the auto-generated default index is a RangeIndex.
obj.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
# Same values as obj, but each data point is labelled explicitly.
obj2 = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])  # explicit index labels
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [6]:
# With string labels the index is a generic Index of dtype object.
obj2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [7]:
# Selecting with a single label returns the scalar stored under that label.
obj2['a']

-5

In [8]:
# Selecting with a list of labels returns a new Series in the requested label order.
obj2[['c', 'a', 'd']]

c    3
a   -5
d    4
dtype: int64

In [9]:
# Boolean filtering: keep only the positive entries; each kept value retains its label.
obj2[obj2 > 0]

d    4
b    7
c    3
dtype: int64

In [10]:
# Scalar arithmetic is applied element-wise and preserves the index.
obj2 * 2

d     8
b    14
a   -10
c     6
dtype: int64

In [11]:
# NumPy ufuncs accept a Series directly; the result is a Series with the same index
# (here promoted to float64).
np.exp(obj2)

d      54.598150
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

In [12]:
# Series vs. dict: like a dict, `in` tests membership against the index labels
# (the "keys"), not against the stored values.
'b' in obj2

True

In [13]:
# 'e' is not one of obj2's index labels, so the membership test is False.
'e' in obj2

False

In [14]:
# A dict can be passed straight to the Series constructor: its keys become the
# index labels and its values become the data.
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = pd.Series(sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [15]:
# Passing an explicit index together with a dict selects and orders entries by
# that index: 'California' has no entry in sdata and becomes NaN (which also
# turns the dtype into float64), while 'Utah' is not in `states` and is left out.
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = pd.Series(sdata, index=states)  # index fixes the Series' label order; the dict's 'Utah' entry is dropped
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [16]:
# Flag missing entries: True where the value is NaN.
obj4.isnull()  # equivalent to pd.isnull(obj4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [17]:
# Flag present entries: the element-wise inverse of isnull().
obj4.notnull()  # equivalent to pd.notnull(obj4)

California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

In [18]:
# Index alignment: arithmetic between two Series matches values by index label,
# much like a join in a relational database. The result's index is the union of
# both indexes, and any label that lacks a usable value in either operand
# ('Utah' is only in obj3; 'California' is NaN in obj4 and absent from obj3)
# comes out as NaN.
#
# Operands, for reference:
#
#   obj3:
#     Ohio      35000
#     Texas     71000
#     Oregon    16000
#     Utah       5000
#     dtype: int64
#
#   obj4:
#     California        NaN
#     Ohio          35000.0
#     Oregon        16000.0
#     Texas         71000.0
#     dtype: float64
obj3 + obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [19]:
# Both the Series itself and its index have a `name` attribute, which ties in
# closely with other key areas of pandas functionality.
obj4.name = 'population'
obj4.index.name = 'state'
# The index name is shown above the labels and the Series name in the footer.
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

In [20]:
# A Series' index can be replaced in place by assignment.
#
# Before the assignment, obj still carries its default integer index:
#
#     0    4
#     1    7
#     2   -5
#     3    3
#     dtype: int64
#
# NOTE: this mutates obj itself, so the output of the earlier cell that
# displayed obj no longer reflects its current state.
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
obj

Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64