In [1]:
import numpy as np
import pandas as pd

# Series

Series是pandas中的两种基础数据结构之一，可以理解为一维带标签数组
* 数组中数据可以为任意类型（整数、字符串、浮点数、Python objects等）
* 同一个Series中的所有数据共用一个dtype，即为统一类型(homogeneous)

### 创建Series

建立一个Series
   - s = pd.Series(data, index=index)
   
这里data可以是
   - list
   - array
   - dictionary

In [2]:
# Build a Series from a plain list; no index is given, so the default
# integer labels are used.
price = pd.Series(
    data=[15280, 45888, 15692, 55689, 28410, 27566],
)
price

0    15280
1    45888
2    15692
3    55689
4    28410
5    27566
dtype: int64

In [3]:
# Confirm the class of the object that pd.Series(...) returned.
type(price)

pandas.core.series.Series

In [4]:
# Create a Series from a list and give the Series itself a name.
price = pd.Series(
    data=[15280, 45888, 15692, 55689, 28410, 27566],
    name="price",
)
price

0    15280
1    45888
2    15692
3    55689
4    28410
5    27566
Name: price, dtype: int64

In [5]:
# IPython-only help syntax: a trailing `?` shows the docstring for pd.Series.
pd.Series?

In [6]:
# Create a Series from a dict: the keys become the index labels.
# The original note says a dict is unordered while a Series is ordered; in the
# output recorded below the index comes out in sorted key order, not in
# insertion order.
# NOTE(review): Python 3.7+ dicts keep insertion order and newer pandas
# releases preserve it -- confirm against the pandas version in use.
temp = {
    'Mon': 33,
    'Tue': 19,
    'Wed': 15,
    'Thu': 89,
    'Fri': 11,
    'Sat': -5,
    'Sun': 9,
}
pd.Series(temp)

Fri    11
Mon    33
Sat    -5
Sun     9
Thu    89
Tue    19
Wed    15
dtype: int64

创建好Series之后，之前Numpy中的通用方法（例如求均值、求和）是可以直接使用的

In [7]:
price.mean()    # mean of the values

31420.833333333332

In [8]:
price.sum()    # total of the values

188525

In [9]:
price.head(2)   # keep only the first n items

0    15280
1    45888
Name: price, dtype: int64

In [10]:
price.tail(3)   # keep only the last n items

3    55689
4    28410
5    27566
Name: price, dtype: int64

In [11]:
# IPython-only help syntax: show the docstring for the Series.max method.
price.max?

In [13]:
print(dir(price))    # list every attribute and method name available on the Series object

['T', '_AXIS_ALIASES', '_AXIS_IALIASES', '_AXIS_LEN', '_AXIS_NAMES', '_AXIS_NUMBERS', '_AXIS_ORDERS', '_AXIS_REVERSED', '_AXIS_SLICEMAP', '__abs__', '__add__', '__and__', '__array__', '__array_prepare__', '__array_priority__', '__array_wrap__', '__bool__', '__bytes__', '__class__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__div__', '__divmod__', '__doc__', '__eq__', '__finalize__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__', '__imul__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__long__', '__lt__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pow__', '__radd__', '__rand__', '__rdiv__', '__reduce__', '__reduce_ex__', '__re

### data type in series

In [14]:
# Every value is an integer, so the Series gets an integer dtype.
price = pd.Series(data=[15280, 45888, 15692, 55689, 28410, 27566])
price.dtype

dtype('int64')

In [15]:
# A single float (27566.3) among the integers makes the whole Series float.
price = pd.Series(data=[15280, 45888, 15692, 55689, 28410, 27566.3])
price.dtype

dtype('float64')

In [16]:
# A Series of strings; the recorded output reports its dtype as 'O' (object).
city = pd.Series(data=['wh', 'sh', 'hz', 'bj', 'gz', 'nj'])
city.dtype

dtype('O')

In [19]:
# Arbitrary Python objects (a dict, a list, a tuple) can share one Series.
temp = pd.Series(data=[{}, [], (3, 4)])
# A bare `temp.dtype` is displayed through its repr; print(temp.dtype) shows
# the plain name instead, which is the form of the output recorded for this cell.
temp.dtype

object


In [18]:
# Date-like strings stay ordinary strings until they are explicitly converted.
x = pd.Series(data=['2016-01-01', '2017-01-01'])
print(x.dtype)
# The converted Series is only displayed; x itself is not reassigned here.
pd.to_datetime(x)

object


0   2016-01-01
1   2017-01-01
dtype: datetime64[ns]

In [20]:
# dtype='category' stores the repeated labels as a categorical; the recorded
# output lists 6 distinct categories for the 8 values.
x = pd.Series(
    data=['hw', 'apple', 'vivo', 'mi', 'hw', 'oppo', 'samsung', 'vivo'],
    dtype='category',
)
x

0         hw
1      apple
2       vivo
3         mi
4         hw
5       oppo
6    samsung
7       vivo
dtype: category
Categories (6, object): [apple, hw, mi, oppo, samsung, vivo]

### boolean

In [21]:
# A Series of booleans, used as a selection mask in the cells that follow.
mask = pd.Series(data=[True, False, True, True, False, False])
mask

0     True
1    False
2     True
3     True
4    False
5    False
dtype: bool

In [22]:
# Re-display price (float64 in the recorded output) before applying the mask.
price

0    15280.0
1    45888.0
2    15692.0
3    55689.0
4    28410.0
5    27566.3
dtype: float64

In [23]:
# Boolean indexing: keep only the positions where mask is True.
price[mask]

0    15280.0
2    15692.0
3    55689.0
dtype: float64

In [24]:
# A second mask that differs from `mask` only in the last position.
mask2 = pd.Series(data=[True, False, True, True, False, True])
# Element-wise OR of the two boolean Series.
mask | mask2

0     True
1    False
2     True
3     True
4    False
5     True
dtype: bool

In [25]:
# Element-wise AND of the two boolean Series.
mask&mask2

0     True
1    False
2     True
3     True
4    False
5    False
dtype: bool

In [26]:
# Element-wise NOT: flips every value in mask.
~mask

0    False
1     True
2    False
3    False
4     True
5     True
dtype: bool

In [27]:
# Pitfall demo: `&` binds more tightly than the comparison operators, so this
# line is parsed as `price > (20000 & price) < 30000` and fails instead of
# building a mask. The expected error is caught and printed so that the
# notebook still survives "Restart & Run All"; each comparison needs its own
# parentheses for the expression to work.
try:
    price>20000&price<30000
except (TypeError, ValueError) as err:
    print(f"{type(err).__name__}: {err}")

TypeError: cannot compare a dtyped [float64] array with a scalar of type [bool]

In [28]:
# Parenthesise each comparison so both masks are built before `&` combines them.
(price>20000)&(price<30000)

0    False
1    False
2    False
3    False
4     True
5     True
dtype: bool

In [29]:
# Use the combined mask to select the prices strictly between 20000 and 30000.
price[(price>20000)&(price<30000)]

4    28410.0
5    27566.3
dtype: float64

In [32]:
# Plain Python lists do not support an element-wise `&`; the operator works on
# Series, not on lists. The expected TypeError is caught and printed so that
# the notebook still runs from top to bottom without stopping here.
temp = [True, False, True, True, False, False]
temp2 = [True, False, True, True, False, False]
try:
    temp & temp2
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: unsupported operand type(s) for &: 'list' and 'list'

### index

In [33]:
# Show price again before looking at how its index works.
price

0    15280.0
1    45888.0
2    15692.0
3    55689.0
4    28410.0
5    27566.3
dtype: float64

In [35]:
# With the default integer index, an integer key selects the element labelled 2.
price[2]

15692.0

In [36]:
# Replace the default integer labels with explicit city codes.
price = pd.Series(
    data=[15280, 45888, 15692, 55689, 28410, 27566],
    index=['wh', 'sh', 'hz', 'bj', 'gz', 'nj'],
)
price

wh    15280
sh    45888
hz    15692
bj    55689
gz    28410
nj    27566
dtype: int64

In [37]:
# The index labels are available as an Index object.
price.index

Index(['wh', 'sh', 'hz', 'bj', 'gz', 'nj'], dtype='object')

In [38]:
# Look up a single value by its index label.
price['sh']

45888

In [39]:
price.index.name = 'city'    # give the index itself a name (the labels are unchanged)
price

city
wh    15280
sh    45888
hz    15692
bj    55689
gz    28410
nj    27566
dtype: int64

In [41]:
# 还可以这么方便的给设置时间序列
dates = pd.date_range('2016-01-01', '2016-06-01',freq='M')
dates

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31'],
              dtype='datetime64[ns]', freq='M')

In [42]:
# Attach one reading to each month-end date in `dates`.
# NOTE(review): 'tempature' is presumably a misspelling of 'temperature'; the
# name is kept because later cells may reference it.
tempature = pd.Series(
    data=[13, 15, 20, 27, 29],
    index=dates,
)
tempature

2016-01-31    13
2016-02-29    15
2016-03-31    20
2016-04-30    27
2016-05-31    29
Freq: M, dtype: int64

In [45]:
# Index labels do not have to be unique: the label 2 is used twice here.
temp = pd.Series(
    data=[13, 15, 20, 27, 29],
    index=[0, 2, 2, 3, 4],
)
temp

0    13
2    15
2    20
3    27
4    29
dtype: int64

In [46]:
# IPython-only help syntax: show the docstring for pd.date_range.
pd.date_range?

### index/slicing