# Pandas 基本用法
[原文](https://blog.csdn.net/cxmscb/article/details/54632492)

## 数据结构Series

In [1]:
import pandas as pd
from pandas import Series, DataFrame

In [3]:
x = Series([1,2,3,4])
x

0    1
1    2
2    3
3    4
dtype: int64

In [6]:
print(x.values)
print(x.index)

[1 2 3 4]
RangeIndex(start=0, stop=4, step=1)


In [8]:
x = Series([1,2,3,4], index=['a', 'b', 'c', 'd'])
print(x)

a    1
b    2
c    3
d    4
dtype: int64


In [11]:
print(x[['c', 'a', 'b']]) # 花式索引
print(x[x>2]) # 布尔索引

c    3
a    1
b    2
dtype: int64
c    3
d    4
dtype: int64


In [13]:
# 使用字典来生成Series
data = {'a':1, 'b':2, 'd':3, 'c':4}
x = Series(data)
print(x)

a    1
b    2
d    3
c    4
dtype: int64


In [14]:
# 使用字典生成Series，并显式指定index；index中有而字典里没有的键（如'e'），对应的数据为NaN
data = {'a':1, 'b':2, 'd':3, 'c':4}
y = Series(data, index=['a', 'b', 'e'])
print(y)

a    1.0
b    2.0
e    NaN
dtype: float64


In [15]:
# Series相加，相同行索引相加，不同行索引则数值为NaN
print(x+y)

a    2.0
b    4.0
c    NaN
d    NaN
e    NaN
dtype: float64


In [16]:
# 指定Series/索引的名字
y.name = "weight of letters"
y.index.name = "letter"
print(y)

letter
a    1.0
b    2.0
e    NaN
Name: weight of letters, dtype: float64


## 数据结构DataFrame

In [18]:
# 使用字典生成DataFrame，key为列名字
data = {'state':['ok', 'ok', 'good', 'bad'],
        'year':[2000, 2001, 2002, 2003],
        'pop':[3.7, 3.6, 2.4, 0.9]}
print(DataFrame(data))

  state  year  pop
0    ok  2000  3.7
1    ok  2001  3.6
2  good  2002  2.4
3   bad  2003  0.9


In [23]:
x = DataFrame(data, columns=['year', 'state', 'pop', 'debt'], index=['one', 'two', 'three', 'four'])
print(x)

       year state  pop debt
one    2000    ok  3.7  NaN
two    2001    ok  3.6  NaN
three  2002  good  2.4  NaN
four   2003   bad  0.9  NaN


In [25]:
print(x['debt'])

one      NaN
two      NaN
three    NaN
four     NaN
Name: debt, dtype: object


In [28]:
print(x.loc['three']) # 用.loc[]按行标签索引一行

year     2002
state    good
pop       2.4
debt      NaN
Name: three, dtype: object


In [30]:
x['debt'] = 16.6
print(x)

       year state  pop  debt
one    2000    ok  3.7  16.6
two    2001    ok  3.6  16.6
three  2002  good  2.4  16.6
four   2003   bad  0.9  16.6


In [33]:
# 用Series给列赋值：按行索引对齐，Series中没有的行索引（如'four'）填NaN，DataFrame中没有的索引（如'six'）被忽略
val = Series([-1.2, -1.5, 1.7,0], index = ['one', 'two', 'three','six']) 
x.debt = val # DataFrame的行索引不变
print(x)

       year state  pop  debt
one    2000    ok  3.7  -1.2
two    2001    ok  3.6  -1.5
three  2002  good  2.4   1.7
four   2003   bad  0.9   NaN


In [34]:
# 添加列
x['gain'] = (x.debt > 0)
print(x)

       year state  pop  debt   gain
one    2000    ok  3.7  -1.2  False
two    2001    ok  3.6  -1.5  False
three  2002  good  2.4   1.7   True
four   2003   bad  0.9   NaN  False


## numpy在Series/DataFrame的应用

In [36]:
import numpy as np
frame = DataFrame(np.arange(9).reshape(3, 3), columns=['A','B','C'], index=['a','b','c'])
print(frame)

   A  B  C
a  0  1  2
b  3  4  5
c  6  7  8


In [44]:
series = frame.loc['a']
print(type(series))
print(series)
series = frame.A
print(type(series))
print(series)

<class 'pandas.core.series.Series'>
A    0
B    1
C    2
Name: a, dtype: int32
<class 'pandas.core.series.Series'>
a    0
b    3
c    6
Name: A, dtype: int32


In [45]:
frame.max()

A    6
B    7
C    8
dtype: int32

### lambda的应用

In [47]:
print(frame.max())
print(frame.min())

A    6
B    7
C    8
dtype: int32
A    0
B    1
C    2
dtype: int32


In [48]:
f = lambda x: x.max() - x.min()
print(frame.apply(f)) # 作用到每一列

A    6
B    6
C    6
dtype: int64


In [50]:
print(frame.apply(f, axis=1)) # 作用到每一行

a    2
b    2
c    2
dtype: int64


In [51]:
def f(x):
    """Summarise a Series by its smallest and largest value.

    When passed to ``DataFrame.apply``, ``x`` is one column (or one row
    with ``axis=1``) of the frame, i.e. a ``Series``.

    Returns a new ``Series`` labelled ``'min'`` and ``'max'``.
    """
    lowest = x.min()
    highest = x.max()
    return Series([lowest, highest], index=['min', 'max'])
print(frame.apply(f))

     A  B  C
min  0  1  2
max  6  7  8


In [52]:
# applymap 和 map 作用到每一个元素（注意：pandas 2.1 起 DataFrame.applymap 已弃用，请改用 DataFrame.map）
_format = lambda x: '%.2f' % x
print(frame.applymap(_format)) # 针对DataFrame

      A     B     C
a  0.00  1.00  2.00
b  3.00  4.00  5.00
c  6.00  7.00  8.00


In [53]:
print(frame['A'].map(_format)) # 针对Series
print(frame.A.map(_format)) # 针对Series

a    0.00
b    3.00
c    6.00
Name: A, dtype: object
a    0.00
b    3.00
c    6.00
Name: A, dtype: object
