## pandas 

### 1、Series

In [1]:
import pandas as pd
import numpy as np
# Use the fully qualified option key. The bare "max_columns" pattern is
# ambiguous in recent pandas releases (it also matches the Styler option)
# and raises OptionError there.
pd.set_option("display.max_columns", 50)

In [2]:
# A Series is a one-dimensional labelled array. With no explicit index,
# pandas assigns the default integer labels 0..n-1.
a = pd.Series(
    data=[7, 'Beijing', 2.17, -12344, 'Happy Birthday!'],
)
a

0                  7
1            Beijing
2               2.17
3             -12344
4    Happy Birthday!
dtype: object

In [3]:
# Supply explicit index labels instead of the default integers.
a_1 = pd.Series(
    data=[7, 'Beijing', 2.17, -12344, 'Happy Birthday!'],
    index=list('ABCDE'),
)
a_1

A                  7
B            Beijing
C               2.17
D             -12344
E    Happy Birthday!
dtype: object

In [5]:
# Build a Series from a dict: the keys become the index labels, and the
# None value is stored as a missing value (NaN).
cities = {
    'Beijing': 55000,
    'Shanghai': 60000,
    'Shenzhen': 50000,
    'Hangzhou': 20000,
    'Guangzhou': 25000,
    'Suzhou': None,
}
a_2 = pd.Series(data=cities)
a_2

Beijing      55000.0
Guangzhou    25000.0
Hangzhou     20000.0
Shanghai     60000.0
Shenzhen     50000.0
Suzhou           NaN
dtype: float64

In [6]:
# Select several entries at once by passing a list of index labels.
a_2.loc[['Beijing', 'Shanghai']]

Beijing     55000.0
Shanghai    60000.0
dtype: float64

In [9]:
# Boolean indexing: the comparison yields a boolean Series (a NaN entry
# compares as False), which is then used as a mask to keep matching entries.
print(a_2 < 50000)
print(a_2.loc[a_2 < 50000])

Beijing      False
Guangzhou     True
Hangzhou      True
Shanghai     False
Shenzhen     False
Suzhou       False
dtype: bool
Guangzhou    25000.0
Hangzhou     20000.0
dtype: float64


In [10]:
# Assign a new value to an existing label; this modifies a_2 in place.
a_2.loc['Shenzhen'] = 55000
a_2

Beijing      55000.0
Guangzhou    25000.0
Hangzhou     20000.0
Shanghai     60000.0
Shenzhen     55000.0
Suzhou           NaN
dtype: float64

In [12]:
a_3 = pd.Series({
    'Beijing': 300000,
    'Shanghai': 400000,
    'Shenzhen': 300000,
    'Tianjin': 200000,
    'Guangzhou': 200000,
    'Chongqing': 150000,
})
print(a_3)
# Arithmetic between two Series aligns on the index labels; a label that is
# missing from either operand (or holds NaN) yields NaN in the result.
a_3 + a_2 * 10

Beijing      300000
Chongqing    150000
Guangzhou    200000
Shanghai     400000
Shenzhen     300000
Tianjin      200000
dtype: int64


Beijing       850000.0
Chongqing          NaN
Guangzhou     450000.0
Hangzhou           NaN
Shanghai     1000000.0
Shenzhen      850000.0
Suzhou             NaN
Tianjin            NaN
dtype: float64

In [13]:
# Element-wise check for non-missing values: True where a value is present.
print(pd.notnull(a_3))

Beijing      True
Chongqing    True
Guangzhou    True
Shanghai     True
Shenzhen     True
Tianjin      True
dtype: bool


### 2、DataFrame

##### DataFrame 表示的是一个二维数组（带行索引和列标签的表格）

In [14]:
# Column-oriented input: each dict key is a column name and each list holds
# that column's values.
data = {
    'city': ['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Hangzhou', 'Chongqing'],
    'year': [2016, 2017, 2016, 2017, 2016, 2016],
    'population': [2100, 2300, 1000, 700, 500, 500],
}
print(pd.DataFrame(data))
# Passing `columns` fixes the column order explicitly.
print(pd.DataFrame(data, columns=['year', 'city', 'population']))

        city  population  year
0    Beijing        2100  2016
1   Shanghai        2300  2017
2  Guangzhou        1000  2016
3   Shenzhen         700  2017
4   Hangzhou         500  2016
5  Chongqing         500  2016
   year       city  population
0  2016    Beijing        2100
1  2017   Shanghai        2300
2  2016  Guangzhou        1000
3  2017   Shenzhen         700
4  2016   Hangzhou         500
5  2016  Chongqing         500


In [15]:
# Reorder the columns, add a new column, and replace the default index.
# 'debt' is not a key of `data`, so that column is filled with NaN.
b = pd.DataFrame(
    data,
    columns=['year', 'city', 'population', 'debt'],
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
print(b)

       year       city  population debt
one    2016    Beijing        2100  NaN
two    2017   Shanghai        2300  NaN
three  2016  Guangzhou        1000  NaN
four   2017   Shenzhen         700  NaN
five   2016   Hangzhou         500  NaN
six    2016  Chongqing         500  NaN


In [21]:
# Print the 'city' column (attribute-style column access).
print(b.city)
# Print the row labelled 'four'. The old `.ix` indexer was removed in
# pandas 1.0; `.loc` is the label-based replacement and returns the same row.
print(b.loc['four'])

one        Beijing
two       Shanghai
three    Guangzhou
four      Shenzhen
five      Hangzhou
six      Chongqing
Name: city, dtype: object
year              2017
city          Shenzhen
population         700
debt               NaN
Name: four, dtype: object
