# DataFrame 学习笔记

In [1]:
import numpy as np
import pandas as pd

## 1. 创建

### 通过 Dict 创建

In [None]:
# Column-oriented input: every key becomes a column and every Series supplies
# that column's rows; the resulting index is the union of the Series indexes.
# NOTE: the name `dict` shadows the builtin; it is kept because later cells use it.
dict = {
    'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
}
df = pd.DataFrame(dict)  # simplest form: no explicit index or columns
df

In [None]:
# Customise the result via index and columns: rows are selected and ordered as
# d, b, a; a requested column that is absent from the data ('three') is all-NaN.
pd.DataFrame(
    data=dict,
    index=['d', 'b', 'a'],
    columns=['two', 'three'],
)

In [None]:
df.index  # row labels of df

In [None]:
df.columns  # column labels of df

### 通过 NumPy 创建

In [6]:
# Structured array with 2 records (shape (2,)) and 3 fields, zero-initialised.
# Field types: i4 = int32, f4 = float32, S10 = 10-byte string (dtype name bytes80).
# 'S10' replaces the legacy 'a10' spelling: the 'a' alias was removed in NumPy 2.0.
ndarray = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])
ndarray

array([(0,  0., b''), (0,  0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [24]:
dt = np.dtype('S10')  # 10-byte fixed-width bytes string; 'a10' was a legacy alias for the same dtype

In [25]:
dt.name  # the dtype name gives the width in bits (10 bytes -> 'bytes80')

'bytes80'

In [7]:
# Fill both records in place, one tuple per record in field order (A, B, C);
# the str values end up stored as bytes in the S10 field.
ndarray[:] = [
    (1, 2., 'Hello'),
    (2, 3., "World"),
]
ndarray

array([(1,  2., b'Hello'), (2,  3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [8]:
pd.DataFrame(data=ndarray)  # simplest form: the field names become the columns

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


In [9]:
pd.DataFrame(data=ndarray, index=['first', 'second'], columns=['C', 'A', 'B'])  # explicit row labels and column order

Unnamed: 0,C,A,B
first,b'Hello',1,2.0
second,b'World',2,3.0


### 通过字典列表创建

In [None]:
# Row-oriented input: one dict per row; a key missing from a row becomes NaN.
list_dict = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]
pd.DataFrame(list_dict)

In [None]:
pd.DataFrame(data=list_dict, index=['first', 'second'], columns=['a', 'b'])  # label the rows and keep only columns a and b

### 通过其他方法创建

In [26]:
pd.DataFrame.from_records(data=ndarray, index='C')  # field 'C' is used as the index instead of a column

Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
b'Hello',1,2.0
b'World',2,3.0


In [27]:
# DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0.
# from_dict on an insertion-ordered dict builds the same frame (columns A, B).
# A dict literal is used because the name `dict` is rebound earlier in this notebook.
pd.DataFrame.from_dict({'A': [1, 2, 3], 'B': [4, 5, 6]})

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


## 2. 列操作（增删改查）

In [None]:
df["one"]  # read: select a single column by label

In [None]:
df['three'] = df['one'] * df['two']  # create: new column computed element-wise from two existing ones
df['flag'] = df['one'] > 2  # create: boolean column from a comparison
df

In [None]:
del df["two"]  # delete: remove the column in place
three = df.pop("three")  # delete: pop removes the column and hands it back
df

In [None]:
df['foo'] = 'bar'  # assign a scalar: the value is broadcast to every row of column 'foo'
df

In [None]:
df['one_trunc'] = df['one'][:2]  # create: a shorter Series is aligned on the index; rows without a match get NaN
df

In [None]:
df.insert(1, 'bar', df['one'])  # insert: place the values of 'one' as column 'bar' at position 1
df

In [None]:
df.assign(test=df["one"] + df["bar"])  # returns a new DataFrame with column 'test'; df itself is unchanged

In [None]:
df  # display df again to check its current contents