In [1]:
import numpy as np
import pandas as pd

# From dict of Series or dicts （从 `Series的字典` 或者 `字典的字典`）

In [20]:
# 字典的key是不可变对象，所以只有从value上嵌套字典.
# Series的字典
d = {
    "one": pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]),
    "two": pd.Series([1.0, 2.0, 3.0, 4.0], index=["a", "b", "c", "d"]),
}

# 字典的字典 
d1 = {
     66: {'a': 1.0, 'b': 2.0, 'c': 3.0},
     88: {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': 4.0}
}

df = pd.DataFrame(d)
print(df, '\n')

df1 = pd.DataFrame(d1)
print(df1)

   one  two
a  1.0  1.0
b  2.0  2.0
c  3.0  3.0
d  NaN  4.0 

    66   88
a  1.0  1.0
b  2.0  2.0
c  3.0  3.0
d  NaN  4.0


In [14]:
# index 、columns 属性
print(df.index, df.columns, sep='\n')

Index(['a', 'b', 'c', 'd'], dtype='object')
Index(['one', 'two'], dtype='object')


In [6]:
pd.DataFrame(d, index=["d", "b", "a"])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [8]:
pd.DataFrame(d, index=["d", "b", "a"], columns=["one", "two", "three"])

Unnamed: 0,one,two,three
d,,4.0,
b,2.0,2.0,
a,1.0,1.0,


In [10]:
pd.DataFrame(d, index=["d", "b", "a"], columns=["two", "three"])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


# From dict of ndarrays / lists （从 `多维数组的字典` 或者 `列表的字典`）

In [51]:
# 从多维数组的字典
d1 = {22: np.linspace(2, 8, 4), '44': np.linspace(6, 9, 4)}
print(d1, d1.keys(), d1.values(), sep='\n')

df1 = pd.DataFrame(d, index=['you', 'and', 'me', 'together'])
df1

{22: array([2., 4., 6., 8.]), '44': array([6., 7., 8., 9.])}
dict_keys([22, '44'])
dict_values([array([2., 4., 6., 8.]), array([6., 7., 8., 9.])])


Unnamed: 0,one,two
you,1.0,4.0
and,2.0,3.0
me,3.0,2.0
together,4.0,1.0


In [54]:
# 从列表的字典
d2 = {"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]}
print(d2, d2.keys(), d2.values(), sep='\n')

df2 = pd.DataFrame(d, index=['you', 'and', 'me', 'two'])
df2

{'one': [1.0, 2.0, 3.0, 4.0], 'two': [4.0, 3.0, 2.0, 1.0]}
dict_keys(['one', 'two'])
dict_values([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0]])


Unnamed: 0,one,two
you,1.0,4.0
and,2.0,3.0
me,3.0,2.0
two,4.0,1.0


# From a list of dicts （从字典列表）

In [63]:
data1= [{"a": 1, "b": 2}, {"a": 5, "b": 10, "c": 20}]

pd.DataFrame(data1)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [68]:
df = pd.DataFrame(data1, index=[66, "second"])
df

Unnamed: 0,a,b,c
66,1,2,
second,5,10,20.0


In [70]:
pd.DataFrame(data1, columns=["a", "b"])

Unnamed: 0,a,b
0,1,2
1,5,10


# Column selection, addition, deletion （列的选择，添加，删除）

In [137]:
df['a']

66        1
second    5
Name: a, dtype: int64

In [138]:
df[88] = df['a'] * df['c']
df

Unnamed: 0,a,b,c,foo,88
66,1,2,,bar,
second,5,10,20.0,bar,100.0


In [139]:
df['bool'] = df['a'] >3
df

Unnamed: 0,a,b,c,foo,88,bool
66,1,2,,bar,,False
second,5,10,20.0,bar,100.0,True


In [140]:
# 删除列
del df['bool']
print(df)
print('-'*40)

three = df.pop(88)
print(three)
df

        a   b     c  foo     88
66      1   2   NaN  bar    NaN
second  5  10  20.0  bar  100.0
----------------------------------------
66          NaN
second    100.0
Name: 88, dtype: float64


Unnamed: 0,a,b,c,foo
66,1,2,,bar
second,5,10,20.0,bar


In [141]:
# When inserting a scalar value, it will naturally be propagated to fill the column.
df["foo"] = "bar"
df

Unnamed: 0,a,b,c,foo
66,1,2,,bar
second,5,10,20.0,bar


In [142]:
df

Unnamed: 0,a,b,c,foo
66,1,2,,bar
second,5,10,20.0,bar


In [143]:
# 通过标签选择行，返回Series
df.loc[66]

a        1
b        2
c      NaN
foo    bar
Name: 66, dtype: object

In [144]:
# 通过整数定位选择行，返回Series
df.iloc[1]

a         5
b        10
c      20.0
foo     bar
Name: second, dtype: object

In [154]:
# 选择行，返回DataFrame
df[0:1]

Unnamed: 0,a,b,c,foo
66,1,2,,bar


# Data alignment and arithmetic （数据对齐和算术）

In [156]:
df = pd.DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"])

df2 = pd.DataFrame(np.random.randn(7, 3), columns=["A", "B", "C"])

df + df2

Unnamed: 0,A,B,C,D
0,-0.204583,-0.036403,-2.609017,
1,-1.235627,-0.107528,1.051367,
2,-0.076159,-0.475361,-1.210355,
3,1.541921,2.433145,0.941869,
4,-1.524824,2.145469,3.34643,
5,0.488028,-0.858226,-1.69623,
6,0.358485,0.974912,-1.240994,
7,,,,
8,,,,
9,,,,


In [159]:
df - df.iloc[0]

Unnamed: 0,A,B,C,D
0,0.0,0.0,0.0,0.0
1,-1.443348,-0.758676,2.980478,-1.198735
2,0.965814,-1.709082,0.809775,-0.325642
3,0.486363,1.142135,2.794139,-0.662611
4,-1.211406,0.588026,3.507337,-1.942058
5,0.430481,0.837151,1.783279,-1.259167
6,-1.127274,-1.225361,0.435295,-0.395587
7,-0.545789,-1.515276,1.508507,-0.209288
8,-1.08793,-0.446827,1.455255,-2.373353
9,-2.249915,-0.038779,2.398386,-1.400359


In [164]:
df = pd.DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"])
print(df)
df * 5

          A         B         C         D
0  0.497208 -0.068918  1.097084 -0.899213
1 -0.868029 -0.028722  0.774216  0.833607
2  0.227512  1.023738 -1.915376  1.106345
3 -0.320039  1.056687 -0.222126 -1.265995
4  0.723671 -1.208214  1.223327  1.417718
5 -0.694394  0.253839 -0.083271 -1.366001
6 -0.689010  1.055523  0.524109  0.562455
7 -0.300914  0.439087  1.291650 -0.042703
8 -0.473371 -0.449786 -2.140976 -1.148120
9  0.137790 -0.318675  1.865364  0.258543


Unnamed: 0,A,B,C,D
0,2.486039,-0.344589,5.485419,-4.496067
1,-4.340146,-0.143611,3.871078,4.168035
2,1.137561,5.118691,-9.576882,5.531727
3,-1.600194,5.283435,-1.110628,-6.329977
4,3.618355,-6.04107,6.116634,7.08859
5,-3.471971,1.269193,-0.416353,-6.830005
6,-3.445049,5.277615,2.620547,2.812277
7,-1.504568,2.195437,6.45825,-0.213516
8,-2.366853,-2.248928,-10.704878,-5.7406
9,0.688949,-1.593375,9.326819,1.292714


In [171]:
# 布尔值
df1 = pd.DataFrame({"a": [1, 0, 1], "b": [0, 1, 1]}, dtype=bool)
print(df1, '\n')

df2 = pd.DataFrame({"a": [0, 1, 1], "b": [1, 1, 0]}, dtype=bool)
print(df2)

       a      b
0   True  False
1  False   True
2   True   True 

       a      b
0  False   True
1   True   True
2   True  False


In [172]:
-df1

Unnamed: 0,a,b
0,False,True
1,True,False
2,False,False


# 转置

In [175]:
df = df[:3]
df

Unnamed: 0,A,B,C,D
0,0.497208,-0.068918,1.097084,-0.899213
1,-0.868029,-0.028722,0.774216,0.833607
2,0.227512,1.023738,-1.915376,1.106345


In [176]:
df.T

Unnamed: 0,0,1,2
A,0.497208,-0.868029,0.227512
B,-0.068918,-0.028722,1.023738
C,1.097084,0.774216,-1.915376
D,-0.899213,0.833607,1.106345


In [177]:
df.transpose()

Unnamed: 0,0,1,2
A,0.497208,-0.868029,0.227512
B,-0.068918,-0.028722,1.023738
C,1.097084,0.774216,-1.915376
D,-0.899213,0.833607,1.106345
