## 创建 Pandas DataFrame

In [1]:
import pandas as pd

**使用 Pandas Series 字典手动创建一个 DataFrame：**

**第一步：创建 Pandas Series 字典**

In [25]:
# Shopping-cart contents for Bob and Alice. Each Series holds the item
# prices as its values and the item names as its index labels.
items = dict(
    Bob=pd.Series([245, 25, 55], index=['bike', 'pants', 'watch']),
    Alice=pd.Series([40, 110, 500, 45], index=['book', 'glasses', 'bike', 'pants']),
)
# The container itself is a plain Python dict (not yet a DataFrame).
type(items)

dict

**第二步：字典创建完毕后，我们可以将该字典传递给 pd.DataFrame() 函数**

In [4]:
# Pass the dictionary of Series to the DataFrame constructor.
# Mind the spelling: both the D and the F in `DataFrame` are capitalized.
# Items that a person did not buy show up as NaN ("not a number").
shopping_carts = pd.DataFrame(data=items)
shopping_carts

Unnamed: 0,Alice,Bob
bike,500.0,245.0
book,40.0,
glasses,110.0,
pants,45.0,25.0
watch,,55.0


In [6]:
# When the Series carry no index labels, pandas labels the DataFrame rows
# with integers starting at 0.
data = dict(
    Bob=pd.Series([245, 25, 55]),
    Alice=pd.Series([40, 110, 500, 45]),
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,Alice,Bob
0,40,245.0
1,110,25.0
2,500,55.0
3,45,


In [7]:
# Row (index) labels of the DataFrame.
shopping_carts.index

Index(['bike', 'book', 'glasses', 'pants', 'watch'], dtype='object')

In [8]:
# Column labels of the DataFrame.
shopping_carts.columns

Index(['Alice', 'Bob'], dtype='object')

In [9]:
# The underlying data, returned as an array (missing entries appear as nan).
shopping_carts.values

array([[ 500.,  245.],
       [  40.,   nan],
       [ 110.,   nan],
       [  45.,   25.],
       [  nan,   55.]])

In [10]:
# Shape information.
# shopping_carts has two dimensions: five rows and two columns.
shopping_carts.shape

(5, 2)

In [13]:
# Number of dimensions.
shopping_carts.ndim

2

In [12]:
# Size: total number of elements (rows x columns).
shopping_carts.size

10

In [20]:
# Keep only selected column labels when building the DataFrame.
# Note that the keyword is `columns` (plural).
bob_shopping_carts = pd.DataFrame(data=items, columns=['Bob'])
bob_shopping_carts

Unnamed: 0,Bob
bike,245
pants,25
watch,55


In [21]:
# Keep only selected row (index) labels when building the DataFrame.
sel_shopping_carts = pd.DataFrame(data=items, index=['pants', 'book'])
sel_shopping_carts

Unnamed: 0,Alice,Bob
pants,45,25.0
book,40,


In [29]:
# Select by row labels and column labels at the same time.
# Labels are case-sensitive, so they must match exactly.
alice_sel_shopping_carts = pd.DataFrame(
    data=items,
    index=['glasses', 'bike'],
    columns=['Alice'],
)
alice_sel_shopping_carts

Unnamed: 0,Alice
glasses,110
bike,500


In [31]:
# Build a DataFrame from a dictionary of plain Python lists, supplying the
# row labels explicitly through `index`.
# NOTE(review): 'Intergers' is probably a typo for 'Integers'; it is kept
# as-is because it is the column label shown in the recorded output.
data = {
    'Intergers': [1, 2, 3],
    'Floats': [4.5, 8.2, 9.6],
}
df = pd.DataFrame(data=data, index=['label1', 'label2', 'label3'])
df

Unnamed: 0,Floats,Intergers
label1,4.5,1
label2,8.2,2
label3,9.6,3


In [35]:
# Build a DataFrame from a list of Python dictionaries: each dictionary
# becomes one row, and a key missing from a row shows up as NaN.
# NOTE(review): this rebinds `items`, which the earlier cells use for the
# dict of Series; re-running those cells after this one would see the list.
items = [
    {'bikes': 20, 'pants': 30, 'watches': 35},
    {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5},
]
store_items = pd.DataFrame(data=items, index=['store1', 'store2'])
store_items

Unnamed: 0,bikes,glasses,pants,watches
store1,20,,30,35
store2,15,50.0,5,10
