In [121]:
import pandas as pd
import numpy as np
data = [
    {
        "name": "zhangsan",
        "age": 14,
        "sex": "boy",
    },
    {
        "name": "zhangsan",
        "age": 6,
        "sex": "xiaohai",
    },
    {
        "name": "lisi",
        "age": 15,
        "sex": "girls"
    },
    {
        "name": "wangwu",
        "age": 13,
        "sex": "girls"    
    },
    {
        "name": "hutao",
        "age": np.NaN,
        "sex": "girls"    
    }
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,name,age,sex
0,zhangsan,14.0,boy
1,zhangsan,6.0,xiaohai
2,lisi,15.0,girls
3,wangwu,13.0,girls
4,hutao,,girls


In [92]:
"""输出df的所有列名"""
df.columns

Index(['name', 'age', 'sex'], dtype='object')

In [123]:
"""修改列名"""
df.rename(columns={"name":"title"}, inplace=True)
df

Unnamed: 0,title,age,sex
0,zhangsan,14.0,boy
1,zhangsan,6.0,xiaohai
2,lisi,15.0,girls
3,wangwu,13.0,girls
4,hutao,,girls


In [94]:
"""统计某一列某个数据出现的次数"""
df["age"].value_counts()

14.0    1
6.0     1
15.0    1
13.0    1
Name: age, dtype: int64

In [126]:
"""将空值用上下值的平均值填充"""
df["age"] = df["age"].fillna(df["age"].interpolate())
df

Unnamed: 0,title,age,sex
0,zhangsan,14.0,boy
2,lisi,15.0,girls
3,wangwu,13.0,girls
4,hutao,13.0,girls


In [96]:
"""提取某一列值大于3的行"""
df[df["age"]>=14]

Unnamed: 0,title,age,sex
0,zhangsan,14.0,boy
2,lisi,15.0,girls


In [127]:
df = df.drop_duplicates(["title"])
df

Unnamed: 0,title,age,sex
0,zhangsan,14.0,boy
2,lisi,15.0,girls
3,wangwu,13.0,girls
4,hutao,13.0,girls


In [98]:
"""求一列数字的平均值"""
df["age"].mean()

13.75

In [99]:
"""将DataFrame某一列变为列表"""
df["title"].tolist()

['zhangsan', 'lisi', 'wangwu', 'hutao']

In [100]:
"""查看数据行列数"""
df.shape 

(4, 3)

In [101]:
"""提取列中值大于14小于16的行"""
df[(df["age"]>14)&(df["age"]<16)]

Unnamed: 0,title,age,sex
2,lisi,15.0,girls


In [102]:
"""交换两列的位置"""
cols=df.columns[[1,0,2]]
df=df[cols]
df

Unnamed: 0,age,title,sex
0,14.0,zhangsan,boy
2,15.0,lisi,girls
3,13.0,wangwu,girls
4,13.0,hutao,girls


In [103]:
"""提取某一列最大值所在行"""
df[df["age"]==df["age"].max()]

Unnamed: 0,age,title,sex
2,15.0,lisi,girls


In [104]:
"""查看最后5行数据"""
df.tail(3)

Unnamed: 0,age,title,sex
2,15.0,lisi,girls
3,13.0,wangwu,girls
4,13.0,hutao,girls


In [128]:
"""删除最后一行数据"""
# df.drop([len(df)-1], inplace=True)
df.drop([len(df)-1],inplace=True)
df


Unnamed: 0,title,age,sex
0,zhangsan,14.0,boy
2,lisi,15.0,girls
4,hutao,13.0,girls


In [130]:
"""添加一行数据"""
row = {"title": "math", "age":2000, "sex": np.NaN}
df = df.append(row, ignore_index=True)
df

Unnamed: 0,title,age,sex
0,zhangsan,14.0,boy
1,lisi,15.0,girls
2,hutao,13.0,girls
3,math,2000.0,


In [131]:
"""对数据安装列值的大小进行排序"""
df.sort_values("age", inplace=True)
df

Unnamed: 0,title,age,sex
2,hutao,13.0,girls
0,zhangsan,14.0,boy
1,lisi,15.0,girls
3,math,2000.0,


In [134]:
"""统计某一列数据每个字符串的长度"""
df["title"] = df["title"].fillna("R")
df["len_ste"] = df["title"].map(lambda x:len(x))
df

Unnamed: 0,title,age,sex,len_ste
2,hutao,13.0,girls,5
0,zhangsan,14.0,boy,8
1,lisi,15.0,girls,4
3,math,2000.0,,4
