In [81]:
import pandas as pd
import numpy as np
if pd.__version__  != '3.0.0':
    raise Exception("This notebook requires pandas version 3.0.0")
print(pd.__version__)
# Sample records for three people, stored column-wise (one list per column).
people = dict(
    first=["John", "Jane", "Jim"],
    last=["Doe", "Smith", "Beam"],
    email=["JohnDoe@email.com", "JaneSmith@email.com", "JimBeam@email.com"],
)

# Build the working frame; the bare name on the last line displays it.
df = pd.DataFrame(people)
df

3.0.0


Unnamed: 0,first,last,email
0,John,Doe,JohnDoe@email.com
1,Jane,Smith,JaneSmith@email.com
2,Jim,Beam,JimBeam@email.com


In [82]:
# Join the two name columns element-wise with an empty separator, so the
# result has no space between first and last name.
df["first"] + "" + df["last"]

0      JohnDoe
1    JaneSmith
2      JimBeam
dtype: str

In [83]:
# Create the column with bracket notation: with dot notation
# (df.full_name = ...) Python would treat it as a plain attribute instead.
df["full_name"] = df["first"] + " " + df["last"]

In [84]:
# Reassign the result instead of using inplace=True.
df = df.drop(labels=["first", "last"], axis="columns")
df

Unnamed: 0,email,full_name
0,JohnDoe@email.com,John Doe
1,JaneSmith@email.com,Jane Smith
2,JimBeam@email.com,Jim Beam


因为你连接时没有加空格——你用的是 df['full_name']=df['first']+''+df['last']，结果是 John + Doe → JohnDoe，字符串中没有空格，按空格分割当然不会拆开。

修复与替代方法（简洁）：

在生成时加空格（推荐）：

df['full_name'] = df['first'] + ' ' + df['last']

df['full_name'].str.split(' ')

直接把拆分结果展开成两列：

df[['first','last']] = df['full_name'].str.split(' ', expand=True)

如果数据已经是 JohnDoe（无空格），可以用“在大写字母前拆分”的正则（仅对每一段都以大写字母开头的名字有效），例如按“大写字母开头的片段”逐段提取：

df['full_name'].str.findall(r'[A-Z][a-z]*')   # 'JohnDoe' → ['John', 'Doe']

expand=True 表示把每个分割后的列表“展开”为 DataFrame 的多列；expand=False（默认）则返回每行一个 list 的 Series。

In [85]:
# Split every full name on a single space; each row becomes a list of parts.
df["full_name"].str.split(pat=" ")

0      [John, Doe]
1    [Jane, Smith]
2      [Jim, Beam]
Name: full_name, dtype: object

右边 df['full_name'].str.split(' ', expand=True)：对每个字符串以空格分割，expand=True 将分割结果“展开”为一个 DataFrame（每个分段一列）；若某行分段少于最大列数，缺失处为 NaN。

左边 df[['first','last']] = ...：把右侧 DataFrame 的第 0 列赋给 first，第 1 列赋给 last（列数和目标列数应匹配，否则请限制分割数或选择子集）。

In [86]:
# Split each full name at the first space only (n=1) so the expanded result
# never has more than two columns. Without the limit, a name containing extra
# spaces (e.g. a middle name) would produce more columns than the two target
# labels and the assignment would raise.
df[['first', 'last']] = df['full_name'].str.split(' ', n=1, expand=True)
df

Unnamed: 0,email,full_name,first,last
0,JohnDoe@email.com,John Doe,John,Doe
1,JaneSmith@email.com,Jane Smith,Jane,Smith
2,JimBeam@email.com,Jim Beam,Jim,Beam


在较新的 pandas（你当前是 pandas 3.x）中，DataFrame.append 已被移除：它在 pandas 1.4 中被弃用，并在 pandas 2.0 中正式移除，应改用 pd.concat。

替代写法（示例）：df = pd.concat([df, pd.DataFrame([{'first':'Tony','last':'abc'}])], ignore_index=True)

直接按行写入（简单、交互式可用）：
df.loc[len(df)] = [None, 'Tony abc', 'Tony', 'abc']   # 按列顺序赋值：列表长度必须与列数一致（此处列为 email、full_name、first、last），否则会抛出 ValueError

df.loc[len(df)] = {'first':'Tony','last':'abc'} #或按列名对齐

In [87]:
# Append one record by concatenating a single-row frame. The new record has
# no 'email' key, so that cell is left missing; ignore_index=True renumbers
# the rows of the combined frame.
df = pd.concat(
    [
        df,
        pd.DataFrame([{"first": "Tony", "last": "Stark", "full_name": "Tony Stark"}]),
    ],
    ignore_index=True,
)
df

Unnamed: 0,email,full_name,first,last
0,JohnDoe@email.com,John Doe,John,Doe
1,JaneSmith@email.com,Jane Smith,Jane,Smith
2,JimBeam@email.com,Jim Beam,Jim,Beam
3,,Tony Stark,Tony,Stark


Python 不允许语句在赋值运算符 = 处直接换行结束。解析器在行尾遇到 = 时仍期待右侧的表达式，因此表达式不能从下一行才开始，除非使用显式续行（行尾加反斜杠 \）或隐式续行（把表达式放在 ()、[]、{} 括号内）。

In [88]:
# A second batch of people with the columns first / last / email.
people2 = dict(
    first=["Alice", "Bob"],
    last=["Wonderland", "Builder"],
    email=["alice@example.com", "bob@example.com"],
)

# Wrap the batch in its own frame and display it.
df2 = pd.DataFrame(people2)
df2

Unnamed: 0,first,last,email
0,Alice,Wonderland,alice@example.com
1,Bob,Builder,bob@example.com


In [89]:
# Stack df2 underneath df. Columns are matched by name, so the rows coming
# from df2 (which has no full_name column) get a missing value there.
df = pd.concat(objs=[df, df2], ignore_index=True)
df

Unnamed: 0,email,full_name,first,last
0,JohnDoe@email.com,John Doe,John,Doe
1,JaneSmith@email.com,Jane Smith,Jane,Smith
2,JimBeam@email.com,Jim Beam,Jim,Beam
3,,Tony Stark,Tony,Stark
4,alice@example.com,,Alice,Wonderland
5,bob@example.com,,Bob,Builder


In [None]:
# Remove the row whose index label is 5 and keep the resulting frame.
df = df.drop(labels=5, axis="index")
df

Unnamed: 0,email,full_name,first,last
0,JohnDoe@email.com,John Doe,John,Doe
1,JaneSmith@email.com,Jane Smith,Jane,Smith
2,JimBeam@email.com,Jim Beam,Jim,Beam
3,,Tony Stark,Tony,Stark
4,alice@example.com,,Alice,Wonderland


df['first'] == 'Jim'：对 first 列逐行比较，得到布尔 Series（True 表示该行 first 为 'Jim'）。

df[df['first'] == 'Jim']：用布尔索引筛出所有匹配的行（一个子 DataFrame）。

.index：取出上一步子 DataFrame 的索引标签列表（要删除的行的标签）。

df.drop(index=...)：按索引标签删除对应行，返回删除后的新 DataFrame；原 df 不会被就地修改，除非把结果赋回（df = df.drop(...)），或使用不推荐的 inplace=True。

In [None]:
# Select the rows whose first name is 'Jim', take their index labels, and drop
# those labels. The result is only displayed here; df is not reassigned.
df.drop(index=df.loc[df["first"] == "Jim"].index)

Unnamed: 0,email,full_name,first,last
0,JohnDoe@email.com,John Doe,John,Doe
1,JaneSmith@email.com,Jane Smith,Jane,Smith
3,,Tony Stark,Tony,Stark
4,alice@example.com,,Alice,Wonderland


In [95]:
# Boolean mask marking the rows whose first name is 'Jim'.
filt = (df['first'] == 'Jim')

# drop() returns a new frame, so the result has to be assigned back for the
# removal to persist. The sorting cells further down show 'Jim' already gone,
# which is only reproducible on a fresh run if the drop is kept here.
df = df.drop(index=df[filt].index)
df

Unnamed: 0,email,full_name,first,last
0,JohnDoe@email.com,John Doe,John,Doe
1,JaneSmith@email.com,Jane Smith,Jane,Smith
3,,Tony Stark,Tony,Stark
4,alice@example.com,,Alice,Wonderland


In [None]:
# Order the rows by the 'last' column, ascending (`by` names the sort key).
df = df.sort_values("last", ascending=True)
df

Unnamed: 0,email,full_name,first,last
0,JohnDoe@email.com,John Doe,John,Doe
1,JaneSmith@email.com,Jane Smith,Jane,Smith
3,,Tony Stark,Tony,Stark
4,alice@example.com,,Alice,Wonderland


In [100]:
# Sort on two keys: 'last' descending first, then 'first' ascending to break
# ties; the two lists pair each key with its own direction.
df = df.sort_values(
    by=["last", "first"],
    ascending=[False, True],
)
df

Unnamed: 0,email,full_name,first,last
4,alice@example.com,,Alice,Wonderland
3,,Tony Stark,Tony,Stark
1,JaneSmith@email.com,Jane Smith,Jane,Smith
0,JohnDoe@email.com,John Doe,John,Doe


In [104]:
# Order the rows by their index labels, largest label first.
df = df.sort_index(axis=0, ascending=False)
df

Unnamed: 0,email,full_name,first,last
4,alice@example.com,,Alice,Wonderland
3,,Tony Stark,Tony,Stark
1,JaneSmith@email.com,Jane Smith,Jane,Smith
0,JohnDoe@email.com,John Doe,John,Doe


In [105]:
# Sort only the 'last' column; this returns a sorted Series (index labels
# travel with their values) and is displayed without being stored.
df["last"].sort_values(ascending=True)

0           Doe
1         Smith
3         Stark
4    Wonderland
Name: last, dtype: str