In [326]:
import pandas as pd
import numpy as np

In [327]:
# Column-oriented sample data: every keyword becomes one DataFrame column and
# the three lists line up row by row.
people = dict(
    first=['John', 'Jane', 'Jim'],
    last=['Doe', 'Smith', 'Beam'],
    email=['JohnDoe@email.com', 'JaneSmith@email.com', 'JimBeam@email.com'],
)
df = pd.DataFrame(people)

In [328]:
# Inspect the current column labels (returned as a pandas Index).
df.columns

Index(['first', 'last', 'email'], dtype='str')

In [329]:
# Rename every column at once by assigning a complete list of labels; the
# list is applied to the existing columns by position.
df.columns = ['first_name', 'last_name', 'email_address']
df

Unnamed: 0,first_name,last_name,email_address
0,John,Doe,JohnDoe@email.com
1,Jane,Smith,JaneSmith@email.com
2,Jim,Beam,JimBeam@email.com


In [330]:
# Upper-case each column label and assign the resulting list back.
df.columns = list(map(str.upper, df.columns))
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL_ADDRESS
0,John,Doe,JohnDoe@email.com
1,Jane,Smith,JaneSmith@email.com
2,Jim,Beam,JimBeam@email.com


In [331]:
# The .str accessor on the column Index applies string methods to every
# label: swap each underscore for a space.
df.columns = df.columns.str.replace('_', ' ')
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL ADDRESS
0,John,Doe,JohnDoe@email.com
1,Jane,Smith,JaneSmith@email.com
2,Jim,Beam,JimBeam@email.com


In [332]:
# Chain two string operations on the labels: spaces become underscores,
# then everything is lower-cased.
df.columns = df.columns.str.replace(' ', '_').str.lower()
df

Unnamed: 0,first_name,last_name,email_address
0,John,Doe,JohnDoe@email.com
1,Jane,Smith,JaneSmith@email.com
2,Jim,Beam,JimBeam@email.com


In [333]:
# Rename selected columns through an {old: new} mapping. rename() returns a
# new DataFrame, so the result is assigned back rather than using
# inplace=True (which offers no benefit and prevents method chaining).
df = df.rename(columns={'first_name': 'first', 'last_name': 'last', 'email_address': 'email'})
df

Unnamed: 0,first,last,email
0,John,Doe,JohnDoe@email.com
1,Jane,Smith,JaneSmith@email.com
2,Jim,Beam,JimBeam@email.com


In [334]:
# Overwrite the whole row labelled 2; the list supplies one value per
# column, in column order.
df.loc[2]=['Jimmy', 'Beam', 'JimmyBeam@email.com']
df

Unnamed: 0,first,last,email
0,John,Doe,JohnDoe@email.com
1,Jane,Smith,JaneSmith@email.com
2,Jimmy,Beam,JimmyBeam@email.com


In [335]:
# Update a single cell, addressed by row label and column name.
df.loc[2, 'email'] = 'JimmyBeam@gmail.com'
df

Unnamed: 0,first,last,email
0,John,Doe,JohnDoe@email.com
1,Jane,Smith,JaneSmith@email.com
2,Jimmy,Beam,JimmyBeam@gmail.com


In [336]:
# .at addresses exactly one cell by (row label, column name); here it puts
# the @email.com address back.
df.at[2, 'email'] = 'JimmyBeam@email.com'
df

Unnamed: 0,first,last,email
0,John,Doe,JohnDoe@email.com
1,Jane,Smith,JaneSmith@email.com
2,Jimmy,Beam,JimmyBeam@email.com


In [337]:
# Boolean mask selecting the rows whose first name is 'Jimmy'.
filt=(df['first']=='Jimmy')
# Deliberate anti-pattern: df[filt]['last'] = ... is chained indexing. The
# intermediate object df[filt] behaves as a copy, so the assignment never
# reaches df. pandas reports this with a ChainedAssignmentError warning
# (older pandas versions: SettingWithCopyWarning) and df is left unchanged.
df[filt]['last']='Brown'

C:\Users\abc\AppData\Local\Temp\ipykernel_46048\2700676449.py:2: ChainedAssignmentError: A value is being set on a copy of a DataFrame or Series through chained assignment.
Such chained assignment never works to update the original DataFrame or Series, because the intermediate object on which we are setting values always behaves as a copy (due to Copy-on-Write).

Try using '.loc[row_indexer, col_indexer] = value' instead, to perform the assignment in a single step.

See the documentation for a more detailed explanation: https://pandas.pydata.org/pandas-docs/stable/user_guide/copy_on_write.html#chained-assignment
  df[filt]['last']='Brown' #df[filt]['last'] = 'Brown' 是链式索引 —— df[filt] 返回的是原 DataFrame 的视图或副本，中间对象可能是副本（copy-on-write），对它赋值不会安全地更新原始 df，因此 pandas 阻止这种操作并抛出 ChainedAssignmentError（或在旧版本里是


In [338]:
# Correct form: select the rows and the column in a single .loc call so the
# assignment is applied to df itself.
df.loc[filt, 'last'] = 'Brown'
df

Unnamed: 0,first,last,email
0,John,Doe,JohnDoe@email.com
1,Jane,Smith,JaneSmith@email.com
2,Jimmy,Brown,JimmyBeam@email.com


Change multiple values at once: convert every value in the `email` column to lowercase.

In [339]:
# Lower-case the whole email column in one step with the .str accessor and
# write the result back into df.
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,John,Doe,johndoe@email.com
1,Jane,Smith,janesmith@email.com
2,Jimmy,Brown,jimmybeam@email.com


apply：对 Series 的每个元素或对 DataFrame 的每一行/列应用函数。

map：对 Series 的每个值进行映射，常用字典或映射函数。

applymap：对 DataFrame 中的每个元素逐个应用函数（逐元素操作）。注意：该方法自 pandas 2.1 起更名为 `DataFrame.map`，旧名称 `applymap` 在 pandas 3.0 中已被移除。

replace：按值替换（支持 dict、list、正则、序列），可同时替换多列多个值。比 map 更灵活，且不要求逐元素函数。

In [340]:
# Series.apply calls the function once per element: here, the length of
# each email string.
df['email'].apply(len)

0    17
1    19
2    19
Name: email, dtype: int64

In [341]:
def upper_email(email: str) -> str:
    """Return ``email`` converted to upper case via ``email.upper()``."""
    return email.upper()

# Pass the function object itself (no parentheses). The result is a new
# Series; df is not modified because nothing is assigned back.
df['email'].apply(upper_email)

0      JOHNDOE@EMAIL.COM
1    JANESMITH@EMAIL.COM
2    JIMMYBEAM@EMAIL.COM
Name: email, dtype: str

In [342]:
# Same call as before, but the result is assigned back so the email column
# in df now holds the upper-cased values.
df['email'] = df['email'].apply(upper_email)
df

Unnamed: 0,first,last,email
0,John,Doe,JOHNDOE@EMAIL.COM
1,Jane,Smith,JANESMITH@EMAIL.COM
2,Jimmy,Brown,JIMMYBEAM@EMAIL.COM


In [343]:
# An anonymous function works as well. Syntax: lambda <parameters>: <expression>
df['email']=df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,first,last,email
0,John,Doe,johndoe@email.com
1,Jane,Smith,janesmith@email.com
2,Jimmy,Brown,jimmybeam@email.com


In [344]:
# Diagnostics: report the pandas version, the type of df, and whether
# DataFrame.applymap still exists. pandas is already imported as pd in the
# first cell, so it is not imported a second time here.
print('pd.__version__ =', pd.__version__)
print('type(df) =', type(df))
print("hasattr(df, 'applymap') =", hasattr(df, 'applymap'))
print("hasattr(pd.DataFrame, 'applymap') =", hasattr(pd.DataFrame, 'applymap'))
print("'applymap' in dir(type(df)) =", 'applymap' in dir(type(df)))
print('\ndf.head():')
print(df.head())

pd.__version__ = 3.0.0
type(df) = <class 'pandas.DataFrame'>
hasattr(df, 'applymap') = False
hasattr(pd.DataFrame, 'applymap') = False
'applymap' in dir(type(df)) = False

df.head():
   first   last                email
0   John    Doe    johndoe@email.com
1   Jane  Smith  janesmith@email.com
2  Jimmy  Brown  jimmybeam@email.com


In [345]:
# First result: len() of each column (axis 0), i.e. the number of rows.
# Second result: len() of each row (axis 1), i.e. the number of columns.
# The three pieces are printed one per line, separated by a dashed rule.
print(
    df.apply(len, axis=0),
    '--------------------',
    df.apply(len, axis=1),
    sep='\n',
)

first    3
last     3
email    3
dtype: int64
--------------------
0    3
1    3
2    3
dtype: int64


In [346]:
# len() of a DataFrame is its number of rows ...
print(len(df))
# ... and len() of a single column is the same row count.
len(df['first'])

3


3

In [347]:
# Minimum value of each column; DataFrame.apply hands every column to
# pd.Series.min. For these string columns the minimum is the value that
# sorts first.
df.apply(pd.Series.min)

first                   Jane
last                   Brown
email    janesmith@email.com
dtype: str

In [348]:
# Same result written with a lambda: x is one column (a Series) per call.
df.apply(lambda x: x.min())

first                   Jane
last                   Brown
email    janesmith@email.com
dtype: str

In [349]:
# pandas 3.0.0 中已移除 DataFrame.applymap，使用向量化或按列 apply 替代
str_cols = df.select_dtypes(include=['object', 'string']).columns
if len(str_cols) == 0:
    print('No string-like columns to compute lengths')
else:
    # 向量化按列计算字符串长度（更快且兼容 pandas 3）
    lengths = df[str_cols].apply(lambda s: s.str.len())
    lengths

**pandas 3.0：关于 `applymap` 报错的原因与替代写法**

- 错误原因：在 pandas 3.0 中，`DataFrame.applymap` 已被移除，直接调用 `df.applymap(...)` 会导致 `AttributeError`（我们在诊断中看到 `pd.__version__ == '3.0.0'`）。

- 影响：如果代码中仍使用 `df.applymap(...)`，将无法运行；最直接的替代是逐元素等价的 `DataFrame.map`（pandas 2.1 起提供，例如 `df.map(len)`），也可以改用按列的向量化方法或对单列使用 `Series` 的方法。

- 推荐替代（按用途）：

```
# 错误（旧写法）
# df.applymap(len)

# 推荐：仅对字符串列按列计算长度（向量化且兼容 pandas 3）
str_cols = df.select_dtypes(include=['object', 'string']).columns
lengths = df[str_cols].apply(lambda s: s.str.len())

# 单列更简洁的写法（优先）：
df['email'].str.len()
```

- 说明：`apply` 在 DataFrame 上可用于按列/按行执行（传入的是 Series）；`.str` 这类向量化访问器通常更快、更安全，而 `Series.map`、`Series.apply` 是逐元素调用 Python 函数，并非向量化，适合没有向量化等价写法的场景。把结果整列赋值回去（或使用 `.loc`）即可避免链式赋值问题。


In [350]:
# Element-wise string length for every column: each column is cast to str
# first so that .str.len() can be used on all of them.
print(df.apply(lambda s: s.astype(str).str.len()))
# df itself is unchanged, because the result above was only printed.
print(df)

   first  last  email
0      4     3     17
1      4     5     19
2      5     5     19
   first   last                email
0   John    Doe    johndoe@email.com
1   Jane  Smith  janesmith@email.com
2  Jimmy  Brown  jimmybeam@email.com


In [351]:
# Lower-case every element: each column is cast to str and lower-cased, and
# the resulting DataFrame replaces df.
df=df.apply(lambda x: x.astype(str).str.lower())
df

Unnamed: 0,first,last,email
0,john,doe,johndoe@email.com
1,jane,smith,janesmith@email.com
2,jimmy,brown,jimmybeam@email.com


In [352]:
# Series.map with a dict looks every value up in the mapping.
df['first'].map({'john':'JOHN', 'jane':'JANE'}) 
# Values that have no matching key in the dict come back as NaN.

0    JOHN
1    JANE
2     NaN
Name: first, dtype: str

In [353]:
# Series.replace only changes the values that appear in the mapping and
# leaves the rest as they are, whereas Series.map would turn every unmatched
# value into NaN. The result is written back into the 'first' column:
# rebinding the name df to the returned Series would throw away the other
# columns and make this cell fail when it is run a second time.
df['first'] = df['first'].replace({'john':'Jonathan', 'jane':'Janet'})
df['first']


0    Jonathan
1       Janet
2       jimmy
Name: first, dtype: str