In [3]:
import pandas as pd

# Toy data for the experiments below. Each column contains at least one None,
# which pandas stores as a missing value (NaN) once the frame is built.
sample = {
    "app_type": [1, 2, 3, None, 1, 2, 3, None],
    "app_way": [1, 2, None, 1, 2, None, 1, 2],
    "import_cd": [1, 2, 3, 11, 12, 13, None, 1],
    "status_cd": [None, 380, 410, 250, None, 380, 410, 250],
}
df = pd.DataFrame(data=sample)

In [4]:
# Element-wise equality of the two columns; a row with a missing value on
# either side compares as False.
bool_df = df['app_type'].eq(df['app_way'])

In [9]:
# Experiment: what does Series.apply do when handed a dict's bound .get method?
convert_app_type_dict = {1: "aaa", 2: "bbb", 3: "ccc", None: "やばい"}

print(df['app_type'])

# Each element is passed to .get as the lookup key. The missing entries are
# stored as NaN rather than None, so the None key is never matched and .get
# falls back to its default (None) for those rows.
sample_df = df['app_type'].apply(convert_app_type_dict.get)
print(sample_df)


0    1.0
1    2.0
2    3.0
3    NaN
4    1.0
5    2.0
6    3.0
7    NaN
Name: app_type, dtype: float64
0     aaa
1     bbb
2     ccc
3    None
4     aaa
5     bbb
6     ccc
7    None
Name: app_type, dtype: object


In [4]:
# Add a new column: 1 where app_type equals app_way, otherwise 0.
# bool_df is a boolean Series, so a vectorized cast yields the same 1/0 values
# as a per-element lambda without calling Python once per row.
df['new_way'] = bool_df.astype('int64')
df.head()

Unnamed: 0,app_type,app_way,import_cd,status_cd,new_way
0,1.0,1.0,1.0,,1
1,2.0,2.0,2.0,380.0,1
2,3.0,,3.0,410.0,0
3,,1.0,11.0,250.0,0
4,1.0,2.0,12.0,,0


In [5]:
# Overwrite an existing column (app_type) with the same 1/0 flag, using a
# vectorized cast instead of a per-element lambda.
# NOTE: from here on df['app_type'] holds the flag, not the original codes.
df['app_type'] = bool_df.astype('int64')
df.head()

Unnamed: 0,app_type,app_way,import_cd,status_cd,new_way
0,1,1.0,1.0,,1
1,1,2.0,2.0,380.0,1
2,0,,3.0,410.0,0
3,0,1.0,11.0,250.0,0
4,0,2.0,12.0,,0


In [6]:
def abc(row):
    """Return 1 when the row's 'app_type' equals its 'app_way', otherwise 0.

    ``row`` is indexed by the keys 'app_type' and 'app_way'. NaN never
    compares equal to anything, so a missing value on either side gives 0.
    """
    return 1 if row['app_type'] == row['app_way'] else 0
    
# Row-wise apply with a named function: axis="columns" hands each row to abc.
int_method = df.apply(abc, axis="columns")
int_method.head()

0    1
1    0
2    0
3    0
4    0
dtype: int64

In [7]:
# Row-wise comparison of app_type and app_way written as an inline lambda.
int_lambda = df.apply(
    lambda row: 1 if row['app_type'] == row['app_way'] else 0,
    axis="columns",
)
int_lambda.head()

0    1
1    0
2    0
3    0
4    0
dtype: int64

In [8]:
# Conditional assignment with .loc: rows where bool_df is False get the
# *string* 'False' in import_cd.
# import_cd starts out numeric, so cast it to object explicitly first; writing
# a string into a float column relies on silent upcasting, which pandas
# deprecated in 2.1.
df['import_cd'] = df['import_cd'].astype(object)
df.loc[~bool_df, 'import_cd'] = 'False'
df.head()

Unnamed: 0,app_type,app_way,import_cd,status_cd,new_way
0,1,1.0,1,,1
1,1,2.0,2,380.0,1
2,0,,False,410.0,0
3,0,1.0,False,250.0,0
4,0,2.0,False,,0


In [9]:
# Series.where keeps the values where the condition holds and replaces the
# rest; it returns a new Series and leaves df unchanged.
convert_not_false = df['import_cd'].where(
    df['import_cd'].eq('False'),
    other='False以外を置換',
)
convert_not_false.head()

0    False以外を置換
1    False以外を置換
2         False
3         False
4         False
Name: import_cd, dtype: object

In [10]:
# Series.mask is the inverse of where: values where the condition holds are
# replaced and the rest are kept. It also returns a new Series.
convert_false = df['import_cd'].mask(
    df['import_cd'].eq('False'),
    other='Falseを置換',
)
convert_false.head()

0           1
1           2
2    Falseを置換
3    Falseを置換
4    Falseを置換
Name: import_cd, dtype: object

In [11]:
# Use mask to transform the DataFrame column itself by writing the result back.
df['import_cd'] = df['import_cd'].mask(
    df['import_cd'].eq('False'),
    other='Falseを置換',
)
df.head()

Unnamed: 0,app_type,app_way,import_cd,status_cd,new_way
0,1,1.0,1,,1
1,1,2.0,2,380.0,1
2,0,,Falseを置換,410.0,0
3,0,1.0,Falseを置換,250.0,0
4,0,2.0,Falseを置換,,0


In [17]:
# DataFrame.assign returns a new frame; here its new_way column is replaced by
# the values of status_cd.
new_df = df.assign(new_way=lambda frame: frame['status_cd'])
new_df.head()

Unnamed: 0,app_type,app_way,import_cd,status_cd,new_way
0,1,1.0,1,,
1,1,2.0,2,380.0,380.0
2,0,,Falseを置換,410.0,410.0
3,0,1.0,Falseを置換,250.0,250.0
4,0,2.0,Falseを置換,,


In [21]:
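# assign, part 2
# Transform a column with Series.apply and a named function.
# assign replaces the existing new_way column in the frame it returns; df itself is not modified.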
def sample_assign(cd):
    """Map a code to its display label.

    Returns '1でした' when ``cd`` equals 1, '0でした' when it equals 0, and
    'その他' for anything else.
    """
    if cd == 1:
        label = '1でした'
    elif cd == 0:
        label = '0でした'
    else:
        label = 'その他'
    return label

convert_new_way_df = df.assign(
    new_way=lambda frame: frame['new_way'].apply(sample_assign)
)
# Print both frames side by side: df keeps its numeric new_way column, while
# the frame returned by assign carries the labels.
print(df.head())
print(convert_new_way_df.head())

   app_type  app_way import_cd  status_cd  new_way
0         1      1.0         1        NaN        1
1         1      2.0         2      380.0        1
2         0      NaN  Falseを置換      410.0        0
3         0      1.0  Falseを置換      250.0        0
4         0      2.0  Falseを置換        NaN        0
   app_type  app_way import_cd  status_cd new_way
0         1      1.0         1        NaN    1でした
1         1      2.0         2      380.0    1でした
2         0      NaN  Falseを置換      410.0    0でした
3         0      1.0  Falseを置換      250.0    0でした
4         0      2.0  Falseを置換        NaN    0でした


In [50]:
# Keep only the rows whose app_type is 0.
filter_df = df[df['app_type'].eq(0)]
filter_df.head()

Unnamed: 0,app_type,app_way,import_cd,status_cd,new_way
2,0,,Falseを置換,410.0,0
3,0,1.0,Falseを置換,250.0,0
4,0,2.0,Falseを置換,,0
5,0,,Falseを置換,380.0,0
6,0,1.0,Falseを置換,410.0,0


In [51]:
# Lookup table used to relabel app_type: 0 -> 'aaa'.
aaa = {
    0: 'aaa'
}

# This assignment triggers a SettingWithCopyWarning: filter_df was produced by
# boolean indexing on df, so pandas cannot tell whether the write is meant to
# reach df as well. The column in filter_df is still updated.
filter_df['app_type'] = filter_df['app_type'].apply(lambda x: aaa.get(x))
filter_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filter_df['app_type'] = filter_df['app_type'].apply(lambda x: aaa.get(x))


Unnamed: 0,app_type,app_way,import_cd,status_cd,new_way
2,aaa,,Falseを置換,410.0,0
3,aaa,1.0,Falseを置換,250.0,0
4,aaa,2.0,Falseを置換,,0
5,aaa,,Falseを置換,380.0,0
6,aaa,1.0,Falseを置換,410.0,0
7,aaa,2.0,Falseを置換,250.0,0


In [52]:
# Inverse lookup table: 'aaa' -> 0.
aaa = {'aaa': 0}

# Applying to a single column yields a Series.
# NOTE: this rebinds filter_df from a DataFrame to that Series, so re-running
# the cell would index the Series rather than the original frame.
filter_df = filter_df['app_type'].apply(aaa.get)
filter_df

2    0
3    0
4    0
5    0
6    0
7    0
Name: app_type, dtype: int64

In [54]:
# Goal of this cell: check that applying a function to an empty DataFrame
# does not raise. Start from a small frame of names, heights and weights.
sample_df = pd.DataFrame(
    data={
        "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
        "height": [1.66, 1.68, 1.70, 1.72, 1.75],
        "weight": [50, 55, 65, 75, 80],
    }
)


def calc_shape(x):
    """Classify a record by its body-mass index, weight / height ** 2.

    Reads the ``weight`` and ``height`` attributes of ``x``. Returns
    "Overweight" when the index is at least 25, "Normal" when it is at least
    18.5, and "Thin" otherwise.
    """
    body_mass_index = x.weight / (x.height ** 2)
    if body_mass_index >= 25:
        return "Overweight"
    if body_mass_index >= 18.5:
        return "Normal"
    return "Thin"
    
# Filter on a name that is not present to obtain an empty frame. .copy() makes
# it an independent DataFrame, so the column assignment below never targets a
# slice of sample_df.
sample2_df = sample_df[sample_df["name"] == "Foo"].copy()
print(sample2_df)

# result_type="reduce" asks apply for a Series even when there are no rows.
# Without it, whether an empty frame yields a Series or an empty DataFrame
# depends on how pandas probes the function, and a multi-column DataFrame may
# be rejected when assigned to a single column.
sample2_df["name"] = sample2_df.apply(calc_shape, axis=1, result_type="reduce")
sample2_df

Empty DataFrame
Columns: [name, height, weight]
Index: []


Unnamed: 0,name,height,weight
