# 【pandas】データフレームの行をdf[bool]・queryで抽出する

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Load seaborn's "fmri" example dataset into a DataFrame and preview the
# first five rows.
fmri = sns.load_dataset(name="fmri")
fmri.head(n=5)

Unnamed: 0,subject,timepoint,event,region,signal
0,s13,18,stim,parietal,-0.017552
1,s5,14,stim,parietal,-0.080883
2,s12,18,stim,parietal,-0.081033
3,s11,18,stim,parietal,-0.046134
4,s10,18,stim,parietal,-0.03797


In [3]:
# Count how many rows fall into each category of the two columns used for
# filtering below, and print both tables separated by a divider line.
event_counts = fmri.value_counts('event')
region_counts = fmri.value_counts('region')
print(event_counts, "-----", region_counts, sep="\n")

event
cue     532
stim    532
dtype: int64
-----
region
frontal     532
parietal    532
dtype: int64


## df[df[column] == x]
[]内の条件式は行ごとのTrue / Falseを返し、Trueの行だけが抽出される。

### 基本形

In [4]:
# Build a boolean mask (one True/False per row), then keep only the rows
# where the mask is True.
is_cue = fmri['event'] == "cue"
fmri_cue = fmri[is_cue]
fmri_cue.head()

Unnamed: 0,subject,timepoint,event,region,signal
532,s3,4,cue,parietal,0.058219
533,s6,5,cue,parietal,0.038145
534,s7,5,cue,parietal,-0.008158
535,s8,5,cue,parietal,0.047136
536,s9,5,cue,parietal,0.055847


In [5]:
# Sanity check: after filtering, "cue" should be the only event left.
cue_event_counts = fmri_cue.value_counts('event')
cue_event_counts

event
cue    532
dtype: int64

### 複数項目の絞り込み

In [6]:
# Combine two row conditions with `&` (element-wise AND): the event must be
# "cue" and the region must be anything other than 'parietal'.
is_cue = fmri['event'] == "cue"
not_parietal = fmri['region'] != 'parietal'
fmri_cue2 = fmri[is_cue & not_parietal]
fmri_cue2.head()

Unnamed: 0,subject,timepoint,event,region,signal
566,s4,14,cue,frontal,-0.026796
579,s5,14,cue,frontal,-0.017213
596,s9,0,cue,frontal,-0.008117
597,s9,6,cue,frontal,0.026864
598,s3,14,cue,frontal,-0.030614


### and or not 演算子の注意点

注意点：条件をつなぐときに and, or, not は使えません。代わりに &, |, ~ を使い、各条件は () で囲みます。

In [7]:
# Combine two row conditions with `|` (element-wise OR): keep rows whose
# subject is either "s4" or "s5".
is_s4 = fmri['subject'] == "s4"
is_s5 = fmri['subject'] == "s5"
fmri_subject = fmri[is_s4 | is_s5]
fmri_subject.head()

Unnamed: 0,subject,timepoint,event,region,signal
1,s5,14,stim,parietal,-0.080883
9,s5,18,stim,parietal,-0.040557
10,s4,18,stim,parietal,-0.048812
23,s5,17,stim,parietal,-0.056682
24,s4,17,stim,parietal,-0.044582


### in演算子に相当する isin() の使い方

In [8]:
# Use isin() for membership tests: it yields True for every row whose
# subject appears in the given list.
wanted_subjects = ["s4", "s5"]
in_wanted = fmri['subject'].isin(wanted_subjects)
fmri_subject2 = fmri[in_wanted]
fmri_subject2.head()

Unnamed: 0,subject,timepoint,event,region,signal
1,s5,14,stim,parietal,-0.080883
9,s5,18,stim,parietal,-0.040557
10,s4,18,stim,parietal,-0.048812
23,s5,17,stim,parietal,-0.056682
24,s4,17,stim,parietal,-0.044582


## df.query('column == "str"')

### 基本形

In [9]:
# query() takes the row condition as a string; column names are written
# bare and string values are quoted inside the expression.
stim_expr = 'event == "stim"'
fmri_stim = fmri.query(stim_expr)
fmri_stim.head()

Unnamed: 0,subject,timepoint,event,region,signal
0,s13,18,stim,parietal,-0.017552
1,s5,14,stim,parietal,-0.080883
2,s12,18,stim,parietal,-0.081033
3,s11,18,stim,parietal,-0.046134
4,s10,18,stim,parietal,-0.03797


### 複数項目の絞り込み

In [10]:
# Two conditions joined with `&` inside a query string. With query()'s
# default parser, `&` has the same precedence as `and`, so the individual
# comparisons do not need parentheses here.
stim_not_parietal_expr = 'event == "stim" & region != "parietal"'
fmri_stim2 = fmri.query(stim_not_parietal_expr)
fmri_stim2.head()

Unnamed: 0,subject,timepoint,event,region,signal
67,s0,0,stim,frontal,-0.021452
170,s2,6,stim,frontal,0.10105
267,s10,4,stim,frontal,0.030044
268,s11,4,stim,frontal,0.075957
269,s3,0,stim,frontal,0.011056


query() の式の中では、and / or / not と & / | / ~ のどちらも使用可能。

In [11]:
# `not in` with a list literal excludes every row whose subject is one of
# the listed values.
excluded_subjects_expr = 'subject not in ["s13", "s12"]'
fmri_subject3 = fmri.query(excluded_subjects_expr)
fmri_subject3.head()

Unnamed: 0,subject,timepoint,event,region,signal
1,s5,14,stim,parietal,-0.080883
3,s11,18,stim,parietal,-0.046134
4,s10,18,stim,parietal,-0.03797
5,s9,18,stim,parietal,-0.103513
6,s8,18,stim,parietal,-0.064408


In [12]:
# Small example frame with one missing value (NaN) in each column, used to
# demonstrate filtering out missing values with query().
# `pd` and `np` come from the notebook's import cell at the top, so no
# imports are needed in the middle of the notebook.
df = pd.DataFrame({
    'A': ["alpha", "beta", np.nan],
    'B': ["ABC", np.nan, "GHI"],
})
df

Unnamed: 0,A,B
0,alpha,ABC
1,beta,
2,,GHI


In [13]:
# Drop the rows where B is missing: NaN never compares equal to itself, so
# the condition 'B == B' is False exactly on the rows where B is NaN.
b_not_nan_expr = 'B == B'
df_new = df.query(b_not_nan_expr)
df_new

Unnamed: 0,A,B
0,alpha,ABC
2,,GHI


### 変数での指定
クエリ文字列の中で `@変数名` と書くと、Pythonの変数を参照できます。

In [14]:
# Python variables referenced from the query string below via the `@` prefix.
# Their names must match the `@subject_list` / `@event_value` references.
subject_list, event_value = ["s13", "s12", "s11"], "stim"

# Keep the listed subjects, excluding rows whose event equals event_value.
subjects_without_event_expr = 'subject in @subject_list and event != @event_value '
fmri_subject4 = fmri.query(subjects_without_event_expr)
fmri_subject4.head()

Unnamed: 0,subject,timepoint,event,region,signal
540,s12,5,cue,parietal,0.047577
551,s13,4,cue,parietal,0.053692
552,s12,4,cue,parietal,0.058198
553,s11,4,cue,parietal,0.008013
561,s11,2,cue,parietal,-0.054846


### 文字列の指定
str.contains()<br>
str.endswith()<br>
str.startswith()<br>
str.match()

In [15]:
# Prefix match through the .str accessor: keep subjects starting with "s1".
# engine='python' is passed explicitly; presumably the default engine cannot
# evaluate .str method calls -- confirm for the installed pandas version.
starts_with_s1_expr = 'subject.str.startswith("s1")'
fmri_start = fmri.query(starts_with_s1_expr, engine='python')
fmri_start.head(n=3)

Unnamed: 0,subject,timepoint,event,region,signal
0,s13,18,stim,parietal,-0.017552
2,s12,18,stim,parietal,-0.081033
3,s11,18,stim,parietal,-0.046134


In [16]:
# Suffix match through the .str accessor: keep subjects ending with "0".
# engine='python' is passed explicitly; presumably the default engine cannot
# evaluate .str method calls -- confirm for the installed pandas version.
ends_with_0_expr = 'subject.str.endswith("0")'
fmri_end = fmri.query(ends_with_0_expr, engine='python')
fmri_end.head(n=3)

Unnamed: 0,subject,timepoint,event,region,signal
4,s10,18,stim,parietal,-0.03797
14,s0,18,stim,parietal,-0.07557
18,s10,17,stim,parietal,-0.016847


In [17]:
# Substring match through the .str accessor: keep subjects containing "2".
# engine='python' is passed explicitly; presumably the default engine cannot
# evaluate .str method calls -- confirm for the installed pandas version.
contains_2_expr = 'subject.str.contains("2")'
fmri_ct = fmri.query(contains_2_expr, engine='python')
fmri_ct.head(n=3)

Unnamed: 0,subject,timepoint,event,region,signal
2,s12,18,stim,parietal,-0.081033
12,s2,18,stim,parietal,-0.086623
16,s12,17,stim,parietal,-0.088512


### オブジェクトを上書き
結果を別の変数に代入せず、inplace=Trueで元のデータフレーム自体を上書きする。

In [18]:
# Demonstrates in-place filtering: with inplace=True, query() overwrites
# `fmri` itself instead of producing a new DataFrame to assign.
# NOTE: after this cell `fmri` holds only the "stim" rows, so earlier cells
# that rely on the "cue" rows will give different results if re-run without
# reloading the dataset first.
fmri.query('event == "stim"', inplace=True)
fmri.head()

Unnamed: 0,subject,timepoint,event,region,signal
0,s13,18,stim,parietal,-0.017552
1,s5,14,stim,parietal,-0.080883
2,s12,18,stim,parietal,-0.081033
3,s11,18,stim,parietal,-0.046134
4,s10,18,stim,parietal,-0.03797
