# Filtering DataFrame by One Condition

##  ----  Quick Summary ----

## same
### titanic[titanic["sex"]=="male"]
### titanic[titanic["sex"]=="male"]["fare"] " 1 column, series output
### titanic[titanic["sex"]=="male"][["fare","age"]] " more than 1 column
---
### Advantage of .loc: the column selection goes inside the same indexer
### titanic.loc[titanic["sex"]=="male"]
### titanic.loc[titanic["sex"]=="male", "fare"] "series output
### titanic.loc[titanic["sex"]=="male",["fare"]] " 1 column, dataframe output
### titanic.loc[titanic["sex"]=="male",["fare","age"]] " more than 1 column
---

# PART1: "titanic" DATASET

In [1]:
import pandas as pd

In [2]:
# Load the dataset from "titanic.csv" (path is relative to the working directory).
titanic = pd.read_csv("titanic.csv")

In [3]:
# Preview the first 10 rows.
titanic.head(10)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.075,S,
8,1,3,female,27.0,0,2,11.1333,S,
9,1,2,female,14.0,1,0,30.0708,C,


In [4]:
# List the column labels available for filtering and selection.
titanic.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'deck'],
      dtype='object')

In [5]:
# Attribute-style access to the "sex" column; show its first 10 values.
titanic.sex.head(10)

0      male
1    female
2    female
3    female
4      male
5      male
6      male
7      male
8    female
9    female
Name: sex, dtype: object

In [6]:
# Element-wise comparison: produces a boolean Series, True where sex is "male".
titanic.sex == "male"

0       True
1      False
2      False
3      False
4       True
       ...  
886     True
887    False
888    False
889     True
890     True
Name: sex, Length: 891, dtype: bool

# filter as DataFrame

In [20]:
# Boolean indexing with []: keep only the rows where the mask is True.
titanic[titanic["sex"]=="male"]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.2500,S,
4,0,3,male,35.0,0,0,8.0500,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.0750,S,
...,...,...,...,...,...,...,...,...,...
883,0,2,male,28.0,0,0,10.5000,S,
884,0,3,male,25.0,0,0,7.0500,S,
886,0,2,male,27.0,0,0,13.0000,S,
889,1,1,male,26.0,0,0,30.0000,C,C


# 1 column with [ " " ]

In [22]:
# Chain a second [] with a single column label: the result is that column as a Series.
titanic[titanic["sex"]=="male"]["fare"]

0       7.2500
4       8.0500
5       8.4583
6      51.8625
7      21.0750
        ...   
883    10.5000
884     7.0500
886    13.0000
889    30.0000
890     7.7500
Name: fare, Length: 577, dtype: float64

# more than 1 column with [ [" "," "] ]

In [24]:
# Chain a second [] with a list of labels: the result is a DataFrame with those columns.
titanic[titanic["sex"]=="male"][["fare","age"]]

Unnamed: 0,fare,age
0,7.2500,22.0
4,8.0500,35.0
5,8.4583,
6,51.8625,54.0
7,21.0750,2.0
...,...,...
883,10.5000,28.0
884,7.0500,25.0
886,13.0000,27.0
889,30.0000,26.0


In [25]:
# Visual divider between notebook sections: one long line of dashes.
print(500 * "--")

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# .loc is better: rows and columns can be selected in a single call (same row filter as above)

In [8]:
# .loc with only a boolean row mask: same rows as the plain [] filter, all columns kept.
titanic.loc[titanic["sex"]=="male"]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.2500,S,
4,0,3,male,35.0,0,0,8.0500,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.0750,S,
...,...,...,...,...,...,...,...,...,...
883,0,2,male,28.0,0,0,10.5000,S,
884,0,3,male,25.0,0,0,7.0500,S,
886,0,2,male,27.0,0,0,13.0000,S,
889,1,1,male,26.0,0,0,30.0000,C,C


In [9]:
# Row mask plus a single column label in one .loc call: the result is a Series.
titanic.loc[titanic["sex"]=="male","fare"]

0       7.2500
4       8.0500
5       8.4583
6      51.8625
7      21.0750
        ...   
883    10.5000
884     7.0500
886    13.0000
889    30.0000
890     7.7500
Name: fare, Length: 577, dtype: float64

In [10]:
# Wrapping the single label in a list keeps the result a one-column DataFrame.
titanic.loc[titanic["sex"]=="male",["fare"]]

Unnamed: 0,fare
0,7.2500
4,8.0500
5,8.4583
6,51.8625
7,21.0750
...,...
883,10.5000
884,7.0500
886,13.0000
889,30.0000


## More Columns

In [11]:
# Row mask plus a list of column labels: a DataFrame with just those columns.
titanic.loc[titanic["sex"]=="male",["fare","age"]]

Unnamed: 0,fare,age
0,7.2500,22.0
4,8.0500,35.0
5,8.4583,
6,51.8625,54.0
7,21.0750,2.0
...,...,...
883,10.5000,28.0
884,7.0500,25.0
886,13.0000,27.0
889,30.0000,26.0


In [30]:
# Visual divider between notebook sections: one long line of dashes.
print(500 * "--")

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

## assigning to a variable

In [27]:
# Store the boolean row mask (True where sex is "male") in a variable so it can be reused.
mask1 = titanic.sex == "male"
mask1

0       True
1      False
2      False
3      False
4       True
       ...  
886     True
887    False
888    False
889     True
890     True
Name: sex, Length: 891, dtype: bool

In [28]:
# Keep only the rows flagged True in mask1 and give the filtered frame its own name.
titanic_male = titanic.loc[mask1]

In [29]:
# Check the filtered frame: the first rows should all have sex == "male".
titanic_male.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
4,0,3,male,35.0,0,0,8.05,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.075,S,


In [31]:
# Show the dtype of every column (a Series indexed by column label).
titanic.dtypes

survived      int64
pclass        int64
sex          object
age         float64
sibsp         int64
parch         int64
fare        float64
embarked     object
deck         object
dtype: object

In [32]:
# Column-level boolean mask: True for the columns whose dtype is object.
mask2 = titanic.dtypes == object
mask2

survived    False
pclass      False
sex          True
age         False
sibsp       False
parch       False
fare        False
embarked     True
deck         True
dtype: bool

## Using mask2 directly as the column indexer below shows only the columns where it is True

In [38]:
# ":" keeps every row; mask2 keeps only the columns flagged True.
titanic.loc[:, mask2]

Unnamed: 0,sex,embarked,deck
0,male,S,
1,female,C,C
2,female,S,
3,female,S,C
4,male,S,
...,...,...,...
886,male,S,
887,female,S,B
888,female,S,
889,male,C,C


# The ~ operator inverts the mask, so the opposite set of columns is selected

In [40]:
# "~" negates the boolean mask, so this keeps the columns where mask2 is False.
titanic.loc[:, ~mask2]

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
0,0,3,22.0,1,0,7.2500
1,1,1,38.0,1,0,71.2833
2,1,3,26.0,0,0,7.9250
3,1,1,35.0,1,0,53.1000
4,0,3,35.0,0,0,8.0500
...,...,...,...,...,...,...
886,0,2,27.0,0,0,13.0000
887,1,1,19.0,0,0,30.0000
888,0,3,,1,2,23.4500
889,1,1,26.0,0,0,30.0000
