# Selecting Rows and Columns - Recap

In [1]:
import pandas as pd
import seaborn as sns

In [2]:
# Load seaborn's "penguins" example dataset into `df` and preview its first three rows.
df = sns.load_dataset('penguins')
df.head(3)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female


In [5]:
# Select one column: indexing with a single column label returns a Series.
df["island"].head(5)

0    Torgersen
1    Torgersen
2    Torgersen
3    Torgersen
4    Torgersen
Name: island, dtype: object

In [24]:
# Select several columns: indexing with a list of labels returns a DataFrame
# whose columns appear in the order given in the list.
df[["bill_length_mm", "island"]].head(3)

Unnamed: 0,bill_length_mm,island
0,39.1,Torgersen
1,39.5,Torgersen
2,40.3,Torgersen


In [13]:
# Selecting a single row by its index label returns that row as a Series.
# (A slice such as df[0:3] would instead return a DataFrame holding rows 0-2.)
df.loc[0]


species                 Adelie
island               Torgersen
bill_length_mm            39.1
bill_depth_mm             18.7
flipper_length_mm        181.0
body_mass_g             3750.0
sex                       Male
Name: 0, dtype: object

In [19]:
# Alternative: select a single row by its integer position with .iloc
# (positions start at 0, so position 3 is the fourth row).
df.iloc[3]

species                 Adelie
island               Torgersen
bill_length_mm             NaN
bill_depth_mm              NaN
flipper_length_mm          NaN
body_mass_g                NaN
sex                        NaN
Name: 3, dtype: object

In [23]:
# Alternative: select rows by a string index label.
# set_index returns a new DataFrame indexed by "island"; df itself is unchanged.
dfis = df.set_index("island")
# The label "Torgersen" occurs on many rows, so .loc returns all of them
# as a DataFrame; show only the first three.
dfis.loc["Torgersen"].head(3)

Unnamed: 0_level_0,species,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
island,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Torgersen,Adelie,39.1,18.7,181.0,3750.0,Male
Torgersen,Adelie,39.5,17.4,186.0,3800.0,Female
Torgersen,Adelie,40.3,18.0,195.0,3250.0,Female


In [28]:
# Multiple rows
# .iloc slices by position and excludes the stop: every second penguin at
# positions 10, 12 and 14. display() is required here because a cell only
# renders its last expression automatically.
display(df.iloc[10:15:2])

# .loc slices by label and includes the stop label (rows 10 through 15);
# show only the first three of them.
df.loc[10:15].head(3)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
10,Adelie,Torgersen,37.8,17.1,186.0,3300.0,
11,Adelie,Torgersen,37.8,17.3,180.0,3700.0,
12,Adelie,Torgersen,41.1,17.6,182.0,3200.0,Female


In [34]:
# Selecting with a boolean mask: .loc keeps the rows whose mask entry is True.
# The five-element pattern is repeated for exactly as many rows as df has, so
# the mask length always matches the frame, whatever the dataset size.
pattern = [True, False, True, True, False]
mask = [pattern[i % len(pattern)] for i in range(len(df))]
df.loc[mask].head(10)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
5,Adelie,Torgersen,39.3,20.6,190.0,3650.0,Male
7,Adelie,Torgersen,39.2,19.6,195.0,4675.0,Male
8,Adelie,Torgersen,34.1,18.1,193.0,3475.0,
10,Adelie,Torgersen,37.8,17.1,186.0,3300.0,
12,Adelie,Torgersen,41.1,17.6,182.0,3200.0,Female
13,Adelie,Torgersen,38.6,21.2,191.0,3800.0,Male
15,Adelie,Torgersen,36.6,17.8,185.0,3700.0,Female


In [37]:
# Select by row and column.
# Which species is the penguin at row position 99?
# Pick the column first, then the element at that position within it.
df["species"].iloc[99]

'Adelie'

In [38]:
# Positional block: rows at positions 96-98 and the first three columns
# (the stop of an .iloc slice is excluded).
df.iloc[96:99, :3]

Unnamed: 0,species,island,bill_length_mm
96,Adelie,Dream,38.1
97,Adelie,Dream,40.3
98,Adelie,Dream,33.1


In [40]:
# Selection algebra
# Label-based lookup of a single value: row label 99 and column "species"
# are passed together in one .loc call.
df.loc[99, "species"]

'Adelie'

In [44]:
# Combine conditions element-wise with & (and) or | (or):
# keep the rows whose bill is longer than 40 mm and whose sex is "Female".
df.loc[df["bill_length_mm"].gt(40) & df["sex"].eq("Female")]

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
12,Adelie,Torgersen,41.1,17.6,182.0,3200.0,Female
27,Adelie,Biscoe,40.5,17.9,187.0,3200.0,Female
37,Adelie,Dream,42.2,18.5,180.0,3550.0,Female
76,Adelie,Torgersen,40.9,16.8,191.0,3700.0,Female
...,...,...,...,...,...,...,...
332,Gentoo,Biscoe,43.5,15.2,213.0,4650.0,Female
334,Gentoo,Biscoe,46.2,14.1,217.0,4375.0,Female
338,Gentoo,Biscoe,47.2,13.7,214.0,4925.0,Female
340,Gentoo,Biscoe,46.8,14.3,215.0,4850.0,Female
