## Selecting specific data with loc, iloc, at, iat

In [6]:
import numpy as np
import pandas as pd

# Path of the batting-summary CSV, relative to the notebook's working directory.
CSV_SAMPLE_FILE = "datasets/ODI_WC_2023_batting_summary.csv"

# Load the data and lower-case every column label in one chained expression;
# the cells below refer to lower-case names such as `batsman_name`.
# rename() returns a new frame, so no in-place mutation is needed.
df = pd.read_csv(CSV_SAMPLE_FILE).rename(columns=str.lower)

# Preview the first five rows.
df.head()

Unnamed: 0,match_no,match_between,team_innings,batsman_name,batting_position,dismissal,runs,balls,4s,6s,strike_rate
0,1,England vs New Zealand,England,Jonny Bairstow,1,c Daryl Mitchell b Mitchell Santner,33,35,4,1,94.3
1,1,England vs New Zealand,England,Dawid Malan,2,c Tom Latham b Matt Henry,14,24,2,0,58.3
2,1,England vs New Zealand,England,Joe Root,3,b Glenn Phillips,77,86,4,1,89.5
3,1,England vs New Zealand,England,Harry Brook,4,c Devon Conway b Rachin Ravindra,25,16,4,1,156.3
4,1,England vs New Zealand,England,Moeen Ali,5,b Glenn Phillips,11,17,1,0,64.7


#### Pandas offers 4 primary methods to select items:
1. **Dot notation**: select a single column by attribute access
2. **loc**: select by row (index) labels and column names
3. **iloc**: select by integer row and column positions
4. **iat**: select a single item by integer row and column position (its label-based counterpart is **at**)

In [9]:
# 1. Dot notation: select a single column as a Series via attribute access.
# This only works when the column name is a valid Python identifier, so
# columns such as `4s` or `6s` must be selected with brackets or .loc instead.
df.batsman_name.head()

0    Jonny Bairstow
1       Dawid Malan
2          Joe Root
3       Harry Brook
4         Moeen Ali
Name: batsman_name, dtype: object

In [12]:
# 2. .loc example
# .loc takes up to two arguments inside the square brackets: the first for
# index labels (row names) and the second for column names.

# Without .loc: plain bracket indexing selects the column by name.
df['batsman_name'].head()

0    Jonny Bairstow
1       Dawid Malan
2          Joe Root
3       Harry Brook
4         Moeen Ali
Name: batsman_name, dtype: object

In [13]:
# With .loc: ':' selects every row and 'batsman_name' selects the column.
# There is no .head() here, so the whole Series is returned and pandas
# abbreviates the display with '...'.
df.loc[:, 'batsman_name']

0          Jonny Bairstow
1             Dawid Malan
2                Joe Root
3             Harry Brook
4               Moeen Ali
              ...        
911           Travis Head
912        Mitchell Marsh
913           Steve Smith
914    Marnus Labuschagne
915         Glenn Maxwell
Name: batsman_name, Length: 916, dtype: object

In [17]:
# Wrong: every comma-separated value is treated as a separate indexer, and
# .loc only accepts a row indexer followed by a column indexer.
# df.loc[:, 'batsman_name', 'dismissal'].head()

# Right: pass the column names together as a single list.
df.loc[:, ['batsman_name', 'dismissal']].head()

Unnamed: 0,batsman_name,dismissal
0,Jonny Bairstow,c Daryl Mitchell b Mitchell Santner
1,Dawid Malan,c Tom Latham b Matt Henry
2,Joe Root,b Glenn Phillips
3,Harry Brook,c Devon Conway b Rachin Ravindra
4,Moeen Ali,b Glenn Phillips


In [19]:
# Label-based row slice: unlike ordinary Python slices, the end label (4) is
# included, so this returns the rows labelled 1, 2, 3 and 4.
df.loc[1:4, ['batsman_name', 'dismissal']].head() # rows selected by index label

Unnamed: 0,batsman_name,dismissal
1,Dawid Malan,c Tom Latham b Matt Henry
2,Joe Root,b Glenn Phillips
3,Harry Brook,c Devon Conway b Rachin Ravindra
4,Moeen Ali,b Glenn Phillips


In [23]:
# 3. .iloc example
# df.iloc[row_positions, column_positions] -- selects by integer position, not by label.

# ':' on both axes keeps every row and every column; .head(2) trims the preview to two rows.
df.iloc[:, :].head(2)

Unnamed: 0,match_no,match_between,team_innings,batsman_name,batting_position,dismissal,runs,balls,4s,6s,strike_rate
0,1,England vs New Zealand,England,Jonny Bairstow,1,c Daryl Mitchell b Mitchell Santner,33,35,4,1,94.3
1,1,England vs New Zealand,England,Dawid Malan,2,c Tom Latham b Matt Henry,14,24,2,0,58.3


In [26]:
# All rows, first two columns (positions 0 and 1; the stop position 2 is excluded).
df.iloc[:, :2].head(2)

Unnamed: 0,match_no,match_between
0,1,England vs New Zealand
1,1,England vs New Zealand


In [29]:
# Row position 2 only (the stop position 3 is excluded) and the first four columns.
# The slice holds a single row, so .head(2) has nothing left to trim.
df.iloc[2:3, :4].head(2)

Unnamed: 0,match_no,match_between,team_innings,batsman_name
2,1,England vs New Zealand,England,Joe Root
