In [2]:
import pandas as pd

In [3]:
# Load the players table, index it by each player's short name, and keep
# only the columns used in the examples below. Written as a single chain so
# that re-running the cell always rebuilds df from the CSV in one step.
df = (
    pd.read_csv('players_20.csv')
    .set_index('short_name')
    .loc[:, ['long_name', 'age', 'dob', 'height_cm', 'weight_kg', 'nationality', 'club']]
)

In [5]:
# Preview the first rows to confirm that short_name is now the index
# and that only the selected columns remain.
df.head()

Unnamed: 0_level_0,long_name,age,dob,height_cm,weight_kg,nationality,club
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
L. Messi,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona
Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,34,1985-02-05,187,83,Portugal,Juventus
Neymar Jr,Neymar da Silva Santos Junior,27,1992-02-05,175,68,Brazil,Paris Saint-Germain
J. Oblak,Jan Oblak,26,1993-01-07,188,87,Slovenia,Atlético Madrid
E. Hazard,Eden Hazard,28,1991-01-07,175,74,Belgium,Real Madrid


# 1. Selecting with a single value

We're going to select elements by index label with the `.loc[]` indexer. Note that `.loc` is used with square brackets; it is not called like a method.

loc[row_label, column_label]

In [6]:
# Get all data about L. Messi: with only a row label, .loc returns that
# row as a Series indexed by the column names.
df.loc['L. Messi']

long_name      Lionel Andrés Messi Cuccittini
age                                        32
dob                                1987-06-24
height_cm                                 170
weight_kg                                  72
nationality                         Argentina
club                             FC Barcelona
Name: L. Messi, dtype: object

In [9]:
# And we got all the data about this player. We have the full name, the age, date of birth
# and all this data. So now let's get a particular value of this data that we got

In [11]:
# Get the height of L. Messi: a row label plus a column label
# returns a single scalar value.
df.loc['L. Messi', 'height_cm']

np.int64(170)

In [None]:
# As we can see, we got the value 170, which is the height of this player in cm

In [12]:
# Get the weight of Cristiano Ronaldo (same pattern: one row label, one column label).
df.loc['Cristiano Ronaldo', 'weight_kg']

np.int64(83)

In [13]:
# And we can see that the weight of this player is 83 kg

In [14]:
# Get every row of the 'height_cm' column.
df.loc[:,'height_cm']

# Pattern: df.loc[:, <column_label>] -- a bare `:` in the row position selects all rows.

short_name
L. Messi             170
Cristiano Ronaldo    187
Neymar Jr            175
J. Oblak             188
E. Hazard            175
                    ... 
Shao Shuai           186
Xiao Mingjie         177
Zhang Wei            186
Wang Haijian         185
Pan Ximing           182
Name: height_cm, Length: 18278, dtype: int64

In [15]:
# And now as we can see, we got this series, and this series
# is about the height of the players. So here we have the information
# about the height of all the players listed here. For example, Lionel Messi is 170 cm tall,
# then Neymar is 175cm, and so on

In [16]:
# Get every column for the row labelled 'L. Messi'.
df.loc['L. Messi', :]

# Pattern: df.loc[<row_label>, :] -- a bare `:` in the column position selects all columns.

long_name      Lionel Andrés Messi Cuccittini
age                                        32
dob                                1987-06-24
height_cm                                 170
weight_kg                                  72
nationality                         Argentina
club                             FC Barcelona
Name: L. Messi, dtype: object

In [None]:
# This is identical to the result we obtained before with df.loc['L. Messi']:
# leaving out the column selector is the same as passing `:`.

# 2. Selecting with a list of values

In [17]:
# Get all data about L. Messi and Cristiano Ronaldo:
# a list of row labels returns a DataFrame containing those rows.
df.loc[['L. Messi','Cristiano Ronaldo'],:]

Unnamed: 0_level_0,long_name,age,dob,height_cm,weight_kg,nationality,club
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
L. Messi,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona
Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,34,1985-02-05,187,83,Portugal,Juventus


In [18]:
# Get the height of L. Messi and Cristiano Ronaldo:
# a list of row labels with a single column label returns a Series.
df.loc[['L. Messi','Cristiano Ronaldo'],'height_cm']

short_name
L. Messi             170
Cristiano Ronaldo    187
Name: height_cm, dtype: int64

In [19]:
# And now we only have the height of these two players

In [21]:
# Get the height and weight of L. Messi:
# a single row label with a list of column labels returns a Series indexed by those columns.
df.loc['L. Messi',['height_cm','weight_kg']]

height_cm    170
weight_kg     72
Name: L. Messi, dtype: object

In [None]:
# And now we get the height and the weight of this player

In [23]:
# Get the height and weight of L. Messi and Cristiano Ronaldo:
# lists of labels on both axes return a DataFrame.
df.loc[['L. Messi','Cristiano Ronaldo'],['height_cm','weight_kg']]

Unnamed: 0_level_0,height_cm,weight_kg
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1
L. Messi,170,72
Cristiano Ronaldo,187,83


In [24]:
# And now we can see that we have this little dataframe
# with the height and weight of Messi and Ronaldo. 

# 3. Selecting a range of data with a slice

start:stop:step (note that, unlike ordinary Python slices, label-based slices with `.loc` include both the start and the stop)

In [25]:
# Slicing column labels
players = ['L. Messi','Cristiano Ronaldo']

# 'age':'club' selects every column from 'age' through 'club', both endpoints included.
df.loc[players, 'age':'club']

Unnamed: 0_level_0,age,dob,height_cm,weight_kg,nationality,club
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
L. Messi,32,1987-06-24,170,72,Argentina,FC Barcelona
Cristiano Ronaldo,34,1985-02-05,187,83,Portugal,Juventus


In [26]:
# As we can see, we got the slice from 'age' to 'club'.
# Every column from 'age' through 'club' was selected, including both endpoint columns.

# So that's how you make a slice with column labels.

In [27]:
# Slicing index labels
columns = ['age', 'dob', 'height_cm', 'weight_kg']

# In this example, we want to slice the index labels from the top-1 to the top-10 player.

# This dataset sorts the players by their scores in the FIFA game :)
# So the first player (1st row) in this dataset is the best player according to this FIFA game,
# and the 10th player in this df is the 10th best player in this game.

# So here we can get a slice that represents the top ten players
# (the first 10 rows, and the columns from 'age' through 'weight_kg').

# Get the index labels of the first ten rows (top-1 through top-10 players)
df.index[:10]

Index(['L. Messi', 'Cristiano Ronaldo', 'Neymar Jr', 'J. Oblak', 'E. Hazard',
       'K. De Bruyne', 'M. ter Stegen', 'V. van Dijk', 'L. Modrić',
       'M. Salah'],
      dtype='object', name='short_name')

In [28]:
# The best player in this game is 'L. Messi' and the 10th best player is 'M. Salah'
# (Mohamed Salah). So now we can make a slice by writing both labels: 'L. Messi' and 'M. Salah'

In [29]:
# Label slice from 'L. Messi' through 'M. Salah' (both endpoints included),
# restricted to the columns listed in `columns`.
df.loc['L. Messi':'M. Salah', columns]

Unnamed: 0_level_0,age,dob,height_cm,weight_kg
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
L. Messi,32,1987-06-24,170,72
Cristiano Ronaldo,34,1985-02-05,187,83
Neymar Jr,27,1992-02-05,175,68
J. Oblak,26,1993-01-07,188,87
E. Hazard,28,1991-01-07,175,74
K. De Bruyne,28,1991-06-28,181,70
M. ter Stegen,27,1992-04-30,187,85
V. van Dijk,27,1991-07-08,193,92
L. Modrić,33,1985-09-09,172,66
M. Salah,27,1992-06-15,175,71


In [30]:
# So now we can see that we got the first 10 players in this dataframe with the features we selected:
# from L. Messi (row position 0) to M. Salah (row position 9), both included.

# And that's how a range of data can be selected with a slice in the columns or in the index. 

# 4. Selecting with conditions

In [31]:
# One condition: select players taller than 180 cm
columns = ['age', 'dob', 'height_cm', 'weight_kg']

df.loc[df['height_cm'] > 180 , columns]

# Pattern: df.loc[<boolean_row_mask>, <selected_columns>]
# The comparison df['height_cm'] > 180 produces one True/False value per row;
# .loc keeps the rows where it is True and only the columns listed in `columns`.

Unnamed: 0_level_0,age,dob,height_cm,weight_kg
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Cristiano Ronaldo,34,1985-02-05,187,83
J. Oblak,26,1993-01-07,188,87
K. De Bruyne,28,1991-06-28,181,70
M. ter Stegen,27,1992-04-30,187,85
V. van Dijk,27,1991-07-08,193,92
...,...,...,...,...
P. Martin,20,1999-01-05,188,84
Shao Shuai,22,1997-03-10,186,79
Zhang Wei,19,2000-05-16,186,75
Wang Haijian,18,2000-08-02,185,74


In [None]:
# And we can see here in this df only players above 180 cm. So for example,
# L. Messi is not here because he is 170 cm. But C. Ronaldo is here because he's taller than 180 cm

In [36]:
# Multiple conditions: players taller than 180 cm who are from Argentina.
# Each comparison yields one True/False value per row; `&` combines them
# element-wise. The parentheses are required because `&` binds more tightly
# than `>` and `==`.
# The mask is deliberately not named `filter`: that would shadow Python's
# built-in filter() for every cell that runs afterwards in this kernel.
tall_argentina_mask = (df['height_cm'] > 180) & (df['nationality'] == 'Argentina')

# .loc keeps the rows where the mask is True; `:` keeps every column.
df.loc[tall_argentina_mask, :]

Unnamed: 0_level_0,long_name,age,dob,height_cm,weight_kg,nationality,club
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
M. Icardi,Mauro Emanuel Icardi Rivero,26,1993-02-19,181,75,Argentina,Inter
G. Higuaín,Gonzalo Gerardo Higuaín,31,1987-12-10,186,89,Argentina,Juventus
E. Garay,Ezequiel Marcelo Garay,32,1986-10-10,189,90,Argentina,Valencia CF
N. Otamendi,Nicolás Hernán Otamendi,31,1988-02-12,183,81,Argentina,Manchester City
G. Rulli,Gerónimo Rulli,27,1992-05-20,189,84,Argentina,Montpellier HSC
...,...,...,...,...,...,...,...
T. Durso,Tomás Durso,20,1999-02-26,185,80,Argentina,Gimnasia y Esgrima La Plata
J. Hass,Joaquín Hass,21,1998-03-27,186,88,Argentina,Club Atlético Colón
R. Ferrario,Rafael Ferrario,19,2000-04-30,186,76,Argentina,Club Atlético Huracán
L. Finochietto,Leandro Finochietto,22,1997-04-25,186,77,Argentina,Argentinos Juniors


In [37]:
# So as we can see here, we don't have Cristiano Ronaldo anymore because he's from Portugal. 
# but we have other players from Argentina with height above 180 cm.

# And those are different ways to select elements with the .loc[] indexer.