# Pandas. Selecting rows and columns

Pandas documentation: https://pandas.pydata.org/docs/index.html

Pandas API reference: https://pandas.pydata.org/docs/reference/index.html#api

In [1]:
#import pandas library
import pandas as pd

### Population in Galician municipalities

**Source**: Instituto Galego de Estatística 

https://www.ige.gal/



In [2]:
# Load the municipalities CSV file from the local filesystem into a DataFrame
df_concellos = pd.read_csv('../datasets/concellos_con_poboacion.csv')

In [3]:
# Display the DataFrame (long frames are abbreviated: only the first and last rows are shown)
df_concellos

Unnamed: 0,Concello,CODIGOINE,poboacion
0,Sarreaus,32078,1114.0
1,Taboadela,32079,1452.0
2,"Bola, A",32014,1075.0
3,Maside,32045,2718.0
4,Trasmiras,32082,1261.0
...,...,...,...
308,Salvaterra de Miño,36050,10048.0
309,Sanxenxo,36051,17635.0
310,Silleda,36052,8845.0
311,Soutomaior,36053,7482.0


In [4]:
# Select a subset of columns: 'Concello' and 'poboacion'.
# Indexing with a list of column names (note the double brackets) returns a DataFrame.
df_concellos[['Concello','poboacion']]

Unnamed: 0,Concello,poboacion
0,Sarreaus,1114.0
1,Taboadela,1452.0
2,"Bola, A",1075.0
3,Maside,2718.0
4,Trasmiras,1261.0
...,...,...
308,Salvaterra de Miño,10048.0
309,Sanxenxo,17635.0
310,Silleda,8845.0
311,Soutomaior,7482.0


In [5]:
# Select a subset of columns: 'Concello' and 'CODIGOINE'
df_concellos[['Concello','CODIGOINE']]

Unnamed: 0,Concello,CODIGOINE
0,Sarreaus,32078
1,Taboadela,32079
2,"Bola, A",32014
3,Maside,32045
4,Trasmiras,32082
...,...,...
308,Salvaterra de Miño,36050
309,Sanxenxo,36051
310,Silleda,36052
311,Soutomaior,36053


In [6]:
# Select the first 3 rows
df_concellos.head(3)

Unnamed: 0,Concello,CODIGOINE,poboacion
0,Sarreaus,32078,1114.0
1,Taboadela,32079,1452.0
2,"Bola, A",32014,1075.0


In [7]:
# Select the 2nd row by position.
# .iloc is position-based and positions start at 0, so the 2nd row is position 1.
# Passing a list ([1]) returns a one-row DataFrame.
df_concellos.iloc[[1]]

Unnamed: 0,Concello,CODIGOINE,poboacion
1,Taboadela,32079,1452.0


In [8]:
# Select the 2nd row by label.
# .loc selects rows by index label, not by position. df_concellos has the default
# integer index (0, 1, 2, ...), so label 1 happens to be the 2nd row.
df_concellos.loc[[1]]

Unnamed: 0,Concello,CODIGOINE,poboacion
1,Taboadela,32079,1452.0


In [9]:
# Load the dataset again, this time using the 'Concello' column as the index
df_concellos2 = pd.read_csv(
    '../datasets/concellos_con_poboacion.csv',
    index_col='Concello',
)

In [10]:
# Display the DataFrame, now indexed by municipality name ('Concello')
df_concellos2

Unnamed: 0_level_0,CODIGOINE,poboacion
Concello,Unnamed: 1_level_1,Unnamed: 2_level_1
Sarreaus,32078,1114.0
Taboadela,32079,1452.0
"Bola, A",32014,1075.0
Maside,32045,2718.0
Trasmiras,32082,1261.0
...,...,...
Salvaterra de Miño,36050,10048.0
Sanxenxo,36051,17635.0
Silleda,36052,8845.0
Soutomaior,36053,7482.0


In [11]:
# Select the 2nd row by position.
# .iloc ignores the index labels; positions still start at 0, so the 2nd row is position 1.
df_concellos2.iloc[[1]]

Unnamed: 0_level_0,CODIGOINE,poboacion
Concello,Unnamed: 1_level_1,Unnamed: 2_level_1
Taboadela,32079,1452.0


In [12]:
# Try to select the 2nd row with .loc -- this fails on purpose.
# .loc selects rows by index label. The index of df_concellos2 holds municipality
# names, so the integer label 1 is not in it and pandas raises KeyError.
# The error is caught and printed so the demonstration does not stop "Run All"
# before the remaining cells are executed.
try:
    df_concellos2.loc[[1]]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "None of [Index([1], dtype='int64', name='Concello')] are in the [index]"

In [None]:
# Select the 2nd row by its index label, 'Taboadela'
df_concellos2.loc[['Taboadela']]

In [None]:
# loc vs iloc

# When to use loc:
# - when you know the index labels and want to look up specific rows
# - when you want to select a range of rows by label, regardless of their position

# When to use iloc:
# - when you want to select rows or ranges of rows by their position in the dataset

In [None]:
# Display df_concellos2 again for reference
df_concellos2

In [None]:
# Select several non-contiguous rows by position with .iloc
# (positions 1, 3 and 5, i.e. the 2nd, 4th and 6th rows)
df_concellos2.iloc[[1,3,5]]

In [None]:
# Select several non-contiguous rows by index label with .loc
df_concellos2.loc[['Taboadela','Maside','Carballeda de Avia']]

In [None]:
# Select a contiguous range of rows by position: positions 0 to 4
# (the end position of the slice, 5, is excluded)
df_concellos2.iloc[0:5]

In [None]:
# Select a single cell by position: [row, column].
# Here: first row (position 0), second column (position 1).
df_concellos2.iloc[0,1]