# Selecting Rows and Columns

In [1]:
import pandas as pd

In [3]:
# Load the country table; index_col=0 turns the first CSV column
# (the country names) into the row index.
csv_path = '../data/large_countries_2015.csv'
df = pd.read_csv(csv_path, index_col=0)

In [4]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(12, 3)

### Display Rows

In [5]:
# Preview the first three rows.
df.head(3)

Unnamed: 0,population,fertility,continent
Bangladesh,160995600.0,2.12,Asia
Brazil,207847500.0,1.78,South America
China,1376049000.0,1.57,Asia


In [6]:
# Preview the last three rows.
df.tail(3)

Unnamed: 0,population,fertility,continent
Philippines,100699395.0,2.98,Asia
Russia,143456918.0,1.61,Europe
United States,321773631.0,1.97,North America


### Select Columns

In [7]:
# Indexing with a single column label returns that column as a Series.
df["population"]

Bangladesh       1.609956e+08
Brazil           2.078475e+08
China            1.376049e+09
India            1.311051e+09
Indonesia        2.575638e+08
Japan            1.265735e+08
Mexico           1.270172e+08
Nigeria          1.822020e+08
Pakistan         1.889249e+08
Philippines      1.006994e+08
Russia           1.434569e+08
United States    3.217736e+08
Name: population, dtype: float64

In [8]:
# Indexing with a list of labels returns a DataFrame holding just those columns.
col_names = ['population', 'fertility']
df[col_names].head(3)

Unnamed: 0,population,fertility
Bangladesh,160995600.0,2.12
Brazil,207847500.0,1.78
China,1376049000.0,1.57


### Select Rows

In [9]:
# .loc with a single index label returns that row as a Series.
df.loc['Brazil']

population      2.07848e+08
fertility              1.78
continent     South America
Name: Brazil, dtype: object

In [10]:
# .loc with a list of labels returns those rows in the order requested,
# not in the order they appear in the file.
df.loc[['Japan', 'China', 'Brazil']]

Unnamed: 0,population,fertility,continent
Japan,126573500.0,1.45,Asia
China,1376049000.0,1.57,Asia
Brazil,207847500.0,1.78,South America


In [11]:
# Use the continent as the row index instead of the country name.
cont = df.set_index('continent')
# 'Asia' labels several rows, so .loc returns all of them as a DataFrame.
cont.loc['Asia'].head(3)

Unnamed: 0_level_0,population,fertility
continent,Unnamed: 1_level_1,Unnamed: 2_level_1
Asia,160995600.0,2.12
Asia,1376049000.0,1.57
Asia,1311051000.0,2.43


In [12]:
df.iloc[[1, 3, 5]]  # select rows by 0-based integer position rather than by label

Unnamed: 0,population,fertility,continent
Brazil,207847500.0,1.78,South America
India,1311051000.0,2.43,Asia
Japan,126573500.0,1.45,Asia


In [13]:
df.iloc[1:5]        # positional slice: rows 1 through 4 (the end position 5 is excluded)

Unnamed: 0,population,fertility,continent
Brazil,207847500.0,1.78,South America
China,1376049000.0,1.57,Asia
India,1311051000.0,2.43,Asia
Indonesia,257563800.0,2.28,Asia


In [14]:
df.iloc[::2]  # every second row, starting with the first (positions 0, 2, 4, ...)

Unnamed: 0,population,fertility,continent
Bangladesh,160995600.0,2.12,Asia
China,1376049000.0,1.57,Asia
Indonesia,257563800.0,2.28,Asia
Mexico,127017200.0,2.13,North America
Pakistan,188924900.0,3.04,Asia
Russia,143456900.0,1.61,Europe


### Select both Rows and Columns

In [15]:
# .loc[rows, columns]: both selectors are label lists, and the result
# follows the order given for rows and for columns.
df.loc[['Japan', 'China', 'Brazil'], ['continent', 'fertility']]

Unnamed: 0,continent,fertility
Japan,Asia,1.45
China,Asia,1.57
Brazil,South America,1.78


In [16]:
# .iloc[rows, columns] by position: rows 1-4 and the first two columns
# (the end position of each slice is excluded).
df.iloc[1:5, 0:2]

Unnamed: 0,population,fertility
Brazil,207847500.0,1.78
China,1376049000.0,1.57
India,1311051000.0,2.43
Indonesia,257563800.0,2.28


### Select by Conditions

In [17]:
# Comparing a column with a value gives a boolean Series; assigning it to a new
# label adds it to df as a column. NOTE: this changes df itself, so every cell
# below sees four columns instead of the original three.
df['in_asia'] = df['continent'] == 'Asia'

In [18]:
# Confirm that in_asia now appears next to the original columns.
df.head(3)

Unnamed: 0,population,fertility,continent,in_asia
Bangladesh,160995600.0,2.12,Asia,True
Brazil,207847500.0,1.78,South America,False
China,1376049000.0,1.57,Asia,True


In [22]:
# A boolean Series inside [] acts as a mask: only rows where it is True are kept.
df[df['continent'] == 'Asia'].head(3)

Unnamed: 0,population,fertility,continent,in_asia
Bangladesh,160995600.0,2.12,Asia,True
China,1376049000.0,1.57,Asia,True
India,1311051000.0,2.43,Asia,True


In [23]:
# Build the row mask first, then filter: countries with more than 250 million people.
is_high_pop = df['population'] > 250_000_000
high_pop = df[is_high_pop]
high_pop.shape

(4, 4)

In [24]:
# Countries with 100 to 250 million people; between() includes both
# endpoints by default.
in_mid_range = df['population'].between(100_000_000, 250_000_000)
mid_pop = df[in_mid_range]
mid_pop.shape

(8, 4)

In [25]:
# Combine two conditions element-wise with &: a row is kept only when
# both masks are True for it.
has_low_fertility = df['fertility'] < 1.8
is_asian = df['continent'] == 'Asia'
low_fert_asia = df[has_low_fertility & is_asian]
low_fert_asia.head(5)

Unnamed: 0,population,fertility,continent,in_asia
China,1376049000.0,1.57,Asia,True
Japan,126573500.0,1.45,Asia,True
