# Selecting Rows and Columns

---

### 1. Read Data

In [1]:
import pandas as pd

In [2]:
# Load the table; the first CSV column becomes the row index
# (the country names shown in the previews below).
df = pd.read_csv(
    'data/large_countries_2015.csv',
    index_col=0,
)

### 2. Inspect Data

In [3]:
# Dimensions of the loaded table as (rows, columns).
df.shape

(12, 3)

In [4]:
# Preview the first three rows.
df.head(3)

Unnamed: 0,population,fertility,continent
Bangladesh,160995600.0,2.12,Asia
Brazil,207847500.0,1.78,South America
China,1376049000.0,1.57,Asia


In [5]:
# Preview the last three rows.
df.tail(3)

Unnamed: 0,population,fertility,continent
Philippines,100699395.0,2.98,Asia
Russia,143456918.0,1.61,Europe
United States,321773631.0,1.97,North America


### 3. Select Columns

In [6]:
# A single column name in square brackets selects that one column.
df['population']

Bangladesh       1.609956e+08
Brazil           2.078475e+08
China            1.376049e+09
India            1.311051e+09
Indonesia        2.575638e+08
Japan            1.265735e+08
Mexico           1.270172e+08
Nigeria          1.822020e+08
Pakistan         1.889249e+08
Philippines      1.006994e+08
Russia           1.434569e+08
United States    3.217736e+08
Name: population, dtype: float64

In [None]:
# A list of column names selects several columns at once.
col_names = ['population', 'fertility']
df[col_names].head(3)

### 4. Select Rows

In [None]:
# .loc selects by index label: here, the row whose label is 'Brazil'.
df.loc['Brazil']

In [None]:
# A list of labels selects several rows in one call.
df.loc[['Japan', 'China', 'Brazil']]

In [None]:
# Index the rows by continent instead of country so that .loc can
# select by continent label; the result is bound to `cont`, while
# `df` is still used with its country index in later cells.
cont = df.set_index(keys='continent')
asia_rows = cont.loc['Asia']
asia_rows.head(3)

In [None]:
df.iloc[[1, 3, 5]]  # select rows by integer position (0-based), not by label

In [None]:
df.iloc[1:5]        # positional slice: positions 1 through 4 (stop position 5 is excluded)

In [None]:
df.iloc[::2]  # step of 2: every second row, starting with the first

### 5. Select Both Rows and Columns

In [None]:
# .loc takes row labels first, then column labels.
rows_wanted = ['Japan', 'China', 'Brazil']
cols_wanted = ['continent', 'fertility']
df.loc[rows_wanted, cols_wanted]

In [None]:
# .iloc takes row positions first, then column positions:
# rows at positions 1-4 and the first two columns.
df.iloc[1:5, :2]

### 6. Select by Conditions

In [None]:
# Add a boolean flag column: True where the continent is 'Asia'.
# Note: this modifies `df` in place, so any earlier cell re-run after
# this one will also see the extra column.
df['in_asia'] = df['continent'].eq('Asia')

In [None]:
# Re-inspect the first rows; the in_asia column added in the previous
# cell should now be visible.
df.head(3)

In [None]:
# Build a boolean mask first, then keep only the rows where it is True:
# countries with a population strictly above 250 million.
is_high_pop = df['population'].gt(250_000_000)
high_pop = df.loc[is_high_pop]
high_pop.shape

In [None]:
# Countries with a population between 100 and 250 million.
pop_min, pop_max = 100_000_000, 250_000_000
is_mid_pop = df['population'].between(pop_min, pop_max)
mid_pop = df.loc[is_mid_pop]
mid_pop.shape

In [None]:
# Combine two conditions with & (element-wise AND): fertility below 1.8
# and located in Asia.
is_low_fert = df['fertility'].lt(1.8)
is_asian = df['continent'].eq('Asia')
low_fert_asia = df.loc[is_low_fert & is_asian]
low_fert_asia.head(5)