In [1]:
# Uncomment to install polars into this kernel's environment.
# The outputs in this notebook were produced with polars 0.16.8, so pin that version.
# %pip install polars==0.16.8

In [2]:
import polars as pl

# Show the installed polars version so readers can compare it with their own environment
pl.__version__

'0.16.8'

## Read in Data

In [3]:
# Location of the health dataset (CSV hosted on GitHub)
HEALTH_CSV_URL = 'https://raw.githubusercontent.com/noahrubin989/datasets/main/epidemiology/health.csv'

# Download the CSV and parse it into a polars DataFrame
data = pl.read_csv(HEALTH_CSV_URL)

In [4]:
# Preview the first four rows to check the column names and dtypes
data.head(4)

GDP/Capita,MeanSchooling,InfantMortality,Life_exp,Country,Year,Country Type
f64,f64,f64,f64,str,i64,str
61598.536704,12.6,3.3,80.953659,"""Denmark""",2018,"""Developed"""
4134.987198,10.2,14.2,69.689,"""Mongolia""",2018,"""Developing"""
82818.108162,13.4,3.6,83.753659,"""Switzerland""",2018,"""Developed"""
11633.498009,10.6,8.7,76.52,"""Argentina""",2018,"""Developing"""


## Select a Single Column

In [5]:
# Square bracket syntax: index the frame with one column name
# (per the polars docs this form returns a Series, not a DataFrame)
data['InfantMortality']

InfantMortality
f64
3.3
14.2
3.6
8.7
17.2
11.0
75.7
70.8
23.7
5.4


In [6]:
# Select method: pass one column name to select()
# (per the polars docs this form returns a one-column DataFrame)
data.select('InfantMortality')

InfantMortality
f64
3.3
14.2
3.6
8.7
17.2
11.0
75.7
70.8
23.7
5.4


## Select Multiple Columns

In [7]:
# Square bracket syntax: index the frame with a list of column names
# (note the double brackets: the inner pair is the Python list)
data[['InfantMortality', 'Life_exp']]

InfantMortality,Life_exp
f64,f64
3.3,80.953659
14.2,69.689
3.6,83.753659
8.7,76.52
17.2,74.131
11.0,74.945
75.7,54.332
70.8,53.977
23.7,69.57
5.4,72.657317


In [8]:
# Select method: pass a list of column names to select()
data.select(['InfantMortality', 'Life_exp'])

InfantMortality,Life_exp
f64,f64
3.3,80.953659
14.2,69.689
3.6,83.753659
8.7,76.52
17.2,74.131
11.0,74.945
75.7,54.332
70.8,53.977
23.7,69.57
5.4,72.657317


## Select by a Particular Data Type

In [9]:
# Single data type: pl.col() given a dtype instead of a name picks every column
# of that dtype; here Utf8 selects the string columns (Country, Country Type)
data.select(pl.col(pl.Utf8))

Country,Country Type
str,str
"""Denmark""","""Developed"""
"""Mongolia""","""Developing"""
"""Switzerland""","""Developed"""
"""Argentina""","""Developing"""
"""Paraguay""","""Developing"""
"""Armenia""","""Developing"""
"""Nigeria""","""Developing"""
"""Chad""","""Developing"""
"""Cambodia""","""Developing"""
"""Russia""","""Developed"""


In [10]:
# Several data types: give select() one dtype-based expression per type.
# Here that picks all Float64 columns together with all Int64 columns.
data.select([
    pl.col(pl.Float64),
    pl.col(pl.Int64),
])

GDP/Capita,MeanSchooling,InfantMortality,Life_exp,Year
f64,f64,f64,f64,i64
61598.536704,12.6,3.3,80.953659,2018
4134.987198,10.2,14.2,69.689,2018
82818.108162,13.4,3.6,83.753659,2018
11633.498009,10.6,8.7,76.52,2018
5805.675616,8.4,17.2,74.131,2018
4220.490277,11.3,11.0,74.945,2018
2027.777979,6.5,75.7,54.332,2018
726.149881,2.5,70.8,53.977,2018
1512.12671,4.8,23.7,69.57,2018
11370.813456,12.2,5.4,72.657317,2018


## Select Based on Column Name Condition

In [11]:
# Example 1: regex over column names; the pattern is anchored with ^ and $.
# '^[Cc].*$' matches names that start with an upper- or lower-case "c"
data.select(pl.col('^[Cc].*$'))

Country,Country Type
str,str
"""Denmark""","""Developed"""
"""Mongolia""","""Developing"""
"""Switzerland""","""Developed"""
"""Argentina""","""Developing"""
"""Paraguay""","""Developing"""
"""Armenia""","""Developing"""
"""Nigeria""","""Developing"""
"""Chad""","""Developing"""
"""Cambodia""","""Developing"""
"""Russia""","""Developed"""


In [12]:
# Example 2: regex over column names; the pattern is anchored with ^ and $.
# '^.*y$' matches names that end in a lower-case "y" (InfantMortality, Country)
data.select(pl.col('^.*y$'))

InfantMortality,Country
f64,str
3.3,"""Denmark"""
14.2,"""Mongolia"""
3.6,"""Switzerland"""
8.7,"""Argentina"""
17.2,"""Paraguay"""
11.0,"""Armenia"""
75.7,"""Nigeria"""
70.8,"""Chad"""
23.7,"""Cambodia"""
5.4,"""Russia"""


## Rename Columns

In [13]:
# Rename method: keys are existing column names, values are their replacements.
# The result is bound to a new name, df_renamed.
df_renamed = data.rename({
    'InfantMortality': 'infmort',
    'Country': 'ctry',
})
df_renamed

GDP/Capita,MeanSchooling,infmort,Life_exp,ctry,Year,Country Type
f64,f64,f64,f64,str,i64,str
61598.536704,12.6,3.3,80.953659,"""Denmark""",2018,"""Developed"""
4134.987198,10.2,14.2,69.689,"""Mongolia""",2018,"""Developing"""
82818.108162,13.4,3.6,83.753659,"""Switzerland""",2018,"""Developed"""
11633.498009,10.6,8.7,76.52,"""Argentina""",2018,"""Developing"""
5805.675616,8.4,17.2,74.131,"""Paraguay""",2018,"""Developing"""
4220.490277,11.3,11.0,74.945,"""Armenia""",2018,"""Developing"""
2027.777979,6.5,75.7,54.332,"""Nigeria""",2018,"""Developing"""
726.149881,2.5,70.8,53.977,"""Chad""",2018,"""Developing"""
1512.12671,4.8,23.7,69.57,"""Cambodia""",2018,"""Developing"""
11370.813456,12.2,5.4,72.657317,"""Russia""",2018,"""Developed"""


In [14]:
# Adjust all columns: assign one new name per column to .columns, in column order.
# NOTE: this changes `data` itself, so cells above that use the original
# names (e.g. 'InfantMortality') will fail if re-run after this cell.
data.columns = [
    'gdp_cap',
    'mschooling',
    'infmort',
    'Life_exp',  # kept as-is
    'ctry',
    'yr',
    'ctry type',
]
data

gdp_cap,mschooling,infmort,Life_exp,ctry,yr,ctry type
f64,f64,f64,f64,str,i64,str
61598.536704,12.6,3.3,80.953659,"""Denmark""",2018,"""Developed"""
4134.987198,10.2,14.2,69.689,"""Mongolia""",2018,"""Developing"""
82818.108162,13.4,3.6,83.753659,"""Switzerland""",2018,"""Developed"""
11633.498009,10.6,8.7,76.52,"""Argentina""",2018,"""Developing"""
5805.675616,8.4,17.2,74.131,"""Paraguay""",2018,"""Developing"""
4220.490277,11.3,11.0,74.945,"""Armenia""",2018,"""Developing"""
2027.777979,6.5,75.7,54.332,"""Nigeria""",2018,"""Developing"""
726.149881,2.5,70.8,53.977,"""Chad""",2018,"""Developing"""
1512.12671,4.8,23.7,69.57,"""Cambodia""",2018,"""Developing"""
11370.813456,12.2,5.4,72.657317,"""Russia""",2018,"""Developed"""
