In [None]:
'''
                         ,/(/*.                 
                   (((((((((((((((((            
                  /((   (((((((((((##           
                  /(((((((((((((#####           
             ,/////////(/(((((####### .....     
          ((((((((((((((((((######### ........  
         (((((((((((((((((########### ........  
        .(((((((((((((((###########* .......... 
        /((((((((((*             .............. 
         ((((((((( ............................ 
         (((((((( ............................  
           (((((#  ........................,.   
                  ...................           
                  ..............  ...           
                   ............   .,,           
                     ..,..........     
             
                Python Tip Friday: 11/01/2024
                    polars selectors API

    polars has an extremely convenient API to select columns of different types.
    Normally you can select columns in Polars by specifying the name, pattern, type, or
    by using pl.all(). The selectors API expands on this by giving you a set of convenient
    functions that select columns for you. For example, you can select all columns
    whose names start with "date", or you can select all numeric columns, which include
    int16, int32, float32, and float64. Let's run through a few examples to see how
    convenient this is.
    ----------------------------------------------------
    Stu Sztukowski | https://linkedin.com/in/StatsGuy
                   | https://github.com/stu-code
'''

In [15]:
import polars as pl
import polars.selectors as cs

# Schema-only frame (no data is passed, so it has zero rows) with one column
# per dtype, giving every selector in this notebook something to match against.
df = pl.DataFrame(
    schema=dict(
        int16=pl.Int16,
        int32=pl.Int32,
        float32=pl.Float32,
        float64=pl.Float64,
        bool=pl.Boolean,
        string=pl.String,
        cat=pl.Categorical,
        date=pl.Date,
        datetime=pl.Datetime,
        time=pl.Time,
        dur=pl.Duration,
    )
)

One way to select columns in Polars is to list the exact data types you want. For example, to select all numeric columns, we would have to name each numeric type individually:
- Int16
- Int32
- Float32
- Float64

In [8]:
# The long way: build one pl.col(<dtype>) expression per numeric dtype.
df.select(
    [pl.col(dtype) for dtype in (pl.Int16, pl.Int32, pl.Float32, pl.Float64)]
)

int16,int32,float32,float64
i16,i32,f32,f64


But this is tedious when all we want is every numeric column. The selectors API lets us do this in a single call.

In [11]:
# A single selector picks up every numeric column in the frame.
df.select(cs.numeric())

int16,int32,float32,float64
i16,i32,f32,f64


We can even combine selectors with set operations, such as OR (`|`).

In [17]:
# `|` is the union of two selectors: String columns plus Categorical columns.
df.select(cs.string() | cs.categorical())

string,cat
str,cat


Here are some other ways you can use the selectors API to quickly select columns.

In [18]:
# Columns whose names begin with 'd'
df.select(cs.starts_with('d'))

date,datetime,dur
date,datetime[μs],duration[μs]


In [27]:
# Columns with the substring 'ate' anywhere in their name
df.select(cs.contains('ate'))

date,datetime
date,datetime[μs]


In [19]:
# Every column other than those of Duration dtype
df.select(cs.exclude(pl.Duration))

int16,int32,float32,float64,bool,string,cat,date,datetime,time
i16,i32,f32,f64,bool,str,cat,date,datetime[μs],time


In [21]:
# Union of the first column and the last column
df.select(cs.first() | cs.last())

int16,dur
i16,duration[μs]


In [29]:
# Intersection (`&`): temporal columns whose names match the pattern 'date'
df.select(cs.temporal() & cs.matches('date'))

date,datetime
date,datetime[μs]


In [31]:
# Columns whose names are purely alphabetic, minus the temporal columns.
# Note: cs.alpha() filters on column *names*, not dtypes, so int16, int32,
# float32 and float64 are dropped here because their names contain digits
# (cs.alphanumeric() would be the selector that also allows digits).
df.select(
    cs.alpha() - cs.temporal()
)

bool,string,cat
bool,str,cat
