# Exercise: using selectors together with `ApplyToCols`
Consider this example dataframe:

In [1]:
import pandas as pd

# Example table: three float metrics (metric_2 has a missing value), a numeric
# identifier, a string identifier, and two string columns (description has a
# missing value).
df = pd.DataFrame(
    dict(
        metric_1=[10.5, 20.3, 30.1, 40.2],
        metric_2=[5.1, 15.6, None, 35.8],
        metric_3=[1.1, 3.3, 2.6, 0.8],
        num_id=[101, 102, 103, 104],
        str_id=["A101", "A102", "A103", "A104"],
        description=["apple", None, "cherry", "date"],
        name=["Alice", "Bob", "Charlie", "David"],
    )
)
df

Unnamed: 0,metric_1,metric_2,metric_3,num_id,str_id,description,name
0,10.5,5.1,1.1,101,A101,apple,Alice
1,20.3,15.6,3.3,102,A102,,Bob
2,30.1,,2.6,103,A103,cherry,Charlie
3,40.2,35.8,0.8,104,A104,date,David


Using the skrub selectors and `ApplyToCols`:

- Apply the `StandardScaler` to numeric columns, except `"num_id"`. 
- Apply a `OneHotEncoder` with `sparse_output=False` on all string columns except
`"str_id"`. 

In [2]:
import skrub.selectors as s
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from skrub import ApplyToCols
from sklearn.pipeline import make_pipeline

# Write your solution here
# 
# 
# 
# 
# 
# 
# 
# 
# 

In [3]:
import skrub.selectors as s
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from skrub import ApplyToCols
from sklearn.pipeline import make_pipeline

# Standardize every numeric column except the numeric identifier.
scaler = ApplyToCols(
    StandardScaler(),
    cols=s.numeric() - "num_id",
)

# One-hot encode (dense output) every string column except the string identifier.
one_hot = ApplyToCols(
    OneHotEncoder(sparse_output=False),
    cols=s.string() - "str_id",
)

# Chain both steps: scaling first, then encoding.
transformer = make_pipeline(scaler, one_hot)
transformer.fit_transform(df)

Unnamed: 0,metric_1,metric_2,metric_3,num_id,str_id,description_apple,description_cherry,description_date,description_None,name_Alice,name_Bob,name_Charlie,name_David
0,-1.336178,-1.077965,-0.820768,101,A101,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,-0.449914,-0.253793,1.303572,102,A102,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
2,0.436349,,0.627646,103,A103,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.349743,1.331758,-1.11045,104,A104,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Given the same dataframe and using selectors, drop only string columns that contain
nulls. 

In [4]:
from skrub import DropCols

# Write your solution here
# 
# 
# 
# 
# 
# 
# 

In [5]:
from skrub import DropCols

# Drop the columns that are both string-typed and contain at least one null;
# every other column is kept as-is.
DropCols(
    cols=s.has_nulls() & s.string(),
).fit_transform(df)

Unnamed: 0,metric_1,metric_2,metric_3,num_id,str_id,name
0,10.5,5.1,1.1,101,A101,Alice
1,20.3,15.6,3.3,102,A102,Bob
2,30.1,,2.6,103,A103,Charlie
3,40.2,35.8,0.8,104,A104,David


Now write a custom predicate function and use it with `s.filter` and `SelectCols`
to select the numeric columns in which all values are lower than `10.0`.

In [6]:
from skrub import SelectCols

# Write your solution here
# 
# 
# 
# 
# 
# 
# 

In [7]:
from skrub import SelectCols

def lower_than(col, threshold=10.0):
    """Return True when every value of ``col`` is strictly below ``threshold``.

    Intended as a predicate for ``s.filter``: it receives one column and
    returns a single boolean telling whether the column should be selected.

    Parameters
    ----------
    col : column (e.g. a pandas Series)
        Column supporting element-wise ``<`` comparison with a number.
    threshold : float, default=10.0
        Exclusive upper bound that every value must stay below.

    Returns
    -------
    bool
        True if all values are lower than ``threshold``, False otherwise.
        In a float column, NaN does not compare as lower, so a column
        containing NaN is rejected.
    """
    # Vectorized reduction instead of iterating the column with the builtin
    # ``all``; ``bool`` converts the library scalar into a plain Python bool.
    return bool((col < threshold).all())

# Keep only the numeric columns for which the custom predicate holds.
# NOTE(review): s.numeric() comes first, presumably so that the predicate is
# not evaluated on string columns — confirm against the selector semantics.
SelectCols(
    cols=s.numeric() & s.filter(lower_than),
).fit_transform(df)

Unnamed: 0,metric_3
0,1.1
1,3.3
2,2.6
3,0.8
