In [1]:
import polars as pl
import polars.selectors as cs
from polars.testing import assert_frame_equal

In [2]:
# Load the cleaned fintech dataset; the path is relative to the notebook's
# working directory.
df_fintech = pl.read_csv("clean_fintech.csv")
# Preview the first rows to check the parsed columns and dtypes.
df_fintech.head()

user_id,churn,age,credit_score,deposits,withdrawal,purchases_partners,purchases,cc_taken,cc_recommended,cc_disliked,cc_liked,cc_application_begin,app_downloaded,web_user,app_web_user,ios_user,android_user,registered_phones,payment_type,waiting_4_loan,cancelled_loan,received_loan,rejected_loan,zodiac_sign,rewards_earned,reward_rate,is_referred
i64,bool,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,bool,bool,bool,bool,bool,i64,str,bool,bool,bool,bool,str,f64,f64,bool
1,False,21.0,577.0,48,4,52,45,0,245,0,0,22,True,True,True,False,True,2,"""Semi-Monthly""",False,False,False,False,"""Pisces""",56.0,1.87,False
8,True,31.0,519.0,0,0,0,0,0,49,0,0,2,True,False,False,False,True,0,"""Bi-Weekly""",False,False,False,False,"""Virgo""",18.0,0.6,True
9,False,26.0,542.5156,0,0,30,0,0,56,0,0,5,True,True,True,False,True,2,"""Weekly""",False,False,False,False,"""Sagittarius""",23.0,0.77,False
10,False,33.0,558.0,0,0,76,0,0,166,0,0,8,True,False,False,False,True,0,"""Bi-Weekly""",False,False,False,False,"""Leo""",45.0,1.5,True
11,False,26.0,559.0,0,0,206,0,0,304,0,0,27,True,True,True,False,True,0,"""Bi-Weekly""",False,False,False,False,"""Virgo""",60.0,2.0,True


## Transformations to perform:
- Multiply all numeric columns by 2.
- Delete the letter "e" from all str columns.
- Set all bool columns to True.
- Create 3 extra numeric columns:
    - Mean of purchases.
    - Median of age.
    - Mean of credit_score.

In [3]:
# Keep only the columns used in this exercise and store age as an integer.
df_fintech_2 = (
    df_fintech
    .select([
        'age',
        'credit_score',
        'purchases',
        'zodiac_sign',
        'payment_type',
        'churn',
        'cancelled_loan',
        'received_loan',
    ])
    .with_columns(pl.col("age").cast(pl.Int64))
)

In [4]:
# Keep an untransformed copy of the subset; it is the input of the
# assert_transform check at the end of the notebook.
df_fintech_3 = df_fintech_2.clone()

In [5]:
# Preview the first two rows of the selected subset.
df_fintech_2.head(2)

age,credit_score,purchases,zodiac_sign,payment_type,churn,cancelled_loan,received_loan
i64,f64,i64,str,str,bool,bool,bool
21,577.0,45,"""Pisces""","""Semi-Monthly""",False,False,False
31,519.0,0,"""Virgo""","""Bi-Weekly""",True,False,False


In [6]:
# Inspect the column dtypes before the transformations are applied.
df_fintech_2.dtypes

[Int64, Float64, Int64, String, String, Boolean, Boolean, Boolean]

In [7]:
def transform_bool(df):
    """Set every Boolean column of ``df`` to ``True``.

    Parameters
    ----------
    df : pl.DataFrame
        Frame to transform. It is not modified in place.

    Returns
    -------
    pl.DataFrame
        New frame in which every value of every Boolean column is ``True``;
        all other columns are left untouched.
    """
    # Select Boolean columns explicitly. Taking "everything that is neither
    # numeric nor string" would also overwrite Date, Datetime, Categorical,
    # List, ... columns with True.
    bool_cols = df.select(cs.boolean()).columns
    # A single with_columns call; an empty list leaves the frame unchanged.
    return df.with_columns([pl.lit(True).alias(name) for name in bool_cols])

def transform_str(df):
    """Remove every lowercase letter "e" from all string columns of ``df``.

    Parameters
    ----------
    df : pl.DataFrame
        Frame to transform. It is not modified in place.

    Returns
    -------
    pl.DataFrame
        New frame whose string columns have all "e" characters deleted;
        non-string columns are left untouched.
    """
    string_cols = df.select(cs.string()).columns
    # Each expression rewrites only its own column, so they can all be
    # applied in one with_columns call.
    stripped = [pl.col(name).str.replace_all("e", "") for name in string_cols]
    return df.with_columns(stripped)

def transform_numeric(df):
    """Multiply every numeric column of ``df`` by 2.

    Parameters
    ----------
    df : pl.DataFrame
        Frame to transform. It is not modified in place.

    Returns
    -------
    pl.DataFrame
        New frame whose numeric columns are doubled; non-numeric columns are
        left untouched.
    """
    numeric_cols = df.select(cs.numeric()).columns
    # Each expression rewrites only its own column, so they can all be
    # applied in one with_columns call.
    doubled = [pl.col(name) * 2 for name in numeric_cols]
    return df.with_columns(doubled)

def transform_extracols(df):
    """Append three summary columns, each holding one value repeated per row.

    Added columns, in this order:

    - ``purchases_mean``: mean of ``purchases``, cast to Int64 (so any
      fractional part of the mean is dropped).
    - ``age_median``: median of ``age``.
    - ``score_mean``: mean of ``credit_score``.

    Parameters
    ----------
    df : pl.DataFrame
        Frame containing the ``purchases``, ``age`` and ``credit_score``
        columns. It is not modified in place.

    Returns
    -------
    pl.DataFrame
        New frame with the three columns appended after the existing ones.
    """
    purchases_mean = pl.col("purchases").mean().cast(pl.Int64)
    age_median = pl.col("age").median()
    score_mean = pl.col("credit_score").mean()
    # Keyword arguments name the new columns and keep their order.
    return df.with_columns(
        purchases_mean=purchases_mean,
        age_median=age_median,
        score_mean=score_mean,
    )

In [8]:
# Build the result from the untouched copy (df_fintech_3) rather than from
# df_fintech_2 itself, so that re-running this cell does not apply the
# transformations a second time.
df_fintech_2 = (
    df_fintech_3
    .pipe(transform_str)
    .pipe(transform_numeric)
    .pipe(transform_bool)
    .pipe(transform_extracols)
)
# Reference for the .pipe chaining pattern (the article shows a lazy variant,
# df.lazy().pipe(...).pipe(...).collect()):
# https://typethepipe.com/vizs-and-tips/python-polars-pipe-function-to-one-more-columns/

In [9]:
# Inspect the dtypes after the pipeline; the three summary columns are
# appended at the end.
df_fintech_2.dtypes

[Int64,
 Float64,
 Int64,
 String,
 String,
 Boolean,
 Boolean,
 Boolean,
 Int64,
 Float64,
 Float64]

In [10]:
# Preview the first rows of the transformed frame.
df_fintech_2.head()

age,credit_score,purchases,zodiac_sign,payment_type,churn,cancelled_loan,received_loan,purchases_mean,age_median,score_mean
i64,f64,i64,str,str,bool,bool,bool,i64,f64,f64
42,1154.0,90,"""Piscs""","""Smi-Monthly""",True,True,True,6,60.0,1085.152626
62,1038.0,0,"""Virgo""","""Bi-Wkly""",True,True,True,6,60.0,1085.152626
52,1085.0312,0,"""Sagittarius""","""Wkly""",True,True,True,6,60.0,1085.152626
66,1116.0,0,"""Lo""","""Bi-Wkly""",True,True,True,6,60.0,1085.152626
52,1118.0,0,"""Virgo""","""Bi-Wkly""",True,True,True,6,60.0,1085.152626


---

In [13]:
# Apply the transformation pipeline to df_orig inside the assert helper.
def assert_transform(df_orig):
    """Run the full transformation pipeline on ``df_orig`` and check the result.

    The expected frame is hard-coded for the first five untransformed rows of
    the fintech subset, so ``df_orig`` has to be exactly those rows.

    Parameters
    ----------
    df_orig : pl.DataFrame
        Untransformed input frame.

    Raises
    ------
    AssertionError
        If the transformed frame differs from the expected one.
    """
    n_rows = 5

    # Same step order as the main pipeline: strings, numerics, booleans,
    # then the summary columns.
    actual = (
        df_orig
        .pipe(transform_str)
        .pipe(transform_numeric)
        .pipe(transform_bool)
        .pipe(transform_extracols)
    )

    expected = pl.DataFrame({
        'age': [42, 62, 52, 66, 52],
        'credit_score': [1154.0, 1038.0, 1085.0312, 1116.0, 1118.0],
        'purchases': [90, 0, 0, 0, 0],
        'zodiac_sign': ['Piscs', 'Virgo', 'Sagittarius', 'Lo', 'Virgo'],
        'payment_type': ['Smi-Monthly', 'Bi-Wkly', 'Wkly', 'Bi-Wkly', 'Bi-Wkly'],
        'churn': [True] * n_rows,
        'cancelled_loan': [True] * n_rows,
        'received_loan': [True] * n_rows,
        'purchases_mean': [18] * n_rows,
        'age_median': [52.0] * n_rows,
        'score_mean': [1102.20624] * n_rows,
    })

    assert_frame_equal(expected, actual)

In [14]:
# Validate the pipeline on the first rows of the untransformed copy; the call
# raises AssertionError if the result differs from the expected frame.
assert_transform(df_fintech_3.head())