In [60]:
import pandas as pd
import polars as pl
import numpy as np
import pathlib
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white")

In [61]:
IDENTITY_INPUT_PATH = pathlib.Path("../data/train_identity.csv")
IDENTITY_OUTPUT_PATH = pathlib.Path("../data/train_identity_processed.parquet")
TRANSACTION_INPUT_PATH = pathlib.Path("../data/train_transaction.csv")
TRANSACTION_OUTPUT_PATH = pathlib.Path("../data/train_transaction_processed.parquet")

In [62]:
def show_correlation(correlation: pd.DataFrame, vmin: int = -1, vmax: int = 1) -> None:
    # Compute the correlation matrix
    # Generate a mask for the upper triangle
    mask = np.triu(np.ones_like(correlation, dtype=bool))

    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(10, 10))

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(230, 20, as_cmap=True)

    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(correlation, mask=mask, vmin=vmin, vmax=vmax, annot=False, cmap=cmap, ax=ax)

def show_correlation_with_target(correlation: pd.DataFrame, target_col: str) -> None:
    plt.figure(figsize=(20, 20))
    heatmap = sns.heatmap(correlation[[target_col]].sort_values(by=target_col, ascending=False).drop(index=target_col), vmin=-1, vmax=1, annot=True, cmap='BrBG')
    heatmap.set_title('Features Correlating with Target', fontdict={'fontsize':18}, pad=10)

In [63]:
transactions: pl.LazyFrame = pl.scan_csv(TRANSACTION_INPUT_PATH)
transactions.head().collect()


TransactionID,isFraud,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,dist1,dist2,P_emaildomain,R_emaildomain,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,D1,D2,D3,D4,D5,D6,…,V303,V304,V305,V306,V307,V308,V309,V310,V311,V312,V313,V314,V315,V316,V317,V318,V319,V320,V321,V322,V323,V324,V325,V326,V327,V328,V329,V330,V331,V332,V333,V334,V335,V336,V337,V338,V339
i64,i64,i64,f64,str,i64,f64,f64,str,f64,str,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2987000,0,86400,68.5,"""W""",13926,,150.0,"""discover""",142.0,"""credit""",315.0,87.0,19.0,,,,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0,14.0,,13.0,,,,…,0.0,0.0,1.0,0.0,117.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,117.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,
2987001,0,86401,29.0,"""W""",2755,404.0,150.0,"""mastercard""",102.0,"""credit""",325.0,87.0,,,"""gmail.com""",,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,,,0.0,,,…,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,
2987002,0,86469,59.0,"""W""",4663,490.0,150.0,"""visa""",166.0,"""debit""",330.0,87.0,287.0,,"""outlook.com""",,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,,,0.0,,,…,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,
2987003,0,86499,50.0,"""W""",18132,567.0,150.0,"""mastercard""",117.0,"""debit""",476.0,87.0,,,"""yahoo.com""",,2.0,5.0,0.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,1.0,0.0,25.0,1.0,112.0,112.0,0.0,94.0,0.0,,…,0.0,0.0,1.0,50.0,1758.0,925.0,0.0,354.0,0.0,135.0,0.0,0.0,0.0,50.0,1404.0,790.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,
2987004,0,86506,50.0,"""H""",4497,514.0,150.0,"""mastercard""",102.0,"""credit""",420.0,87.0,,,"""gmail.com""",,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,,,,,,…,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [64]:
transactions.describe()

statistic,TransactionID,isFraud,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,dist1,dist2,P_emaildomain,R_emaildomain,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,D1,D2,D3,D4,D5,…,V303,V304,V305,V306,V307,V308,V309,V310,V311,V312,V313,V314,V315,V316,V317,V318,V319,V320,V321,V322,V323,V324,V325,V326,V327,V328,V329,V330,V331,V332,V333,V334,V335,V336,V337,V338,V339
str,f64,f64,f64,f64,str,f64,f64,f64,str,f64,str,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",590540.0,590540.0,590540.0,590540.0,"""590540""",590540.0,581607.0,588975.0,"""588963""",586281.0,"""588969""",524834.0,524834.0,238269.0,37627.0,"""496084""","""137291""",590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,589271.0,309743.0,327662.0,421618.0,280699.0,…,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,589271.0,589271.0,589271.0,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0
"""null_count""",0.0,0.0,0.0,0.0,"""0""",0.0,8933.0,1565.0,"""1577""",4259.0,"""1571""",65706.0,65706.0,352271.0,552913.0,"""94456""","""453249""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1269.0,280797.0,262878.0,168922.0,309841.0,…,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,1269.0,1269.0,1269.0,12.0,12.0,12.0,12.0,12.0,12.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0
"""mean""",3282269.5,0.03499,7372300.0,135.027176,,9898.734658,362.555488,153.194925,,199.278897,,290.733794,86.80063,118.50218,231.855423,,,14.092458,15.269734,0.005644,4.092185,5.571526,9.071082,2.848478,5.144574,4.48024,5.240343,10.241521,4.076227,32.539918,8.295215,94.347568,169.563231,28.343348,140.002441,42.335965,…,0.28314,0.264208,1.000007,139.748713,408.682375,230.41318,10.995986,118.195658,4.202175,39.17391,21.351473,43.319174,26.806977,109.818544,247.606741,162.153398,18.372476,42.073133,28.326584,6.220289,13.103775,9.184612,0.058494,0.85104,0.296633,0.33679,1.312844,0.775874,721.741883,1375.783644,1014.622782,9.807015,59.16455,28.530903,55.352422,151.160542,100.700882
"""std""",170474.358321,0.183755,4617200.0,239.162522,,4901.170153,157.793246,11.336444,,41.244453,,101.741072,2.690623,371.872026,529.053494,,,133.569018,154.668899,0.150536,68.848459,25.786976,71.508467,61.727304,95.378574,16.674897,95.581443,94.336292,86.666218,129.364844,49.544262,157.660387,177.315865,62.384721,191.096774,89.000144,…,0.623608,0.528238,0.002603,2348.849634,4391.992977,3021.924247,116.254277,352.983093,102.374938,172.128339,95.90297,173.619028,116.853222,2270.033202,3980.042828,2793.343636,332.304848,473.499307,382.053171,56.022561,106.739813,73.627893,0.304415,3.950295,1.364356,1.580144,8.769083,4.727971,6217.223583,11169.275702,7955.735482,243.861391,387.62948,274.57692,668.486833,1095.034387,814.946722
"""min""",2987000.0,0.0,86400.0,0.251,"""C""",1000.0,100.0,100.0,"""american expre…",100.0,"""charge card""",100.0,10.0,0.0,0.0,"""aim.com""","""aim.com""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-122.0,0.0,…,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""25%""",3134635.0,0.0,3027065.0,43.321,,6019.0,214.0,150.0,,166.0,,204.0,87.0,3.0,7.0,,,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,26.0,1.0,0.0,1.0,…,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""50%""",3282270.0,0.0,7306535.0,68.78,,9678.0,361.0,150.0,,226.0,,299.0,87.0,8.0,37.0,,,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,3.0,1.0,3.0,97.0,8.0,26.0,10.0,…,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""75%""",3429904.0,0.0,11246605.0,125.0,,14184.0,512.0,150.0,,226.0,,330.0,87.0,24.0,206.0,,,3.0,3.0,0.0,0.0,1.0,2.0,0.0,0.0,2.0,0.0,2.0,0.0,12.0,2.0,122.0,276.0,27.0,253.0,32.0,…,0.0,0.0,1.0,0.0,151.380005,35.970001,0.0,107.949997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""max""",3577539.0,1.0,15811131.0,31937.391,"""W""",18396.0,600.0,231.0,"""visa""",237.0,"""debit or credi…",540.0,102.0,10286.0,11623.0,"""ymail.com""","""ymail.com""",4685.0,5691.0,26.0,2253.0,349.0,2253.0,2255.0,3331.0,210.0,3257.0,3188.0,3188.0,2918.0,1429.0,640.0,640.0,819.0,869.0,819.0,…,20.0,16.0,2.0,108800.0,145765.0,108800.0,55125.0,55125.0,55125.0,55125.0,4817.470215,7519.870117,4817.470215,93736.0,134021.0,98476.0,104060.0,104060.0,104060.0,880.0,1411.0,976.0,12.0,44.0,18.0,15.0,99.0,55.0,160000.0,160000.0,160000.0,55125.0,55125.0,55125.0,104060.0,104060.0,104060.0


In [65]:
categorical_transactions: pl.LazyFrame = transactions.select(pl.col(pl.String))
categorical_transactions.head().collect()

ProductCD,card4,card6,P_emaildomain,R_emaildomain,M1,M2,M3,M4,M5,M6,M7,M8,M9
str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""W""","""discover""","""credit""",,,"""T""","""T""","""T""","""M2""","""F""","""T""",,,
"""W""","""mastercard""","""credit""","""gmail.com""",,,,,"""M0""","""T""","""T""",,,
"""W""","""visa""","""debit""","""outlook.com""",,"""T""","""T""","""T""","""M0""","""F""","""F""","""F""","""F""","""F"""
"""W""","""mastercard""","""debit""","""yahoo.com""",,,,,"""M0""","""T""","""F""",,,
"""H""","""mastercard""","""credit""","""gmail.com""",,,,,,,,,,


## Analyze categorical columns

In [66]:
categorical_transactions.describe()

statistic,ProductCD,card4,card6,P_emaildomain,R_emaildomain,M1,M2,M3,M4,M5,M6,M7,M8,M9
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""count""","""590540""","""588963""","""588969""","""496084""","""137291""","""319440""","""319440""","""319440""","""309096""","""240058""","""421180""","""244275""","""244288""","""244288"""
"""null_count""","""0""","""1577""","""1571""","""94456""","""453249""","""271100""","""271100""","""271100""","""281444""","""350482""","""169360""","""346265""","""346252""","""346252"""
"""mean""",,,,,,,,,,,,,,
"""std""",,,,,,,,,,,,,,
"""min""","""C""","""american expre…","""charge card""","""aim.com""","""aim.com""","""F""","""F""","""F""","""M0""","""F""","""F""","""F""","""F""","""F"""
"""25%""",,,,,,,,,,,,,,
"""50%""",,,,,,,,,,,,,,
"""75%""",,,,,,,,,,,,,,
"""max""","""W""","""visa""","""debit or credi…","""ymail.com""","""ymail.com""","""T""","""T""","""T""","""M2""","""T""","""T""","""T""","""T""","""T"""


#### ProductCD

In [67]:
transactions.select("ProductCD").collect().to_series().value_counts()

ProductCD,count
str,u32
"""H""",33024
"""C""",68519
"""W""",439670
"""S""",11628
"""R""",37699


#### card4

In [68]:
transactions.select("card4").collect().to_series().value_counts()

card4,count
str,u32
"""mastercard""",189217
,1577
"""american expre…",8328
"""visa""",384767
"""discover""",6651


#### card_6

In [69]:
transactions.select("card6").collect().to_series().value_counts()

card6,count
str,u32
"""debit or credi…",30
"""credit""",148986
"""charge card""",15
,1571
"""debit""",439938


#### P-emaildomain

In [70]:
transactions.select("P_emaildomain").collect().to_series().value_counts()

P_emaildomain,count
str,u32
"""outlook.es""",438
"""yahoo.co.jp""",32
"""att.net""",4033
"""sbcglobal.net""",2970
"""cableone.net""",159
…,…
"""protonmail.com…",76
"""yahoo.es""",134
"""netzero.net""",196
"""msn.com""",4092


In [71]:
transactions.select("P_emaildomain").with_columns(
    pl.col("P_emaildomain").str.split(".").list.first().fill_null("unknown")
).collect()

P_emaildomain
str
"""unknown"""
"""gmail"""
"""outlook"""
"""yahoo"""
"""gmail"""
…
"""unknown"""
"""gmail"""
"""gmail"""
"""aol"""


#### R_emaildomain

In [72]:
transactions.select("R_emaildomain").collect().to_series().value_counts(sort=True)

R_emaildomain,count
str,u32
,453249
"""gmail.com""",57147
"""hotmail.com""",27509
"""anonymous.com""",20529
"""yahoo.com""",11842
…,…
"""frontiernet.ne…",14
"""netzero.com""",14
"""centurylink.ne…",12
"""netzero.net""",9


In [73]:
transactions.select("R_emaildomain").with_columns(
    pl.col("R_emaildomain").str.split(".").list.first().fill_null("unknown")
).collect()

R_emaildomain
str
"""unknown"""
"""unknown"""
"""unknown"""
"""unknown"""
"""unknown"""
…
"""unknown"""
"""unknown"""
"""unknown"""
"""unknown"""


#### M1 to M9

In [74]:
for index in range(1, 10):
    transactions = transactions.with_columns(
        pl.col(f"M{index}").fill_null("unknown")
    )

In [75]:
def convert_categorical_columns(dataframe: pl.LazyFrame) -> pl.LazyFrame:
    transforms: list[pl.Expr] = []
    for index in range(1, 10):
        transforms.append(
            pl.col(f"M{index}").fill_null("unknown")
        )
        
    for column in ["card4", "card6", "ProductCD"]:
        transforms.append(
            pl.col(column).fill_null("unknown")
        )
        
    return dataframe.with_columns(
        *transforms,
        pl.col("R_emaildomain").str.split(".").list.first().fill_null("unknown"),
        pl.col("P_emaildomain").str.split(".").list.first().fill_null("unknown")
    )

In [76]:
transactions = convert_categorical_columns(transactions)
transactions.describe()

statistic,TransactionID,isFraud,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,dist1,dist2,P_emaildomain,R_emaildomain,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,D1,D2,D3,D4,D5,…,V303,V304,V305,V306,V307,V308,V309,V310,V311,V312,V313,V314,V315,V316,V317,V318,V319,V320,V321,V322,V323,V324,V325,V326,V327,V328,V329,V330,V331,V332,V333,V334,V335,V336,V337,V338,V339
str,f64,f64,f64,f64,str,f64,f64,f64,str,f64,str,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",590540.0,590540.0,590540.0,590540.0,"""590540""",590540.0,581607.0,588975.0,"""590540""",586281.0,"""590540""",524834.0,524834.0,238269.0,37627.0,"""590540""","""590540""",590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,589271.0,309743.0,327662.0,421618.0,280699.0,…,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,589271.0,589271.0,589271.0,590528.0,590528.0,590528.0,590528.0,590528.0,590528.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0,82351.0
"""null_count""",0.0,0.0,0.0,0.0,"""0""",0.0,8933.0,1565.0,"""0""",4259.0,"""0""",65706.0,65706.0,352271.0,552913.0,"""0""","""0""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1269.0,280797.0,262878.0,168922.0,309841.0,…,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,1269.0,1269.0,1269.0,12.0,12.0,12.0,12.0,12.0,12.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0,508189.0
"""mean""",3282269.5,0.03499,7372300.0,135.027176,,9898.734658,362.555488,153.194925,,199.278897,,290.733794,86.80063,118.50218,231.855423,,,14.092458,15.269734,0.005644,4.092185,5.571526,9.071082,2.848478,5.144574,4.48024,5.240343,10.241521,4.076227,32.539918,8.295215,94.347568,169.563231,28.343348,140.002441,42.335965,…,0.28314,0.264208,1.000007,139.748713,408.682375,230.41318,10.995986,118.195658,4.202175,39.17391,21.351473,43.319174,26.806977,109.818544,247.606741,162.153398,18.372476,42.073133,28.326584,6.220289,13.103775,9.184612,0.058494,0.85104,0.296633,0.33679,1.312844,0.775874,721.741883,1375.783644,1014.622782,9.807015,59.16455,28.530903,55.352422,151.160542,100.700882
"""std""",170474.358321,0.183755,4617200.0,239.162522,,4901.170153,157.793246,11.336444,,41.244453,,101.741072,2.690623,371.872026,529.053494,,,133.569018,154.668899,0.150536,68.848459,25.786976,71.508467,61.727304,95.378574,16.674897,95.581443,94.336292,86.666218,129.364844,49.544262,157.660387,177.315865,62.384721,191.096774,89.000144,…,0.623608,0.528238,0.002603,2348.849634,4391.992977,3021.924247,116.254277,352.983093,102.374938,172.128339,95.90297,173.619028,116.853222,2270.033202,3980.042828,2793.343636,332.304848,473.499307,382.053171,56.022561,106.739813,73.627893,0.304415,3.950295,1.364356,1.580144,8.769083,4.727971,6217.223583,11169.275702,7955.735482,243.861391,387.62948,274.57692,668.486833,1095.034387,814.946722
"""min""",2987000.0,0.0,86400.0,0.251,"""C""",1000.0,100.0,100.0,"""american expre…",100.0,"""charge card""",100.0,10.0,0.0,0.0,"""aim""","""aim""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-122.0,0.0,…,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""25%""",3134635.0,0.0,3027065.0,43.321,,6019.0,214.0,150.0,,166.0,,204.0,87.0,3.0,7.0,,,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,26.0,1.0,0.0,1.0,…,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""50%""",3282270.0,0.0,7306535.0,68.78,,9678.0,361.0,150.0,,226.0,,299.0,87.0,8.0,37.0,,,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,3.0,1.0,3.0,97.0,8.0,26.0,10.0,…,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""75%""",3429904.0,0.0,11246605.0,125.0,,14184.0,512.0,150.0,,226.0,,330.0,87.0,24.0,206.0,,,3.0,3.0,0.0,0.0,1.0,2.0,0.0,0.0,2.0,0.0,2.0,0.0,12.0,2.0,122.0,276.0,27.0,253.0,32.0,…,0.0,0.0,1.0,0.0,151.380005,35.970001,0.0,107.949997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""max""",3577539.0,1.0,15811131.0,31937.391,"""W""",18396.0,600.0,231.0,"""visa""",237.0,"""unknown""",540.0,102.0,10286.0,11623.0,"""ymail""","""ymail""",4685.0,5691.0,26.0,2253.0,349.0,2253.0,2255.0,3331.0,210.0,3257.0,3188.0,3188.0,2918.0,1429.0,640.0,640.0,819.0,869.0,819.0,…,20.0,16.0,2.0,108800.0,145765.0,108800.0,55125.0,55125.0,55125.0,55125.0,4817.470215,7519.870117,4817.470215,93736.0,134021.0,98476.0,104060.0,104060.0,104060.0,880.0,1411.0,976.0,12.0,44.0,18.0,15.0,99.0,55.0,160000.0,160000.0,160000.0,55125.0,55125.0,55125.0,104060.0,104060.0,104060.0


In [77]:
transactions = transactions.with_columns(
    *[pl.col(col).fill_null(pl.col(col).median()) for col in transactions.select(pl.col(pl.NUMERIC_DTYPES)).columns]
)