In [1]:
import polars as pl
import polars_istr  # noqa: F401

# IBAN

In [2]:
# Sample IBANs: one with a bad country code, two valid ones and one with a bad checksum.
df = pl.DataFrame(
    {
        "iban": [
            "AA110011123Z5678",  # not an IBAN: the country code is not valid
            "DE44500105175407324931",  # valid
            "AD1200012030200359100100",  # valid
            "MR0000020001010000123456754",  # checksum is invalid
        ]
    }
)
df.head()

iban
str
"""AA110011123Z56…"
"""DE445001051754…"
"""AD120001203020…"
"""MR000002000101…"


In [3]:
# Pull the individual IBAN components out of each value; `reason` holds the
# result of the validity check ("ok" or a failure message).
df.select(
    country_code=pl.col("iban").iban.country_code(),
    reason=pl.col("iban").iban.check(),
    is_valid=pl.col("iban").iban.is_valid(),
    bban=pl.col("iban").iban.bban(),
    bank_id=pl.col("iban").iban.bank_id(),
    branch_id=pl.col("iban").iban.branch_id(),
)

country_code,reason,is_valid,bban,bank_id,branch_id
str,str,bool,str,str,str
,"""Invalid countr…",False,,,
"""DE""","""ok""",True,"""50010517540732…","""50010517""",
"""AD""","""ok""",True,"""00012030200359…","""0001""","""2030"""
,"""Invalid checks…",False,,,


In [4]:
# Extract every IBAN component at once into a struct column named "ib",
# then expand that struct into one column per field.
(
    df
    .select(ib=pl.col("iban").iban.extract_all())
    .unnest("ib")
)

country_code,check_digits,bban,bank_id,branch_id
str,str,str,str,str
,,,,
"""DE""","""44""","""50010517540732…","""50010517""",
"""AD""","""12""","""00012030200359…","""0001""","""2030"""
,,,,


# ISIN

In [5]:
# Sample ISINs: valid values, a wrong check digit, a missing check digit and a null.
df = pl.DataFrame({
    "isin": [
        "US0378331005",  # AAPL
        "US0378331008",  # AAPL with a wrong check digit
        "US037833100",  # AAPL with the check digit left off
        "CA00206RGB20",  # Canadian
        "XS1550212416",  # other ("XS" prefix)
        None,  # null input
    ]
})
df.head()

isin
str
"""US0378331005"""
"""US0378331008"""
"""US037833100"""
"""CA00206RGB20"""
"""XS1550212416"""


In [6]:
# Split each ISIN into its components and flag whether it is valid.
df.select(
    country_code=pl.col("isin").isin.country_code(),
    check_digit=pl.col("isin").isin.check_digit(),
    security_id=pl.col("isin").isin.security_id(),
    is_valid=pl.col("isin").isin.is_valid(),
)

country_code,check_digit,security_id,is_valid
str,str,str,bool
"""US""","""5""","""037833100""",True
,,,False
,,,False
"""CA""","""0""","""00206RGB2""",True
"""XS""","""6""","""155021241""",True
,,,False


# URL

In [7]:
# Sample inputs for the URL namespace: absolute URLs, strings without a scheme and a null.
df = pl.DataFrame({
    "url": [
        "https://example.com/data.csv#row=4",  # carries a fragment
        "google.com",  # no scheme
        "ww.google.com",  # no scheme
        "abc123@email.com",  # an email address, not a URL
        "https://127.0.0.1/",  # host is an IP address
        "https://test.com/",
        "file:///tmp/foo",  # file scheme, no host
        "https://example.com/products?page=2&sort=desc",  # carries a query string
        None,  # null input
    ]
})

In [8]:
# Break each URL into its parts; `check` holds the result of the validity
# check ("ok" or a failure message).
df.select(
    host=pl.col("url").url.host(),
    domain=pl.col("url").url.domain(),
    fragment=pl.col("url").url.fragment(),
    path=pl.col("url").url.path(),
    query=pl.col("url").url.query(),
    check=pl.col("url").url.check(),
    is_valid=pl.col("url").url.is_valid(),
    is_special=pl.col("url").url.is_special(),
)

host,domain,fragment,path,query,check,is_valid,is_special
str,str,str,str,str,str,bool,bool
"""example.com""","""example.com""","""row=4""","""/data.csv""",,"""ok""",True,True
,,,,,"""relative URL w…",False,
,,,,,"""relative URL w…",False,
,,,,,"""relative URL w…",False,
"""127.0.0.1""",,,"""/""",,"""ok""",True,True
"""test.com""","""test.com""",,"""/""",,"""ok""",True,True
,,,"""/tmp/foo""",,"""ok""",True,True
"""example.com""","""example.com""",,"""/products""","""page=2&sort=de…","""ok""",True,True
,,,,,,,


# CUSIP

In [9]:
# Sample CUSIPs: a regular CUSIP, one missing its check digit, a CINS and a non-CUSIP string.
df = pl.DataFrame(
    {
        "cusip": [
            "303075105",  # regular CUSIP (FactSet - Common Stock)
            "30307510",  # same CUSIP without its check digit
            "G0052B105",  # regular CINS (Abingdon Capital PLC - Shares)
            "HELLOWORLD",  # invalid
        ]
    }
)

In [10]:
# Extract the CUSIP components and the private-use / CINS classification flags.
df.select(
    issue_num=pl.col("cusip").cusip.issue_num(),
    issuer_num=pl.col("cusip").cusip.issuer_num(),
    check_digit=pl.col("cusip").cusip.check_digit(),
    country_code=pl.col("cusip").cusip.country_code(),
    payload=pl.col("cusip").cusip.payload(),
    is_private_issue=pl.col("cusip").cusip.is_private_issue(),
    has_private_issuer=pl.col("cusip").cusip.has_private_issuer(),
    is_private_use=pl.col("cusip").cusip.is_private_use(),
    is_cins=pl.col("cusip").cusip.is_cins(),
    is_cins_base=pl.col("cusip").cusip.is_cins_base(),
    is_cins_extended=pl.col("cusip").cusip.is_cins_extended(),
)

issue_num,issuer_num,check_digit,country_code,payload,is_private_issue,has_private_issuer,is_private_use,is_cins,is_cins_base,is_cins_extended
str,str,str,str,str,bool,bool,bool,bool,bool,bool
"""10""","""303075""","""5""",,"""30307510""",False,False,False,False,,
,,,,,,,,,,
"""10""","""0052B""","""5""","""G""","""G0052B10""",False,False,False,True,True,False
,,,,,,,,,,
