# Polars Extensions Usage

## Read & Write Schemas

In [5]:
import polars as pl
import polars_extensions as plx

In [6]:
# Load the sample employee records used throughout the notebook.
# The date columns are read as plain strings (see the dtype row in the output).
data = pl.read_csv('datasets/employees.csv')
# A bare name as the last expression renders the frame as the cell output.
data

employee_id,first_name,last_name,email,job_title,date_of_birth,date_of_hire,salary
i64,str,str,str,str,str,str,i64
1,"""john""","""doe""","""john.doe@example.com""","""software_engineer""","""1990-05-12""","""2015-08-01""",85000
2,"""jane""","""smith""","""jane.smith@example.com""","""data_scientist""","""1988-11-23""","""2017-03-15""",95000
3,"""bob""","""johnson""","""bob.johnson@example.com""","""product_manager""","""1985-07-19""","""2012-10-10""",105000
4,"""alice""","""davis""","""alice.davis@example.com""","""ux_designer""","""1992-04-06""","""2020-01-21""",78000
5,"""charlie""","""brown""","""charlie.brown@example.com""","""qa_engineer""","""1993-09-14""","""2019-07-08""",72000


In [7]:
# Save the schema of `data` to a JSON file in the working directory;
# the next cell reads the same file back.
plx.write_schema(data,'schema.json')

In [8]:
# Read the schema file written by the previous cell.
schema = plx.read_schema('schema.json')
# Displayed as a Schema of (column name, dtype) pairs matching `data`.
schema

Schema([('employee_id', Int64),
        ('first_name', String),
        ('last_name', String),
        ('email', String),
        ('job_title', String),
        ('date_of_birth', String),
        ('date_of_hire', String),
        ('salary', Int64)])

## Case Conventions

In [9]:
import polars as pl
# Imported for its side effect: loading the package is what exposes the
# `name_ext` accessor used in the cells below. An explicit aliased import is
# used instead of a wildcard so no unnamed symbols enter the notebook namespace.
import polars_extensions as plx

# Reload the employee sample so this section can be run on its own.
data = pl.read_csv('datasets/employees.csv')
# A bare name as the last expression renders the frame as the cell output.
data

employee_id,first_name,last_name,email,job_title,date_of_birth,date_of_hire,salary
i64,str,str,str,str,str,str,i64
1,"""john""","""doe""","""john.doe@example.com""","""software_engineer""","""1990-05-12""","""2015-08-01""",85000
2,"""jane""","""smith""","""jane.smith@example.com""","""data_scientist""","""1988-11-23""","""2017-03-15""",95000
3,"""bob""","""johnson""","""bob.johnson@example.com""","""product_manager""","""1985-07-19""","""2012-10-10""",105000
4,"""alice""","""davis""","""alice.davis@example.com""","""ux_designer""","""1992-04-06""","""2020-01-21""",78000
5,"""charlie""","""brown""","""charlie.brown@example.com""","""qa_engineer""","""1993-09-14""","""2019-07-08""",72000


In [10]:
# Rename every column to kebab-case (lowercase words joined by hyphens).
# Only the column names change; the cell values are untouched.
# NOTE(review): the method is spelled "kebeb" here. The rendered output shows
# the call succeeded, so this matches the installed API -- confirm against the
# library before renaming it to "kebab".
data.name_ext.to_kebeb_case()

employee-id,first-name,last-name,email,job-title,date-of-birth,date-of-hire,salary
i64,str,str,str,str,str,str,i64
1,"""john""","""doe""","""john.doe@example.com""","""software_engineer""","""1990-05-12""","""2015-08-01""",85000
2,"""jane""","""smith""","""jane.smith@example.com""","""data_scientist""","""1988-11-23""","""2017-03-15""",95000
3,"""bob""","""johnson""","""bob.johnson@example.com""","""product_manager""","""1985-07-19""","""2012-10-10""",105000
4,"""alice""","""davis""","""alice.davis@example.com""","""ux_designer""","""1992-04-06""","""2020-01-21""",78000
5,"""charlie""","""brown""","""charlie.brown@example.com""","""qa_engineer""","""1993-09-14""","""2019-07-08""",72000


In [11]:
# Rename every column to Train-Case (capitalised words joined by hyphens),
# e.g. employee_id -> Employee-Id. Cell values are untouched.
data.name_ext.to_train_case()

Employee-Id,First-Name,Last-Name,Email,Job-Title,Date-Of-Birth,Date-Of-Hire,Salary
i64,str,str,str,str,str,str,i64
1,"""john""","""doe""","""john.doe@example.com""","""software_engineer""","""1990-05-12""","""2015-08-01""",85000
2,"""jane""","""smith""","""jane.smith@example.com""","""data_scientist""","""1988-11-23""","""2017-03-15""",95000
3,"""bob""","""johnson""","""bob.johnson@example.com""","""product_manager""","""1985-07-19""","""2012-10-10""",105000
4,"""alice""","""davis""","""alice.davis@example.com""","""ux_designer""","""1992-04-06""","""2020-01-21""",78000
5,"""charlie""","""brown""","""charlie.brown@example.com""","""qa_engineer""","""1993-09-14""","""2019-07-08""",72000


In [12]:
# Rename every column to PascalCase (capitalised words, no separator),
# e.g. date_of_birth -> DateOfBirth. Cell values are untouched.
data.name_ext.to_pascal_case()


EmployeeId,FirstName,LastName,Email,JobTitle,DateOfBirth,DateOfHire,Salary
i64,str,str,str,str,str,str,i64
1,"""john""","""doe""","""john.doe@example.com""","""software_engineer""","""1990-05-12""","""2015-08-01""",85000
2,"""jane""","""smith""","""jane.smith@example.com""","""data_scientist""","""1988-11-23""","""2017-03-15""",95000
3,"""bob""","""johnson""","""bob.johnson@example.com""","""product_manager""","""1985-07-19""","""2012-10-10""",105000
4,"""alice""","""davis""","""alice.davis@example.com""","""ux_designer""","""1992-04-06""","""2020-01-21""",78000
5,"""charlie""","""brown""","""charlie.brown@example.com""","""qa_engineer""","""1993-09-14""","""2019-07-08""",72000


In [13]:
# Rename every column to snake_case. The source CSV headers are already
# snake_case, so the displayed column names are the same as the input's.
data.name_ext.to_snake_case()

employee_id,first_name,last_name,email,job_title,date_of_birth,date_of_hire,salary
i64,str,str,str,str,str,str,i64
1,"""john""","""doe""","""john.doe@example.com""","""software_engineer""","""1990-05-12""","""2015-08-01""",85000
2,"""jane""","""smith""","""jane.smith@example.com""","""data_scientist""","""1988-11-23""","""2017-03-15""",95000
3,"""bob""","""johnson""","""bob.johnson@example.com""","""product_manager""","""1985-07-19""","""2012-10-10""",105000
4,"""alice""","""davis""","""alice.davis@example.com""","""ux_designer""","""1992-04-06""","""2020-01-21""",78000
5,"""charlie""","""brown""","""charlie.brown@example.com""","""qa_engineer""","""1993-09-14""","""2019-07-08""",72000


In [14]:
# Rename every column to camelCase (first word lowercase, later words
# capitalised), e.g. job_title -> jobTitle. Cell values are untouched.
data.name_ext.to_camel_case()

employeeId,firstName,lastName,email,jobTitle,dateOfBirth,dateOfHire,salary
i64,str,str,str,str,str,str,i64
1,"""john""","""doe""","""john.doe@example.com""","""software_engineer""","""1990-05-12""","""2015-08-01""",85000
2,"""jane""","""smith""","""jane.smith@example.com""","""data_scientist""","""1988-11-23""","""2017-03-15""",95000
3,"""bob""","""johnson""","""bob.johnson@example.com""","""product_manager""","""1985-07-19""","""2012-10-10""",105000
4,"""alice""","""davis""","""alice.davis@example.com""","""ux_designer""","""1992-04-06""","""2020-01-21""",78000
5,"""charlie""","""brown""","""charlie.brown@example.com""","""qa_engineer""","""1993-09-14""","""2019-07-08""",72000


In [15]:
# Rename every column to Pascal_Snake_Case (capitalised words joined by
# underscores), e.g. date_of_hire -> Date_Of_Hire. Cell values are untouched.
data.name_ext.to_pascal_snake_case()


Employee_Id,First_Name,Last_Name,Email,Job_Title,Date_Of_Birth,Date_Of_Hire,Salary
i64,str,str,str,str,str,str,i64
1,"""john""","""doe""","""john.doe@example.com""","""software_engineer""","""1990-05-12""","""2015-08-01""",85000
2,"""jane""","""smith""","""jane.smith@example.com""","""data_scientist""","""1988-11-23""","""2017-03-15""",95000
3,"""bob""","""johnson""","""bob.johnson@example.com""","""product_manager""","""1985-07-19""","""2012-10-10""",105000
4,"""alice""","""davis""","""alice.davis@example.com""","""ux_designer""","""1992-04-06""","""2020-01-21""",78000
5,"""charlie""","""brown""","""charlie.brown@example.com""","""qa_engineer""","""1993-09-14""","""2019-07-08""",72000


## Numeric 

In [16]:
import polars as pl
# Small single-column integer frame used as input for the Roman-numeral demos.
df = pl.DataFrame({"numbers": [1, 2, 309, 4, 5]})
# A bare name as the last expression renders the frame as the cell output.
df

numbers
i64
1
2
309
4
5


In [17]:
import polars_extensions as plx

# Add a "Roman" string column holding the Roman-numeral form of `numbers`.
# The keyword name sets the output column name.
result = df.with_columns(Roman=pl.col('numbers').num_ext.to_roman())

result

numbers,Roman
i64,str
1,"""I"""
2,"""II"""
309,"""CCCIX"""
4,"""IV"""
5,"""V"""


In [18]:
# Convert the Roman numerals back to integers in a "Decoded" column, which
# should match `numbers` row for row. The keyword name sets the column name.
new_result = result.with_columns(Decoded=pl.col('Roman').num_ext.from_roman())
new_result

numbers,Roman,Decoded
i64,str,i64
1,"""I""",1
2,"""II""",2
309,"""CCCIX""",309
4,"""IV""",4
5,"""V""",5


## String

In [19]:
import polars_extensions as plx
import polars as pl 

# Load the string-pair sample.
# NOTE(review): this rebinds `data`, which earlier cells use for the
# employees frame; re-running those cells after this one would act on the
# wrong dataset.
data = pl.read_csv('datasets/string_sim.csv')

# Compare columns 'a' and 'c' row by row; the output adds a float `f1_score`
# column next to the two inputs.
# NOTE(review): pairs differing only in letter case or character order score
# 1.0 in the output ("banana"/"BANANA", "date"/"etad"), so the metric looks
# case- and order-insensitive -- confirm against the library docs.
data.str_ext.f1_string_similarity('a','c')

a,c,f1_score
str,str,f64
"""apple""","""appl""",0.888889
"""banana""","""BANANA""",1.0
"""cherry""","""cherr""",0.909091
"""date""","""etad""",1.0
"""elderberry""","""elderberrys""",0.952381
…,…,…
"""kiwi""","""KIW""",0.857143
"""lemon""","""lemons""",0.909091
"""mangoes are Tangy""","""mango are Tangy""",0.9375
"""it was the best of times""","""it was the worst of times""",0.897959
