In [23]:
import pandas as pd
import polars as pl
import seaborn as sns

# Load seaborn's example "diamonds" dataset; the cells below use it as the
# shared starting point for both the pandas and the Polars walkthroughs.
diamonds = sns.load_dataset(name='diamonds')

## This is how we can approach two common tasks, adding a column and a column-to-column calculation, in Polars compared to pandas

In [24]:
# Build the pandas working frame with .assign(), which returns a NEW DataFrame.
# The previous pd.DataFrame(diamonds) wrapper does not copy by default, so the
# column assignments that followed could leak into `diamonds` (depending on the
# pandas version), which the Polars cell reads next. Deriving the frame from
# `diamonds` in one expression also makes this cell safe to re-run.
df_pd = diamonds.assign(
    # Constant USD -> EUR rate, broadcast to every row.
    USD_Euro_conversion=0.92,
    # Keyword arguments are evaluated in order, so the callable can already
    # see the USD_Euro_conversion column created just above.
    price_euro=lambda frame: frame['price'] * frame['USD_Euro_conversion'],
)
df_pd

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z,USD_Euro_conversion,price_euro
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43,0.92,299.92
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31,0.92,299.92
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31,0.92,300.84
3,0.29,Premium,I,VS2,62.4,58.0,334,4.20,4.23,2.63,0.92,307.28
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75,0.92,308.20
...,...,...,...,...,...,...,...,...,...,...,...,...
53935,0.72,Ideal,D,SI1,60.8,57.0,2757,5.75,5.76,3.50,0.92,2536.44
53936,0.72,Good,D,SI1,63.1,55.0,2757,5.69,5.75,3.61,0.92,2536.44
53937,0.70,Very Good,D,SI1,62.8,60.0,2757,5.66,5.68,3.56,0.92,2536.44
53938,0.86,Premium,H,SI2,61.0,58.0,2757,6.15,6.12,3.74,0.92,2536.44


In [25]:
# Convert the pandas frame to Polars, then add the same two columns as in the
# pandas cell using Polars expressions (pl.lit / pl.col) instead of indexing
# eager Series off the frame variable.
df_pl = (
    pl.from_pandas(diamonds)
    # Constant USD -> EUR rate as a literal column.
    .with_columns(pl.lit(0.92).alias('USD_Euro_conversion'))
    # Separate with_columns call: expressions inside one call cannot refer to
    # a column that is being created in that same call.
    .with_columns(
        (pl.col('price') * pl.col('USD_Euro_conversion')).alias('price_euro')
    )
)
df_pl

carat,cut,color,clarity,depth,table,price,x,y,z,USD_Euro_conversion,price_euro
f64,cat,cat,cat,f64,f64,i64,f64,f64,f64,f64,f64
0.23,"""Ideal""","""E""","""SI2""",61.5,55.0,326,3.95,3.98,2.43,0.92,299.92
0.21,"""Premium""","""E""","""SI1""",59.8,61.0,326,3.89,3.84,2.31,0.92,299.92
0.23,"""Good""","""E""","""VS1""",56.9,65.0,327,4.05,4.07,2.31,0.92,300.84
0.29,"""Premium""","""I""","""VS2""",62.4,58.0,334,4.2,4.23,2.63,0.92,307.28
0.31,"""Good""","""J""","""SI2""",63.3,58.0,335,4.34,4.35,2.75,0.92,308.2
…,…,…,…,…,…,…,…,…,…,…,…
0.72,"""Ideal""","""D""","""SI1""",60.8,57.0,2757,5.75,5.76,3.5,0.92,2536.44
0.72,"""Good""","""D""","""SI1""",63.1,55.0,2757,5.69,5.75,3.61,0.92,2536.44
0.7,"""Very Good""","""D""","""SI1""",62.8,60.0,2757,5.66,5.68,3.56,0.92,2536.44
0.86,"""Premium""","""H""","""SI2""",61.0,58.0,2757,6.15,6.12,3.74,0.92,2536.44


## Another common operation in data manipulation is dropping columns

In [26]:
# pandas: remove the conversion-rate column by label along the column axis.
df_pd = df_pd.drop('USD_Euro_conversion', axis='columns')
df_pd

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z,price_euro
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43,299.92
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31,299.92
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31,300.84
3,0.29,Premium,I,VS2,62.4,58.0,334,4.20,4.23,2.63,307.28
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75,308.20
...,...,...,...,...,...,...,...,...,...,...,...
53935,0.72,Ideal,D,SI1,60.8,57.0,2757,5.75,5.76,3.50,2536.44
53936,0.72,Good,D,SI1,63.1,55.0,2757,5.69,5.75,3.61,2536.44
53937,0.70,Very Good,D,SI1,62.8,60.0,2757,5.66,5.68,3.56,2536.44
53938,0.86,Premium,H,SI2,61.0,58.0,2757,6.15,6.12,3.74,2536.44


In [27]:
# Polars: drop() returns a new frame without the listed column(s).
df_pl = df_pl.drop(['USD_Euro_conversion'])
df_pl

carat,cut,color,clarity,depth,table,price,x,y,z,price_euro
f64,cat,cat,cat,f64,f64,i64,f64,f64,f64,f64
0.23,"""Ideal""","""E""","""SI2""",61.5,55.0,326,3.95,3.98,2.43,299.92
0.21,"""Premium""","""E""","""SI1""",59.8,61.0,326,3.89,3.84,2.31,299.92
0.23,"""Good""","""E""","""VS1""",56.9,65.0,327,4.05,4.07,2.31,300.84
0.29,"""Premium""","""I""","""VS2""",62.4,58.0,334,4.2,4.23,2.63,307.28
0.31,"""Good""","""J""","""SI2""",63.3,58.0,335,4.34,4.35,2.75,308.2
…,…,…,…,…,…,…,…,…,…,…
0.72,"""Ideal""","""D""","""SI1""",60.8,57.0,2757,5.75,5.76,3.5,2536.44
0.72,"""Good""","""D""","""SI1""",63.1,55.0,2757,5.69,5.75,3.61,2536.44
0.7,"""Very Good""","""D""","""SI1""",62.8,60.0,2757,5.66,5.68,3.56,2536.44
0.86,"""Premium""","""H""","""SI2""",61.0,58.0,2757,6.15,6.12,3.74,2536.44


## Two final common operations in data manipulation are renaming columns and changing the dtype of a column

In [28]:
# Polars: rename() takes a mapping of {existing name: new name}.
column_renames = {'price_euro': 'price_in_euro'}
df_pl = df_pl.rename(column_renames)
df_pl

carat,cut,color,clarity,depth,table,price,x,y,z,price_in_euro
f64,cat,cat,cat,f64,f64,i64,f64,f64,f64,f64
0.23,"""Ideal""","""E""","""SI2""",61.5,55.0,326,3.95,3.98,2.43,299.92
0.21,"""Premium""","""E""","""SI1""",59.8,61.0,326,3.89,3.84,2.31,299.92
0.23,"""Good""","""E""","""VS1""",56.9,65.0,327,4.05,4.07,2.31,300.84
0.29,"""Premium""","""I""","""VS2""",62.4,58.0,334,4.2,4.23,2.63,307.28
0.31,"""Good""","""J""","""SI2""",63.3,58.0,335,4.34,4.35,2.75,308.2
…,…,…,…,…,…,…,…,…,…,…
0.72,"""Ideal""","""D""","""SI1""",60.8,57.0,2757,5.75,5.76,3.5,2536.44
0.72,"""Good""","""D""","""SI1""",63.1,55.0,2757,5.69,5.75,3.61,2536.44
0.7,"""Very Good""","""D""","""SI1""",62.8,60.0,2757,5.66,5.68,3.56,2536.44
0.86,"""Premium""","""H""","""SI2""",61.0,58.0,2757,6.15,6.12,3.74,2536.44


In [29]:
# Polars: cast the `carat` column to 32-bit floats.
# Note: the result is only displayed here; it is not assigned back, so `df_pl`
# itself keeps its original `carat` dtype after this cell.
carat_as_f32 = pl.col('carat').cast(pl.Float32)
df_pl.with_columns(carat_as_f32)

carat,cut,color,clarity,depth,table,price,x,y,z,price_in_euro
f32,cat,cat,cat,f64,f64,i64,f64,f64,f64,f64
0.23,"""Ideal""","""E""","""SI2""",61.5,55.0,326,3.95,3.98,2.43,299.92
0.21,"""Premium""","""E""","""SI1""",59.8,61.0,326,3.89,3.84,2.31,299.92
0.23,"""Good""","""E""","""VS1""",56.9,65.0,327,4.05,4.07,2.31,300.84
0.29,"""Premium""","""I""","""VS2""",62.4,58.0,334,4.2,4.23,2.63,307.28
0.31,"""Good""","""J""","""SI2""",63.3,58.0,335,4.34,4.35,2.75,308.2
…,…,…,…,…,…,…,…,…,…,…
0.72,"""Ideal""","""D""","""SI1""",60.8,57.0,2757,5.75,5.76,3.5,2536.44
0.72,"""Good""","""D""","""SI1""",63.1,55.0,2757,5.69,5.75,3.61,2536.44
0.7,"""Very Good""","""D""","""SI1""",62.8,60.0,2757,5.66,5.68,3.56,2536.44
0.86,"""Premium""","""H""","""SI2""",61.0,58.0,2757,6.15,6.12,3.74,2536.44
