# Cleaning Data with Polars

In [3]:
import pandas as pd
import polars as pl
import seaborn as sns

# Load seaborn's 'planets' example dataset; the result is bound to `planets`
# and reused by the cells below.
planets = sns.load_dataset(name='planets')
# Display (n_rows, n_columns) as the cell output.
planets.shape

(1035, 6)

In [5]:
# Count the missing values in each column (isna() and isnull() are aliases).
planets.isna().sum()

method              0
number              0
orbital_period     43
mass              522
distance          227
year                0
dtype: int64

In [9]:
# Build one pandas frame and one Polars frame from the same source so the two
# libraries can be compared side by side.
#
# Use an explicit copy for the pandas side: pd.DataFrame(planets) does not copy
# by default and, depending on the pandas version, can share storage with
# `planets`. A later cell assigns into df_pd['mass'] in place, so without the
# copy that imputation could leak back into `planets`.
df_pd = planets.copy()

# pl.from_pandas is the dedicated pandas -> Polars converter; by default it
# turns NaN in the pandas data into Polars nulls, which is what the
# fill_null / drop_nulls cells below operate on.
df_pl = pl.from_pandas(planets)

In [10]:
# pandas: impute missing 'mass' values with the column mean.
# mean() skips NaN by default, so the average is taken over observed masses only.
mean_mass = df_pd['mass'].mean()
df_pd['mass'] = df_pd['mass'].fillna(mean_mass)

In [11]:
# Polars: the same imputation, expressed as a column expression.
# with_columns returns a new frame in which 'mass' is replaced by the filled column.
mass_imputed = pl.col('mass').fill_null(strategy='mean')
df_pl = df_pl.with_columns(mass_imputed)

In [13]:
# Per-column null counts as a one-row frame, to confirm that 'mass' no longer
# contains nulls after the imputation.
df_pl.null_count()

method,number,orbital_period,mass,distance,year
u32,u32,u32,u32,u32,u32
0,0,43,0,227,0


In [14]:
# pandas: drop every row that still has a missing value in any column.
# The arguments spell out dropna()'s defaults; df_pd itself is left unchanged.
df_pd_2 = df_pd.dropna(axis='index', how='any')
df_pd_2

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.300000,7.100000,77.40,2006
1,Radial Velocity,1,874.774000,2.210000,56.95,2008
2,Radial Velocity,1,763.000000,2.600000,19.84,2011
3,Radial Velocity,1,326.030000,19.400000,110.62,2007
4,Radial Velocity,1,516.220000,10.500000,119.47,2009
...,...,...,...,...,...,...
1030,Transit,1,3.941507,2.638161,172.00,2006
1031,Transit,1,2.615864,2.638161,148.00,2007
1032,Transit,1,3.191524,2.638161,174.00,2007
1033,Transit,1,4.125083,2.638161,293.00,2008


In [15]:
# Polars counterpart of dropna(): with no subset, a null in any column
# removes the row. df_pl itself is left unchanged.
df_pl_2 = df_pl.drop_nulls(subset=None)
df_pl_2

method,number,orbital_period,mass,distance,year
str,i64,f64,f64,f64,i64
"""Radial Velocity""",1,269.3,7.1,77.4,2006
"""Radial Velocity""",1,874.774,2.21,56.95,2008
"""Radial Velocity""",1,763.0,2.6,19.84,2011
"""Radial Velocity""",1,326.03,19.4,110.62,2007
"""Radial Velocity""",1,516.22,10.5,119.47,2009
…,…,…,…,…,…
"""Transit""",1,3.941507,2.638161,172.0,2006
"""Transit""",1,2.615864,2.638161,148.0,2007
"""Transit""",1,3.1915239,2.638161,174.0,2007
"""Transit""",1,4.1250828,2.638161,293.0,2008


In [17]:
# Restrict the null check to a single column: only rows whose 'orbital_period'
# is null are removed; nulls in the other columns are kept.
df_pl_3 = df_pl.drop_nulls(subset=['orbital_period'])
df_pl_3

method,number,orbital_period,mass,distance,year
str,i64,f64,f64,f64,i64
"""Radial Velocity""",1,269.3,7.1,77.4,2006
"""Radial Velocity""",1,874.774,2.21,56.95,2008
"""Radial Velocity""",1,763.0,2.6,19.84,2011
"""Radial Velocity""",1,326.03,19.4,110.62,2007
"""Radial Velocity""",1,516.22,10.5,119.47,2009
…,…,…,…,…,…
"""Transit""",1,3.941507,2.638161,172.0,2006
"""Transit""",1,2.615864,2.638161,148.0,2007
"""Transit""",1,3.1915239,2.638161,174.0,2007
"""Transit""",1,4.1250828,2.638161,293.0,2008
