In [1]:
import polars as pl
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt

In [29]:
# Demo frame 01: three rows, one column per basic dtype
# (integer, datetime, float, string).

df = pl.DataFrame(
    {
        "integer": [1, 2, 3],
        # 1-3 January 2025, midnight
        "date": [datetime(2025, 1, day) for day in (1, 2, 3)],
        "float": [5.0, 4.0, 6.0],
        "string": ["c", "b", "a"],
    }
)

# Plain-text rendering: shape, column dtypes and all rows.
print(df)

shape: (3, 4)
┌─────────┬─────────────────────┬───────┬────────┐
│ integer ┆ date                ┆ float ┆ string │
│ ---     ┆ ---                 ┆ ---   ┆ ---    │
│ i64     ┆ datetime[μs]        ┆ f64   ┆ str    │
╞═════════╪═════════════════════╪═══════╪════════╡
│ 1       ┆ 2025-01-01 00:00:00 ┆ 5.0   ┆ c      │
│ 2       ┆ 2025-01-02 00:00:00 ┆ 4.0   ┆ b      │
│ 3       ┆ 2025-01-03 00:00:00 ┆ 6.0   ┆ a      │
└─────────┴─────────────────────┴───────┴────────┘


In [30]:
# Bare last expression: the notebook shows the frame with its rich table display.
df

integer,date,float,string
i64,datetime[μs],f64,str
1,2025-01-01 00:00:00,5.0,"""c"""
2,2025-01-02 00:00:00,4.0,"""b"""
3,2025-01-03 00:00:00,6.0,"""a"""


In [31]:
# Integer indexing picks a row: this yields a one-row frame holding the first row.
df[0]

integer,date,float,string
i64,datetime[μs],f64,str
1,2025-01-01 00:00:00,5.0,"""c"""


In [32]:
# Number of rows. Use the built-in len() instead of calling the dunder
# method directly; the result is the same.
len(df)

3

In [33]:
# Negative index counts from the end: one-row frame holding the last row.
df[-1]

integer,date,float,string
i64,datetime[μs],f64,str
3,2025-01-03 00:00:00,6.0,"""a"""


In [34]:
# The single row with the smallest value in the "string" column.
df.bottom_k(k=1, by="string")

integer,date,float,string
i64,datetime[μs],f64,str
3,2025-01-03 00:00:00,6.0,"""a"""


In [35]:
# The single row with the smallest value in the "float" column.
df.bottom_k(k=1, by="float")

integer,date,float,string
i64,datetime[μs],f64,str
2,2025-01-02 00:00:00,4.0,"""b"""


In [44]:
# The single row with the largest value in the "string" column.
df.top_k(k=1, by="string")

integer,date,float,string
i64,datetime[μs],f64,str
1,2025-01-01 00:00:00,5.0,"""c"""


In [43]:
# reverse=True flips the ordering, so top_k now yields the row with the
# smallest "string" value (same row that bottom_k returns for this column).
df.top_k(k=1, by="string", reverse=True)

integer,date,float,string
i64,datetime[μs],f64,str
3,2025-01-03 00:00:00,6.0,"""a"""


In [66]:
# Number of non-null values in every column, returned as a one-row frame.
df.count()

integer,date,float,string
u32,u32,u32,u32
3,3,3,3


In [73]:
# Number of non-null values in one column: select the column, then count.
df["string"].count()

3

In [58]:
# Column names, in frame order.
print("Column names -> df.columns : ", df.columns)

Column names -> df.columns :  ['integer', 'date', 'float', 'string']


In [65]:
# Summary statistics per column: count, null_count, mean, std, min,
# 25%/50%/75% quantiles and max.
df.describe()

statistic,integer,date,float,string
str,f64,str,f64,str
"""count""",3.0,"""3""",3.0,"""3"""
"""null_count""",0.0,"""0""",0.0,"""0"""
"""mean""",2.0,"""2025-01-02 00:00:00""",5.0,
"""std""",1.0,,1.0,
"""min""",1.0,"""2025-01-01 00:00:00""",4.0,"""a"""
"""25%""",2.0,"""2025-01-02 00:00:00""",5.0,
"""50%""",2.0,"""2025-01-02 00:00:00""",5.0,
"""75%""",3.0,"""2025-01-03 00:00:00""",6.0,
"""max""",3.0,"""2025-01-03 00:00:00""",6.0,"""c"""


In [51]:
# Row count and column count are exposed as the `height` and `width` attributes.
print("Number of rows -> df.height : ", df.height)
print("Number of columns -> df.width : ", df.width)

Number of rows -> df.height :  3
Number of columns -> df.width :  4


In [56]:
# `shape` is a (rows, columns) tuple.
print("Shape of dataframe -> df.shape : ", df.shape)

Shape of dataframe -> df.shape :  (3, 4)


In [60]:
# First n rows of the frame.
df.head(n=2)

integer,date,float,string
i64,datetime[μs],f64,str
1,2025-01-01 00:00:00,5.0,"""c"""
2,2025-01-02 00:00:00,4.0,"""b"""


In [61]:
# Negative n: every row except the last abs(n) rows.
df.head(n=-2)

integer,date,float,string
i64,datetime[μs],f64,str
1,2025-01-01 00:00:00,5.0,"""c"""


In [62]:
# Last n rows of the frame.
df.tail(n=2)

integer,date,float,string
i64,datetime[μs],f64,str
2,2025-01-02 00:00:00,4.0,"""b"""
3,2025-01-03 00:00:00,6.0,"""a"""


In [63]:
# Negative n: every row except the first abs(n) rows.
df.tail(n=-2)

integer,date,float,string
i64,datetime[μs],f64,str
3,2025-01-03 00:00:00,6.0,"""a"""


In [81]:
# Append an integer column named "test". with_columns returns a new frame;
# the result is only displayed here, not assigned back to df.
df.with_columns(pl.Series("test", [4, 7, 8]))

integer,date,float,string,test
i64,datetime[μs],f64,str,i64
1,2025-01-01 00:00:00,5.0,"""c""",4
2,2025-01-02 00:00:00,4.0,"""b""",7
3,2025-01-03 00:00:00,6.0,"""a""",8


In [None]:
# Show the frame again; the with_columns result above was not assigned back to df.
df