In [1]:
import polars as pl
import numpy as np
import pandas as pd

# Create Dataframe

In [2]:
df = pl.DataFrame({'teacher':['John', 'Lucy', 'John', 'Tom', 'Helen'],
                   'price': np.random.randint(20,45,5),
                   'lessons': np.random.randint(10,25,5)
                   })
df.head(5)

teacher,price,lessons
str,i32,i32
"""John""",29,10
"""Lucy""",36,21
"""John""",22,17
"""Tom""",43,23
"""Helen""",20,20


In [3]:
df_pd = pd.DataFrame({'teacher':['John', 'Lucy', 'John', 'Tom', 'Helen'],
                   'price': np.random.randint(20,45,5),
                   'lessons': np.random.randint(10,25,5)
                   })
df_pd.head(5)

Unnamed: 0,teacher,price,lessons
0,John,44,18
1,Lucy,21,18
2,John,29,19
3,Tom,32,15
4,Helen,38,10


# Filter

In [4]:
df_filtered = df.filter(
                        (pl.col('teacher')=='John') & (pl.col('price')>10)
                        )

df_filtered

teacher,price,lessons
str,i32,i32
"""John""",29,10
"""John""",22,17


In [5]:
df_pd_filtered = df_pd[(df_pd['teacher']=='John') & (df_pd['price']>10)]

df_pd_filtered

Unnamed: 0,teacher,price,lessons
0,John,44,18
2,John,29,19


# Operations with columns

**Divide**

In [6]:
df = df.with_columns(
                    ((pl.col('price') / pl.col('lessons'))*100).alias('profit')
                    )

df.head(5)

teacher,price,lessons,profit
str,i32,i32,f64
"""John""",29,10,290.0
"""Lucy""",36,21,171.428571
"""John""",22,17,129.411765
"""Tom""",43,23,186.956522
"""Helen""",20,20,100.0


In [7]:
df_pd['profit'] = (df_pd['price'] / df_pd['lessons'])*100
df_pd

Unnamed: 0,teacher,price,lessons,profit
0,John,44,18,244.444444
1,Lucy,21,18,116.666667
2,John,29,19,152.631579
3,Tom,32,15,213.333333
4,Helen,38,10,380.0


**SQRT**

In [8]:
df = df.with_columns(
                        pl.col('profit').sqrt().alias('sqrt')
                    )

df.head(5)

teacher,price,lessons,profit,sqrt
str,i32,i32,f64,f64
"""John""",29,10,290.0,17.029386
"""Lucy""",36,21,171.428571,13.093073
"""John""",22,17,129.411765,11.375929
"""Tom""",43,23,186.956522,13.673205
"""Helen""",20,20,100.0,10.0


In [9]:
df_pd['sqrt'] = np.sqrt(df_pd['profit'])
df_pd.head(5)

Unnamed: 0,teacher,price,lessons,profit,sqrt
0,John,44,18,244.444444,15.634719
1,Lucy,21,18,116.666667,10.801234
2,John,29,19,152.631579,12.354415
3,Tom,32,15,213.333333,14.605935
4,Helen,38,10,380.0,19.493589


# Conditions

In [10]:
df_con = df.with_columns(
                        pl.when(pl.col('teacher')=='John').then(1).otherwise(0).alias('new column')
                        )
df_con.head(5)  

teacher,price,lessons,profit,sqrt,new column
str,i32,i32,f64,f64,i32
"""John""",29,10,290.0,17.029386,1
"""Lucy""",36,21,171.428571,13.093073,0
"""John""",22,17,129.411765,11.375929,1
"""Tom""",43,23,186.956522,13.673205,0
"""Helen""",20,20,100.0,10.0,0


In [11]:
df_pd['new column'] = np.where(df_pd['teacher']=='John', 1, 0)
df_pd.head(5)  

Unnamed: 0,teacher,price,lessons,profit,sqrt,new column
0,John,44,18,244.444444,15.634719,1
1,Lucy,21,18,116.666667,10.801234,0
2,John,29,19,152.631579,12.354415,1
3,Tom,32,15,213.333333,14.605935,0
4,Helen,38,10,380.0,19.493589,0


# Date

In [12]:
start = pd.to_datetime('2024-01-01')
end = start + pd.DateOffset(months=11)

In [13]:
df_new = pl.DataFrame(
                        pl.datetime_range(
                        start,
                        end,
                        interval='1mo',
                        eager=True,
                        ).alias('date')
                    )
df_new

date
datetime[μs]
2024-01-01 00:00:00
2024-02-01 00:00:00
2024-03-01 00:00:00
2024-04-01 00:00:00
2024-05-01 00:00:00
…
2024-08-01 00:00:00
2024-09-01 00:00:00
2024-10-01 00:00:00
2024-11-01 00:00:00


In [14]:
date_pd = pd.DataFrame({'date': pd.period_range('2024-01-01', periods=12, freq='M')
                        })
date_pd

Unnamed: 0,date
0,2024-01
1,2024-02
2,2024-03
3,2024-04
4,2024-05
5,2024-06
6,2024-07
7,2024-08
8,2024-09
9,2024-10


In [15]:
type(date_pd['date'])


pandas.core.series.Series