In [423]:
import polars as pl
import numpy as np
import pandas as pd

# Create Dataframe

In [424]:
df = pl.DataFrame({'teacher':['John', 'Lucy', 'John', 'Tom', 'Helen'],
                   'price': np.random.randint(20,45,5),
                   'lessons': np.random.randint(10,25,5)
                   })
df.head(5)

teacher,price,lessons
str,i32,i32
"""John""",44,24
"""Lucy""",39,19
"""John""",37,14
"""Tom""",32,19
"""Helen""",44,10


In [425]:
df_pd = pd.DataFrame({'teacher':['John', 'Lucy', 'John', 'Tom', 'Helen'],
                   'price': np.random.randint(20,45,5),
                   'lessons': np.random.randint(10,25,5)
                   })
df_pd.head(5)

Unnamed: 0,teacher,price,lessons
0,John,23,22
1,Lucy,35,18
2,John,34,22
3,Tom,44,15
4,Helen,31,24


# Filter

In [426]:
df_filtered = df.filter(
                        (pl.col('teacher')=='John') & (pl.col('price')>10)
                        )

df_filtered

teacher,price,lessons
str,i32,i32
"""John""",44,24
"""John""",37,14


In [427]:
df_pd_filtered = df_pd[(df_pd['teacher']=='John') & (df_pd['price']>10)]

df_pd_filtered

Unnamed: 0,teacher,price,lessons
0,John,23,22
2,John,34,22


# Operations with columns

**Divide**

In [428]:
df = df.with_columns(
                    ((pl.col('price') / pl.col('lessons'))*100).alias('profit')
                    )

df.head(5)

teacher,price,lessons,profit
str,i32,i32,f64
"""John""",44,24,183.333333
"""Lucy""",39,19,205.263158
"""John""",37,14,264.285714
"""Tom""",32,19,168.421053
"""Helen""",44,10,440.0


In [429]:
df_pd['profit'] = (df_pd['price'] / df_pd['lessons'])*100
df_pd

Unnamed: 0,teacher,price,lessons,profit
0,John,23,22,104.545455
1,Lucy,35,18,194.444444
2,John,34,22,154.545455
3,Tom,44,15,293.333333
4,Helen,31,24,129.166667


**SQRT**

In [430]:
df = df.with_columns(
                        pl.col('profit').sqrt().alias('sqrt')
                    )

df.head(5)

teacher,price,lessons,profit,sqrt
str,i32,i32,f64,f64
"""John""",44,24,183.333333,13.540064
"""Lucy""",39,19,205.263158,14.327008
"""John""",37,14,264.285714,16.256867
"""Tom""",32,19,168.421053,12.977714
"""Helen""",44,10,440.0,20.976177


In [431]:
df_pd['sqrt'] = np.sqrt(df_pd['profit'])
df_pd.head(5)

Unnamed: 0,teacher,price,lessons,profit,sqrt
0,John,23,22,104.545455,10.224747
1,Lucy,35,18,194.444444,13.944334
2,John,34,22,154.545455,12.431631
3,Tom,44,15,293.333333,17.126977
4,Helen,31,24,129.166667,11.365151


# Conditions

In [432]:
df_con = df.with_columns(
                        pl.when(pl.col('teacher')=='John').then(1).otherwise(0).alias('new column')
                        )
df_con.head(5)  

teacher,price,lessons,profit,sqrt,new column
str,i32,i32,f64,f64,i32
"""John""",44,24,183.333333,13.540064,1
"""Lucy""",39,19,205.263158,14.327008,0
"""John""",37,14,264.285714,16.256867,1
"""Tom""",32,19,168.421053,12.977714,0
"""Helen""",44,10,440.0,20.976177,0


In [433]:
df_pd['new column'] = np.where(df_pd['teacher']=='John', 1, 0)
df_pd.head(5)  

Unnamed: 0,teacher,price,lessons,profit,sqrt,new column
0,John,23,22,104.545455,10.224747,1
1,Lucy,35,18,194.444444,13.944334,0
2,John,34,22,154.545455,12.431631,1
3,Tom,44,15,293.333333,17.126977,0
4,Helen,31,24,129.166667,11.365151,0


# Date

In [434]:
start = pd.to_datetime('2024-01-01')
end = start + pd.DateOffset(months=11)

In [435]:
df_new = pl.DataFrame(
                        pl.datetime_range(
                        start,
                        end,
                        interval='1mo',
                        eager=True,
                        ).alias('date')
                    )
df_new

date
datetime[μs]
2024-01-01 00:00:00
2024-02-01 00:00:00
2024-03-01 00:00:00
2024-04-01 00:00:00
2024-05-01 00:00:00
…
2024-08-01 00:00:00
2024-09-01 00:00:00
2024-10-01 00:00:00
2024-11-01 00:00:00


In [436]:
date_pd = pd.DataFrame({'date': pd.period_range('2024-01-01', periods=12, freq='M')
                        })
date_pd

Unnamed: 0,date
0,2024-01
1,2024-02
2,2024-03
3,2024-04
4,2024-05
5,2024-06
6,2024-07
7,2024-08
8,2024-09
9,2024-10


In [437]:
type(date_pd['date'])


pandas.core.series.Series