![Polars.png](attachment:Polars.png)

<span style="font-family:MV Boli; font-weight:Bold; font-size:2.8em; color:#00b3e5;"> Polars
    
  - **pip install polars** (use **pip install polars-lts-cpu** on older CPUs without AVX2 support)

In [1]:
import polars as pl

#####      

<span style="font-family:MV Boli; font-weight:Bold; font-size:2.3em; color:#00b3e5;"> Series Object</span>

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> Creating Series</span>

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> from list</span>

In [2]:
# Unnamed integer Series built straight from a Python list.
s = pl.Series(values=[1, 2, 3, 4, 5])
s

1
2
3
4
5


In [3]:
# Named Series with an explicit dtype: the integer literals are stored as
# 32-bit floats (the output shows dtype f32 and values 1.0 .. 5.0).
s = pl.Series(name='S', values=[1, 2, 3, 4, 5], dtype=pl.Float32)
s

S
f32
1.0
2.0
3.0
4.0
5.0


<span style="font-family:MV Boli; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> from dictionary</span>

# 

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> Accessing Data</span>

In [4]:
# Ten integers 0, 10, ..., 90 used by the slicing examples that follow.
s = pl.Series(values=list(range(0, 100, 10)))
s

0
10
20
30
40
50
60
70
80
90


<span style="font-family:MV Boli; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> first n</span>

In [5]:
# Slice from the start: the first three elements (positions 0, 1 and 2).
s[:3]

0
10
20


<span style="font-family:MV Boli; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> last n</span>

In [6]:
# Negative start index: the last three elements of the Series.
s[-3:]

70
80
90


# 

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> Math</span>

In [7]:
# Two integer Series of equal length.
S1 = pl.Series(values=[0, 1, 2, 3, 4, 5])
S2 = pl.Series(values=[0, 10, 20, 30, 40, 50])

# Arithmetic is applied element by element; the `/` division yields floats,
# so the displayed result is a float Series.
(S1 * 5) + (S2 / 2) + 100

100.0
110.0
120.0
130.0
140.0
150.0


In [8]:
# Two string Series of equal length.
S1 = pl.Series(values=['A', 'B', 'C', 'D', 'E', 'F'])
S2 = pl.Series(values=['0', '10', '20', '30', '40', '50'])

# `+` on string Series concatenates element by element ("A" + "_" + "0" -> "A_0").
(S1 + '_') + S2

"""A_0"""
"""B_10"""
"""C_20"""
"""D_30"""
"""E_40"""
"""F_50"""


# 

<span style="font-family:MV Boli; font-weight:Bold; font-size:2.3em; color:#00b3e5;"> Data Frame</span>

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> Creating Data Frame</span>

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> from dictionary</span>

In [9]:
# Column-oriented construction: every dict key becomes a column name and
# its list supplies that column's values.
df = pl.DataFrame(
    {
        "Name": ['Bob', 'Sam', 'Anne', 'Rose', 'Lily'],
        "Marks": [76, 25, 92, 100, 100],
        "Month": ['May', 'Oct', 'Jan', 'Aug', 'Aug'],
        "Height": [192, 187, 165, 172, 172],
    }
)
df

Name,Marks,Month,Height
str,i64,str,i64
"""Bob""",76,"""May""",192
"""Sam""",25,"""Oct""",187
"""Anne""",92,"""Jan""",165
"""Rose""",100,"""Aug""",172
"""Lily""",100,"""Aug""",172


<span style="font-family:MV Boli; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> from list</span>

In [10]:
# Row-oriented construction: each inner list is one record, `schema` names
# the columns, and orient='row' tells Polars to read the data row by row.
df = pl.DataFrame(
    data=[
        ['Bob', 76, 'May', 192],
        ['Sam', 25, 'Oct', 187],
        ['Anne', 92, 'Jan', 165],
        ['Rose', 100, 'Aug', 172],
        ['Lily', 100, 'Aug', 172],
    ],
    schema=['Name', 'Marks', 'Month', 'Height'],
    orient='row',
)
df

Name,Marks,Month,Height
str,i64,str,i64
"""Bob""",76,"""May""",192
"""Sam""",25,"""Oct""",187
"""Anne""",92,"""Jan""",165
"""Rose""",100,"""Aug""",172
"""Lily""",100,"""Aug""",172


# 

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> Dropping</span>

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> indexes</span>

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> features</span>

In [11]:
# Display a copy of the frame without the listed columns. `df` itself is not
# reassigned here, so later cells still see all four columns.
df.drop(['Name','Height'])

Marks,Month
i64,str
76,"""May"""
25,"""Oct"""
92,"""Jan"""
100,"""Aug"""
100,"""Aug"""


# 

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> Viewing</span>

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> rows</span>

In [12]:
# Keep only the rows that satisfy every condition below (combined with `&`).
# Missing values in Polars are represented as null, not NaN, so the
# "Height is present" check uses is_not_null(); is_not_nan() only concerns
# floating-point NaN and says nothing about missing integer values.
df.filter( (pl.col('Month' )   ==  'Aug'  )
         & (pl.col('Marks' )    >   95    )
         & (pl.col('Height').is_not_null() )
         & (pl.col('Height').is_between( 170, 175) )   # both bounds inclusive by default
         )

Name,Marks,Month,Height
str,i64,str,i64
"""Rose""",100,"""Aug""",172
"""Lily""",100,"""Aug""",172


<span style="font-family:MV Boli; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> cols</span>

In [13]:
# Two-dimensional indexing: `:` keeps every row, the list picks the columns
# (returned in the order listed, Month before Marks).
df[ : , ['Month','Marks'] ]

Month,Marks
str,i64
"""May""",76
"""Oct""",25
"""Jan""",92
"""Aug""",100
"""Aug""",100


# 

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> Statistics</span>

In [14]:
# Summary table with one row per statistic (count, null_count, mean, std,
# min, quartiles, max); mean/std/quartiles are empty for the string columns.
df.describe()

statistic,Name,Marks,Month,Height
str,str,f64,str,f64
"""count""","""5""",5.0,"""5""",5.0
"""null_count""","""0""",0.0,"""0""",0.0
"""mean""",,78.6,,177.6
"""std""",,31.524594,,11.371016
"""min""","""Anne""",25.0,"""Aug""",165.0
"""25%""",,76.0,,172.0
"""50%""",,92.0,,172.0
"""75%""",,100.0,,187.0
"""max""","""Sam""",100.0,"""Oct""",192.0


In [15]:
# Number of non-null values in each column, returned as a one-row frame.
df.count()

Name,Marks,Month,Height
u32,u32,u32,u32
5,5,5,5


In [16]:
# Select the two numeric columns, then take the mean of each (one-row frame).
df[ ['Marks','Height'] ].mean()

Marks,Height
f64,f64
78.6,177.6


In [17]:
# Median of each selected numeric column (one-row frame).
df[ ['Marks','Height'] ].median()

Marks,Height
f64,f64
92.0,172.0


In [21]:
# Most frequent value(s) of the Marks column; 100 appears twice in this data.
df['Marks'].mode()

Marks
i64
100


In [22]:
# Standard deviation of each selected numeric column (one-row frame).
df[ ['Marks','Height'] ].std()

Marks,Height
f64,f64
31.524594,11.371016


In [23]:
# Smallest value of each selected numeric column (one-row frame).
df[ ['Marks','Height'] ].min()

Marks,Height
i64,i64
25,165


In [24]:
# Largest value of each selected numeric column (one-row frame).
df[ ['Marks','Height'] ].max()

Marks,Height
i64,i64
100,192


# 

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> Applying Functions</span>

In [28]:
# Plain Python loop over the column's values; the result is a Python list,
# not a Polars Series.
[ x*2 for x in df['Marks'] ]

[152, 50, 184, 200, 200]

In [27]:
def Doubler(x):
    """Return ``x * 2``.

    Works for anything that supports multiplication by an integer; the
    notebook passes a Polars Series, so the doubling is applied to the
    whole column at once.
    """
    doubled = x * 2
    return doubled

# Pass the whole column to the function: the result is a Series, unlike the
# list produced by the comprehension above.
Doubler( df['Marks'] )

Marks
i64
152
50
184
200
200


In [59]:
def New_Feature( df, ft ):
    """Build a one-column frame holding column ``ft`` multiplied by 10.

    Parameters
    ----------
    df : pl.DataFrame
        Frame that contains the source column.
    ft : str
        Name of the column to scale.

    Returns
    -------
    pl.DataFrame
        A single column named ``"new_ft"`` with the scaled values. The dtype
        is whatever the multiplication produces; no cast to a fixed dtype is
        applied, so the function is not tied to Int64 input columns.
    """
    scaled = df[ft] * 10

    # to_frame(name) wraps the Series in a DataFrame and names the column
    # in one step, without hard-coding the column dtype.
    return scaled.to_frame("new_ft")


# Derive the new column from Marks, then attach it to the right of the
# original frame. how='horizontal' places the frames side by side.
New = New_Feature( df, 'Marks' )
df2 = pl.concat( [df, New], how='horizontal' )
print( df2 )

shape: (5, 5)
┌──────┬───────┬───────┬────────┬────────┐
│ Name ┆ Marks ┆ Month ┆ Height ┆ new_ft │
│ ---  ┆ ---   ┆ ---   ┆ ---    ┆ ---    │
│ str  ┆ i64   ┆ str   ┆ i64    ┆ i64    │
╞══════╪═══════╪═══════╪════════╪════════╡
│ Bob  ┆ 76    ┆ May   ┆ 192    ┆ 760    │
│ Sam  ┆ 25    ┆ Oct   ┆ 187    ┆ 250    │
│ Anne ┆ 92    ┆ Jan   ┆ 165    ┆ 920    │
│ Rose ┆ 100   ┆ Aug   ┆ 172    ┆ 1000   │
│ Lily ┆ 100   ┆ Aug   ┆ 172    ┆ 1000   │
└──────┴───────┴───────┴────────┴────────┘


# 

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> SubCat Count</span>

In [43]:
# Number of distinct values in the Month column (Aug occurs twice, so 4).
df['Month'].n_unique()

4

In [87]:
# Frequency table: one row per distinct Month with its number of occurrences.
# NOTE(review): the row order in the output is not sorted; pass sort=True if a
# stable, count-ordered table is wanted.
print(  df['Month'].value_counts()  )

shape: (4, 2)
┌───────┬───────┐
│ Month ┆ count │
│ ---   ┆ ---   │
│ str   ┆ u32   │
╞═══════╪═══════╡
│ Jan   ┆ 1     │
│ Oct   ┆ 1     │
│ May   ┆ 1     │
│ Aug   ┆ 2     │
└───────┴───────┘


# 

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> Sorting</span>

In [86]:
# Multi-key sort: Month ascending first, then Marks descending within the
# same Month (one `descending` flag per sort column).
print(  df.sort(['Month','Marks'], descending=[False,True])  )

shape: (5, 4)
┌──────┬───────┬───────┬────────┐
│ Name ┆ Marks ┆ Month ┆ Height │
│ ---  ┆ ---   ┆ ---   ┆ ---    │
│ str  ┆ i64   ┆ str   ┆ i64    │
╞══════╪═══════╪═══════╪════════╡
│ Rose ┆ 100   ┆ Aug   ┆ 172    │
│ Lily ┆ 100   ┆ Aug   ┆ 172    │
│ Anne ┆ 92    ┆ Jan   ┆ 165    │
│ Bob  ┆ 76    ┆ May   ┆ 192    │
│ Sam  ┆ 25    ┆ Oct   ┆ 187    │
└──────┴───────┴───────┴────────┘


# 

<span style="font-family:MV Boli; font-weight:Bold; font-size:1.8em; color:#00b3e5;"> Combining</span>

<span style="font-family:Arial; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> concat ( vertical )</span>

In [80]:
# Two frames that share the same column names (Col_1, Col_2).
df1 = pl.DataFrame(
    {
        'Col_1': [10, 11, 12, 13, 14],
        'Col_2': ['Apple', 'Mango', 'Banana', 'Cherry', 'Peach'],
    }
)

df2 = pl.DataFrame(
    {
        'Col_1': [15, 16, 17, 18, 19],
        'Col_2': ['Guava', 'Raspberry', 'Strawberry', 'Apricot', 'Orange'],
    }
)

# how='vertical' stacks the rows of df2 underneath the rows of df1.
df3 = pl.concat([df1, df2], how='vertical')

print(df3)

shape: (10, 2)
┌───────┬────────────┐
│ Col_1 ┆ Col_2      │
│ ---   ┆ ---        │
│ i64   ┆ str        │
╞═══════╪════════════╡
│ 10    ┆ Apple      │
│ 11    ┆ Mango      │
│ 12    ┆ Banana     │
│ 13    ┆ Cherry     │
│ 14    ┆ Peach      │
│ 15    ┆ Guava      │
│ 16    ┆ Raspberry  │
│ 17    ┆ Strawberry │
│ 18    ┆ Apricot    │
│ 19    ┆ Orange     │
└───────┴────────────┘


<span style="font-family:Arial; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> concat ( horizontal )</span>

In [82]:
# Two frames with the same number of rows but different column names.
df1 = pl.DataFrame(
    {
        'Col_1': [10, 11, 12, 13, 14],
        'Col_2': ['Apple', 'Mango', 'Banana', 'Cherry', 'Peach'],
    }
)

df2 = pl.DataFrame(
    {
        'Col_3': [15, 16, 17, 18, 19],
        'Col_4': ['Guava', 'Raspberry', 'Strawberry', 'Apricot', 'Orange'],
    }
)

# how='horizontal' places the columns of df2 to the right of df1's columns.
df3 = pl.concat([df1, df2], how='horizontal')

print(df3)

shape: (5, 4)
┌───────┬────────┬───────┬────────────┐
│ Col_1 ┆ Col_2  ┆ Col_3 ┆ Col_4      │
│ ---   ┆ ---    ┆ ---   ┆ ---        │
│ i64   ┆ str    ┆ i64   ┆ str        │
╞═══════╪════════╪═══════╪════════════╡
│ 10    ┆ Apple  ┆ 15    ┆ Guava      │
│ 11    ┆ Mango  ┆ 16    ┆ Raspberry  │
│ 12    ┆ Banana ┆ 17    ┆ Strawberry │
│ 13    ┆ Cherry ┆ 18    ┆ Apricot    │
│ 14    ┆ Peach  ┆ 19    ┆ Orange     │
└───────┴────────┴───────┴────────────┘


<span style="font-family:Arial; font-weight:Bold; font-size:1.5em; color:#00b3e5;"> join</span>

In [76]:
# Employee attributes, keyed by emp_id.
df1 = pl.DataFrame(
    {
        'emp_id': ['101', '102', '103', '104'],
        'dept': ['content', 'content', 'sales', 'tech'],
        'gender': ['m', 'f', 'm', 'm'],
        'salary': [12000, 15000, 18000, 16000],
    }
)

# Location and savings data for an overlapping, but not identical, set of ids.
df2 = pl.DataFrame(
    {
        'emp_id': ['101', '103', '104', '105'],
        'location': ['New York', 'Boston', 'Boston', 'New Jersey'],
        'distance': [16, 17, 44, 21],
        'saving': [25000, 27000, 12000, 17000],
    }
)

# A 'full' join keeps ids found on either side and fills the missing side with
# null; coalesce=True merges the two key columns into a single emp_id column.
# Other strategies: 'inner', 'left', 'right', 'full', 'semi', 'anti', 'cross'.
df3 = df1.join(
    df2,
    on='emp_id',
    how='full',
    coalesce=True,
)

print(df3)

shape: (5, 7)
┌────────┬─────────┬────────┬────────┬────────────┬──────────┬────────┐
│ emp_id ┆ dept    ┆ gender ┆ salary ┆ location   ┆ distance ┆ saving │
│ ---    ┆ ---     ┆ ---    ┆ ---    ┆ ---        ┆ ---      ┆ ---    │
│ str    ┆ str     ┆ str    ┆ i64    ┆ str        ┆ i64      ┆ i64    │
╞════════╪═════════╪════════╪════════╪════════════╪══════════╪════════╡
│ 101    ┆ content ┆ m      ┆ 12000  ┆ New York   ┆ 16       ┆ 25000  │
│ 103    ┆ sales   ┆ m      ┆ 18000  ┆ Boston     ┆ 17       ┆ 27000  │
│ 104    ┆ tech    ┆ m      ┆ 16000  ┆ Boston     ┆ 44       ┆ 12000  │
│ 105    ┆ null    ┆ null   ┆ null   ┆ New Jersey ┆ 21       ┆ 17000  │
│ 102    ┆ content ┆ f      ┆ 15000  ┆ null       ┆ null     ┆ null   │
└────────┴─────────┴────────┴────────┴────────────┴──────────┴────────┘


#####   

#####   