### Tagging Rows

Examples of how to tag rows and manipulate tagging.

In [None]:
import pandas as pd
import polars as pl
import numpy as np
import random
from rtsvg import *
# RACETrack comes from the `rtsvg` star import above. NOTE(review): `rt` is not
# referenced by any of the cells visible here -- confirm whether it is still needed.
rt = RACETrack()

### Pandas Versions

In [None]:
# Ten random samples form the 'x' column; every row starts out untagged (None).
r = [random.random() for _ in range(10)]
df = pd.DataFrame({'x': r}).assign(tag=None)

#
# Replacing a tag: assigning through a boolean row mask overwrites whatever
# tag the selected rows held before.
#
t_or_f = df['x'].ge(0.75)
df.loc[t_or_f, 'tag'] = 'gte 0.75'
df.loc[df['x'].lt(0.5), 'tag'] = 'lt 0.50'

df.sort_values(by='x')

In [None]:
#
# Adding to a tag... deduplication occurs as well
#
def addToTag(orig, to_add):
    """Return the tag string *orig* with *to_add* merged in.

    A tag string is a '|'-separated list of tags. The result holds every
    distinct, non-empty tag from *orig* plus ``str(to_add)``, sorted and
    joined with '|' again, so adding a tag that is already present is a no-op.

    Parameters:
        orig:   existing tag string, or None / float NaN when the row carries
                no tags yet.
        to_add: tag to merge in; converted with ``str()``.

    Returns:
        The merged tag string ('' when there is nothing to join).
    """
    # None and float NaN both mean "no tags yet"; NaN is the only float that
    # is not equal to itself.
    if orig is None or (isinstance(orig, float) and orig != orig):
        orig = ''
    # Drop empty entries up front ('' splits to ['']) rather than stripping a
    # leading '|' from the joined result, which failed on an empty result.
    _tags_ = {_tag_ for _tag_ in orig.split('|') if _tag_}
    _new_tag_ = str(to_add)
    if _new_tag_:
        _tags_.add(_new_tag_)
    return '|'.join(sorted(_tags_))

# Each rule pairs a row selector with the tag to merge into the selected rows.
# The rules are applied in order; addToTag keeps the merged tags deduplicated.
_tag_rules_ = (
    (lambda _frame_: _frame_.x >= 0.5,  'gte 0.5'),
    (lambda _frame_: _frame_.x <  0.75, 'lt 0.75'),
    (lambda _frame_: _frame_.x <  0.8,  'lt 0.80'),
)
for _select_, _label_ in _tag_rules_:
    t_or_f = _select_(df)
    # Series.apply forwards `args` to addToTag after the element itself.
    df.loc[t_or_f, 'tag'] = df.loc[t_or_f, 'tag'].apply(addToTag, args=(_label_,))

df.sort_values(by='x')

In [None]:
#
# Filtering to a tag (a row may carry other tags too, so the tag is matched exactly against each '|'-separated entry rather than as a substring)
#
def tagIsPresent(x,tag):
    """Return True when *tag* is one of the '|'-separated tags in *x*.

    The comparison is an exact match against each whole entry, so 'lt 0.7'
    is not found inside 'lt 0.75'.

    Parameters:
        x:   tag string, or None / float NaN when the row carries no tags.
        tag: the tag to look for.

    Returns:
        bool -- always False for a missing tag string; empty entries are
        never counted as tags, so the empty tag '' is never present.
    """
    # None and float NaN both mean "no tags"; NaN is the only float that is
    # not equal to itself.
    if x is None or (isinstance(x, float) and x != x):
        return False
    # Skip empty entries so '' (which splits to ['']) holds no tags at all.
    return tag in [_entry_ for _entry_ in x.split('|') if _entry_]

# Boolean mask: True for rows whose tag string contains the exact tag 'lt 0.75'.
_has_tag_ = df['tag'].apply(tagIsPresent, args=('lt 0.75',))
df[_has_tag_].sort_values(by='x')

In [None]:
#
# What if we just have a subset dataframe?
#
# Take the three rows with the smallest x. The explicit copy makes df_sub an
# independent frame; assigning a column on a bare row slice of the sorted
# temporary can raise SettingWithCopyWarning.
df_sub = df.sort_values('x').head(3).copy()
df_sub['tag'] = 'new'
# update() aligns on the index and overwrites df in place with df_sub's
# non-NA values; df_sub kept the original index labels of its rows.
df.update(df_sub)
df.sort_values('x')

### Polars Versions

In [None]:
# Rebuild the frame in polars without the pandas 'tag' column, then add a
# fresh 'tag' column that is null everywhere and cast to the string dtype.
_empty_tag_ = pl.lit(None).cast(str).alias('tag')
dfpl = pl.DataFrame(df).drop(['tag']).with_columns(_empty_tag_)
dfpl

In [None]:
#
# Replacing a tag
#
# Each pair is (row condition, replacement tag). Rows matching the condition
# get the new tag; all other rows keep their current 'tag' value.
for _cond_, _label_ in (
    (pl.col('x') >= 0.75, 'gte 0.75'),
    (pl.col('x') <  0.50, 'lt 0.50'),
):
    _replaced_ = pl.when(_cond_).then(pl.lit(_label_)).otherwise(pl.col('tag')).alias('tag')
    dfpl = dfpl.with_columns(_replaced_)
dfpl.sort('x')

In [None]:
#
# Adding to a tag... deduplication occurs as well
#
# Each pair is (row condition, tag to merge in). Rows matching the condition
# get the tag merged into their existing tag string via addToTag; all other
# rows keep their current 'tag' value.
for _cond_, _label_ in (
    (pl.col('x') >= 0.5,  'gte 0.5'),
    (pl.col('x') <  0.75, 'lt 0.75'),
    (pl.col('x') <  0.80, 'lt 0.80'),
):
    # Bind the label as a default argument so the function does not depend on
    # the loop variable's later values.
    _fn_ = lambda x, _label_=_label_: addToTag(x, _label_)
    # skip_nulls=False lets untagged (null) rows reach addToTag as None.
    # return_dtype is given explicitly: polars warns when it is omitted and
    # would otherwise have to infer the output type from the returned values.
    _merged_ = pl.col('tag').map_elements(_fn_, skip_nulls=False, return_dtype=pl.Utf8)
    dfpl = dfpl.with_columns(pl.when(_cond_).then(_merged_).otherwise(pl.col('tag')))
dfpl.sort('x')

In [None]:
#
# Filtering to a tag (could be other tags... so looking for an exact match for the whole tag string)
#
# Per-row predicate: True when 'lt 0.75' is one of the row's tags.
# return_dtype is given explicitly so polars does not have to infer the
# output type (it warns when the argument is omitted).
# NOTE(review): skip_nulls is left at its default, so null tags presumably
# never reach tagIsPresent and those rows are dropped by the filter -- confirm.
_has_tag_ = pl.col('tag').map_elements(lambda x: tagIsPresent(x,'lt 0.75'), return_dtype=pl.Boolean)
dfpl.filter(_has_tag_).sort('x')

In [None]:
#
# What if we just have a subset dataframe?
# ... because polars has no notion of an index, the update below matches rows
#     on the 'x' column, so the values in 'x' need to be unique...
#
# Subset: rows with x below 0.3, with their tag overwritten by 'new'.
_new_tag_ = pl.lit('new').alias('tag')
dfpl_sub = dfpl.filter(pl.col('x') < 0.3).with_columns(_new_tag_)
# Write the subset's values back into the full frame, matching rows on 'x'.
dfpl = dfpl.update(dfpl_sub, on='x')
dfpl.sort('x')