## Overview:
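Demo of rendering Jinja templates stored in a DataFrame column, row by row, using PySpark. Each row's template is filled in from the other columns of that row; if a column the template uses is empty, the rendered result is left blank.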

In [2]:
# Install Jinja2 from PyPI through the Databricks library utility so that
# `jinja2` can be imported by the import cell that follows.
# NOTE(review): no version is pinned here, so the installed release may differ
# between runs -- consider pinning. Also confirm that the target runtime still
# provides `dbutils.library.installPyPI` (newer runtimes may expect `%pip install`).
dbutils.library.installPyPI('jinja2')

In [3]:
import re
import pandas as pd

from jinja2 import Template
from pyspark.sql.functions import array, struct, lit

In [4]:
@udf('string')
def keyword_gen(col_struct, col_list):
    '''
    Render the Jinja template stored in a row's ``templates`` field.

    The fields named in ``col_list`` are exposed to the template as variables.
    A value counts as missing when it is ``None`` or an empty string (``0`` and
    ``False`` are real values).  If the template text itself is missing, or the
    template references a column whose value is missing, the row renders to an
    empty string.  Names used by the template that are not in ``col_list`` are
    left to Jinja's normal undefined handling.

    :param col_struct:  Dataframe struct type holding the row's values
    :param col_list:  List of column names to read from ``col_struct``
    :return:  Rendered string, or '' when a required value is missing
    :raises jinja2.TemplateSyntaxError:  If the template text is malformed
    '''
    # Imported inside the function so the names resolve wherever the UDF body
    # is executed, without relying on notebook-level globals.
    from jinja2 import Environment, meta

    def is_missing(value):
        # Only null / empty string mean "no data"; other falsy values are kept.
        return value is None or value == ''

    raw_template = col_struct['templates']
    if is_missing(raw_template):
        return ''
    source = str(raw_template)

    context = {col: col_struct[col] for col in col_list}

    # Inspect the parsed template for the variables it actually uses instead of
    # injecting a sentinel string and searching the rendered text for it: a
    # sentinel can collide with real data and can be mangled by filters.
    env = Environment()
    referenced = meta.find_undeclared_variables(env.parse(source))
    if any(is_missing(context[name]) for name in referenced if name in context):
        return ''

    return env.from_string(source).render(context)

In [5]:
# Build the sample Spark dataframe: one tuple per row, giving the two input
# columns followed by the Jinja template that should be rendered for that row.
df = spark.createDataFrame(
    pd.DataFrame(
        [
            ('apple0', '', '{{col1}} text'),
            ('apple1', 'banana1', '{{col1|upper}} text'),
            ('', 'banana2', 'this {{col0|title}} to {{col1|title}}'),
            ('apple3', 'banana3', 'col0 {{col0|upper}} to {{col1|title}} col1'),
        ],
        columns=['col0', 'col1', 'templates'],
    )
)

In [6]:
# Add a 'render' column: every row is packed into a struct and handed to the
# UDF together with the list of column names it should expose to the template.
df_template = df.withColumn(
    'render',
    keyword_gen(
        struct(*df.columns),
        array(*map(lit, df.columns)),
    ),
)
display(df_template)

col0,col1,templates,render
apple0,,{{col1}} text,
apple1,banana1,{{col1|upper}} text,BANANA1 text
,banana2,this {{col0|title}} to {{col1|title}},
apple3,banana3,col0 {{col0|upper}} to {{col1|title}} col1,col0 APPLE3 to Banana3 col1
