### DATA TRANSFORMATION & CLEANING

##### Transform epoch datetime (merch_eff_time & merch_last_update_time)
###### - the timezone is currently stored as a parameter in the JSON

In [1]:
# The function below accepts an epoch timestamp (only its first 10 digits, i.e. whole seconds, are used) and translates it to a datetime with a time zone

def udf_parse_epoch(_epoch, _timezone):
    """Turn an epoch value into a zone-labelled datetime string.

    Only the first 10 characters of ``str(_epoch)`` are parsed. For a
    present-day epoch expressed in seconds, milliseconds or microseconds
    those characters are the whole-second part, so sub-second precision is
    discarded.

    The seconds are decoded as a UTC wall-clock time and that wall-clock
    time is then labelled with ``_timezone`` (the clock digits are not
    shifted). This reproduces the notebook's example output on every host.
    NOTE(review): if the intent is to convert the instant into the target
    zone instead, use ``datetime.fromtimestamp(seconds, tz=zone)`` - confirm
    with the data owner.

    Args:
        _epoch: Epoch value (int or numeric string).
        _timezone: IANA zone name understood by pytz, e.g.
            ``"Asia/Kuala_Lumpur"``.

    Returns:
        A string such as ``'2012-01-02 18:19:50+08:00'``, or ``""`` when the
        epoch cannot be parsed or the zone name is not recognised.
    """
    import pytz
    from datetime import datetime, timezone

    _result = ""

    try:
        _seconds = int(str(_epoch)[:10])
        # Decode against UTC explicitly: fromtimestamp() without tz uses the
        # host's local zone, which made the result machine-dependent.
        _dt = datetime.fromtimestamp(_seconds, tz=timezone.utc).replace(tzinfo=None)
        _zone = pytz.timezone(_timezone)
        _result = str(_zone.localize(_dt))
    except Exception:
        # Deliberate best effort: any unparsable input yields "".
        pass

    return _result

# Example call with a 16-digit epoch value and the Kuala Lumpur zone.
udf_parse_epoch(_epoch=1325528390443962, _timezone="Asia/Kuala_Lumpur")

'2012-01-02 18:19:50+08:00'

##### Transform Credit Card number (cc_num)
###### - all credit card numbers should be masked; however, because the JSON is NOT the original (first) source in this example, one-way masking is recommended

In [2]:
# The function below accepts an input and masks all digits except the first and last sections

def udf_parse_cc_num(_value):
    """Mask a card number, keeping only its first six and last four characters.

    The value is converted with ``str()`` and every character between the
    leading six and trailing four is replaced by ``#``; the output has the
    same length as the input text.

    Args:
        _value: Card number as an int or string.

    Returns:
        The masked string, e.g. ``'445236######9043'``. Values of ten
        characters or fewer are masked completely, and ``None`` yields ``""``.
    """
    if _value is None:
        # Avoid masking the literal text "None".
        return ""

    _text = str(_value)
    _keep_head, _keep_tail = 6, 4

    # With ten or fewer characters the head and tail slices overlap and would
    # reveal the entire value, so hide everything instead.
    if len(_text) <= _keep_head + _keep_tail:
        return '#' * len(_text)

    _hidden = '#' * (len(_text) - _keep_head - _keep_tail)
    return _text[:_keep_head] + _hidden + _text[-_keep_tail:]

# Example call with a 16-digit card number.
udf_parse_cc_num(_value=4452366298769043)

'445236######9043'

##### Transform merchant name (merchant)

In [3]:
# The function below accepts an input and removes "fraud_"

def udf_parse_merchant_name(_value):
    """Strip every occurrence of ``"fraud_"`` from a merchant name.

    Args:
        _value: Merchant name; expected to offer a ``replace`` method.

    Returns:
        The name without ``"fraud_"``, or ``""`` when the replacement raises
        (for example when ``_value`` is ``None``).
    """
    try:
        cleaned = _value.replace("fraud_", "")
    except Exception:
        # Best effort: anything that cannot be cleaned becomes "".
        return ""
    else:
        return cleaned

# Example call with a "fraud_"-prefixed merchant name.
udf_parse_merchant_name(_value="fraud_Leffler-Goldner")

'Leffler-Goldner'

##### Transform datetime (trans_date_trans_time)

In [4]:
# The function below adds a time zone to the existing value

def udf_parse_datetime(_datestring, _timezone):
    """Parse a datetime string and return it with a time zone attached.

    A string without an offset is treated as wall-clock time in
    ``_timezone`` and labelled accordingly. A string that already carries an
    offset is converted to the same instant in ``_timezone``; previously such
    input was rejected by ``localize()`` and silently produced ``""``.

    Args:
        _datestring: Text accepted by ``dateutil.parser.parse``, e.g.
            ``"2019-01-02 18:00:09"``.
        _timezone: IANA zone name understood by pytz, e.g.
            ``"Asia/Kuala_Lumpur"``.

    Returns:
        A string such as ``'2019-01-02 18:00:09+08:00'``, or ``""`` when the
        text cannot be parsed or the zone name is not recognised.
    """
    from dateutil import parser
    import pytz

    _result = ""
    try:
        _dt = parser.parse(_datestring)
        _zone = pytz.timezone(_timezone)
        if _dt.tzinfo is None:
            # Naive value: attach the zone without shifting the clock.
            _result = str(_zone.localize(_dt))
        else:
            # Aware value: localize() would raise, so convert the instant.
            _result = str(_dt.astimezone(_zone))
    except Exception:
        # Deliberate best effort: any unparsable input yields "".
        pass

    return _result

# Example call with an offset-free datetime string and the Kuala Lumpur zone.
udf_parse_datetime(_datestring="2019-01-02 18:00:09", _timezone="Asia/Kuala_Lumpur")

'2019-01-02 18:00:09+08:00'

##### Transform name (person_name)

In [5]:
# The function below retrieves the first or last name from person_name

def udf_parse_name(_name, _name_position) -> str:
    """Pick one part of a delimited person name and strip non-alphanumerics.

    ``|`` is first rewritten to ``,``; the text is then split on ``,``, ``@``
    or ``/`` and the piece at ``_name_position`` is selected. Every character
    outside ``A-Z``, ``a-z`` and ``0-9`` is removed from that piece.

    Args:
        _name: Raw name text, e.g. ``"Jeremy|Roberson!!!"``.
        _name_position: Index of the wanted piece (0 = first, 1 = second).

    Returns:
        The cleaned piece. If a step fails, whatever was obtained before the
        failure is returned - ``""`` when nothing had been selected yet.
    """
    import re

    split_pattern = ',|@|/'
    junk_pattern = '[^A-Za-z0-9]+'
    piece = ""

    try:
        normalised = _name.replace("|", ",")
        piece = re.split(split_pattern, normalised)[_name_position]
        piece = re.sub(junk_pattern, '', piece)
    except Exception:
        # Best effort: keep whatever value `piece` held when the error hit.
        pass

    return piece


# Example: extract both parts of a "|"-delimited name with trailing punctuation.
print("First name : {0}".format(udf_parse_name(_name="Jeremy|Roberson!!!", _name_position=0)))
print("Last name : {0}".format(udf_parse_name(_name="Jeremy|Roberson!!!", _name_position=1)))

First name : Jeremy
Last name : Roberson
