In [7]:
import numpy as np
def discretize_date(current_date, t):
    """Encode one cyclical component of a date as a sine or cosine value.

    Parameters
    ----------
    current_date : datetime.datetime or str-convertible
        A ``datetime`` instance (subclasses included) is used as-is. Anything
        else is converted with ``str()`` and parsed as ``YYYY-MM-DD HH:MM:SS``
        with an optional fractional-seconds suffix.
    t : str
        Which feature to compute: 'hour_sin', 'hour_cos', 'day_sin',
        'day_cos', 'week_day_sin' or 'week_day_cos'.

    Returns
    -------
    numpy.float64 or None
        The requested sine/cosine value, or None when ``t`` is not one of the
        names listed above.

    Raises
    ------
    ValueError
        If a non-datetime ``current_date`` matches neither text format.
    """
    # Imported locally so this cell does not depend on a later cell having
    # already run `from datetime import datetime`.
    from datetime import datetime

    if isinstance(current_date, datetime):
        cdate = current_date
    else:
        # The fractional part is optional: str() of a datetime whose
        # microsecond is 0 has no ".ffffff" suffix, so blindly slicing off the
        # last seven characters would cut into the time itself.
        text = str(current_date)
        try:
            cdate = datetime.strptime(text, '%Y-%m-%d %H:%M:%S.%f')
        except ValueError:
            cdate = datetime.strptime(text, '%Y-%m-%d %H:%M:%S')

    day_of_year = cdate.timetuple().tm_yday

    if t == 'hour_sin':
        return np.sin(2 * np.pi * cdate.hour/24.0)
    if t == 'hour_cos':
        return np.cos(2 * np.pi * cdate.hour/24.0)
    # The yearly cycle is divided by a fixed 365, so day 366 of a leap year
    # maps slightly past one full turn.
    if t == 'day_sin':
        return np.sin(2 * np.pi * day_of_year/365.0)
    if t == 'day_cos':
        return np.cos(2 * np.pi * day_of_year/365.0)
    # NOTE(review): the weekly features use day-of-year / 7 rather than
    # cdate.weekday(), so their phase is anchored to 1 January instead of a
    # fixed weekday. Kept unchanged because altering it would change existing
    # feature values -- confirm whether this is intended.
    if t == 'week_day_sin':
        return np.sin(2 * np.pi * day_of_year/7.0)
    if t == 'week_day_cos':
        return np.cos(2 * np.pi * day_of_year/7.0)
    return None

In [12]:
import pandas as pd
import warnings
from datetime import datetime

# (output name, top-level section holding the value, key inside that section)
_NESTED_FIELDS = (
    ('destinationCountry', 'destinationAmountDetails', 'country'),
    ('destinationCurrency', 'destinationAmountDetails', 'transactionCurrency'),
    ('destinationAmount', 'destinationAmountDetails', 'transactionAmount'),
    ('originCountry', 'originAmountDetails', 'country'),
    ('originCurrency', 'originAmountDetails', 'transactionCurrency'),
    ('originAmount', 'originAmountDetails', 'transactionAmount'),
    ('destinationMethod', 'destinationPaymentDetails', 'method'),
    ('originMethod', 'originPaymentDetails', 'method'),
)

# (output name, top-level key read with .get)
_TOP_LEVEL_FIELDS = (
    ('state', 'transactionState'),
    ('transactionId', 'transactionId'),
    ('originUserId', 'originUserId'),
    ('destinationUserId', 'destinationUserId'),
)

_DATE_FEATURES = ('hour_sin', 'hour_cos', 'day_sin', 'day_cos', 'week_day_sin', 'week_day_cos')


def _lookup(container, key):
    """Return ``container[key]``, or None if the key is absent or ``container`` is not subscriptable."""
    try:
        return container[key]
    except (KeyError, IndexError, TypeError):
        return None


def _timestamp_to_datetime(raw_timestamp):
    """Convert a ``{'$numberLong': <epoch milliseconds>}`` mapping to a datetime."""
    # NOTE(review): datetime.fromtimestamp without a tz argument returns local
    # time, so the hour/day features depend on the machine's timezone.
    return datetime.fromtimestamp(int(raw_timestamp['$numberLong']) / 1000)


def preprocess_data(data):
    """Flatten raw transaction data into model features.

    Parameters
    ----------
    data : dict or DataFrame-like
        A dict (or dict subclass) describing a single transaction, or a
        table with one transaction per row. The table must support ``.get``,
        ``.index``, column access and ``in`` -- presumably a pandas DataFrame;
        confirm against callers.

    Returns
    -------
    dict or pandas.DataFrame
        For a dict input, a flat dict in which missing values are None. For a
        tabular input, a DataFrame in which missing non-date values are the
        string 'N/A'. Both carry the six cyclical date features produced by
        ``discretize_date`` when a timestamp is available.

    Warns
    -----
    UserWarning
        "No timestamp provided" when the timestamp (or its '$numberLong'
        entry, for a dict) is missing; the date features are then omitted.
    """
    if isinstance(data, dict):
        features = {}
        for name, section, key in _NESTED_FIELDS:
            # A missing section, a missing key and a None section all yield None.
            features[name] = _lookup(_lookup(data, section), key)
        for name, source_key in _TOP_LEVEL_FIELDS:
            features[name] = data.get(source_key)

        try:
            moment = _timestamp_to_datetime(data['timestamp'])
        except KeyError:
            warnings.warn("No timestamp provided")
        else:
            for dt in _DATE_FEATURES:
                features[dt] = discretize_date(moment, dt)
        return features

    features = pd.DataFrame(index=data.index)
    for name, source_key in _TOP_LEVEL_FIELDS:
        features[name] = data.get(source_key)
    for name, section, key in _NESTED_FIELDS:
        column = data.get(section)
        if column is None:
            # Whole section absent: mirror the dict path instead of raising.
            features[name] = None
        else:
            features[name] = column.map(lambda cell, key=key: _lookup(cell, key))
    # Only the non-date columns are filled; date features are added afterwards.
    features = features.fillna('N/A')

    if 'timestamp' in data:
        moments = data['timestamp'].map(_timestamp_to_datetime)
        for dt in _DATE_FEATURES:
            features[dt] = moments.apply(lambda moment, dt=dt: discretize_date(moment, dt))
    else:
        warnings.warn("No timestamp provided")

    return features

In [13]:
# Run the single-record (dict) path on one sample transaction and display the
# resulting feature dict. "destinationUserId" is absent from this payload;
# preprocess_data reads it with data.get, so it comes back as None.
preprocess_data({
  "destinationAmountDetails": {
    "country": "IN",
    "transactionCurrency": "INR",
    "transactionAmount": 10132.8
  },
  "transactionState": "CREATED",
  "destinationPaymentDetails": {
    "method": "GENERIC_BANK_ACCOUNT"
  },
  "originPaymentDetails": {
    "method": "GENERIC_BANK_ACCOUNT"
  },
  "originAmountDetails": {
    "country": "IN",
    "transactionCurrency": "INR",
    "transactionAmount": 10132.8
  },
  "timestamp": {
    "$numberLong": "1662358419786"
  },
  "transactionId": "bd70fcaebc254c23b07b29fd994ba5f2",
  "originUserId": "29529892-22d3-4a74-b6f2-fbe1d5ee8b6f"
})

{'destinationCountry': 'IN',
 'destinationCurrency': 'INR',
 'destinationAmount': 10132.8,
 'originCountry': 'IN',
 'originCurrency': 'INR',
 'originAmount': 10132.8,
 'state': 'CREATED',
 'destinationMethod': 'GENERIC_BANK_ACCOUNT',
 'originMethod': 'GENERIC_BANK_ACCOUNT',
 'transactionId': 'bd70fcaebc254c23b07b29fd994ba5f2',
 'originUserId': '29529892-22d3-4a74-b6f2-fbe1d5ee8b6f',
 'destinationUserId': None,
 'hour_sin': 0.258819045102521,
 'hour_cos': -0.9659258262890682,
 'day_sin': -0.9033558023246842,
 'day_cos': -0.4288919379124841,
 'week_day_sin': 0.43388373911756595,
 'week_day_cos': -0.9009688679024154}