In [2]:
import json

class Analize:
    """Validate the running balance of an extracted bank-statement JSON file.

    The file must contain a top-level ``transactions`` list. Each transaction
    is read through ``transaction['balance']['value']``,
    ``transaction['debit']['value']`` and ``transaction['credit']['value']``.
    ``process_transactions`` runs the whole pipeline and overwrites the input
    file in place.
    """

    def __init__(self, file_path):
        """Store the JSON path; nothing is read until ``load_data`` is called."""
        self.file_path = file_path
        self.data = None

    def load_data(self):
        """Parse the JSON file at ``self.file_path`` into ``self.data``."""
        # JSON text is UTF-8 by specification; do not depend on the OS default.
        with open(self.file_path, 'r', encoding='utf-8') as file:
            self.data = json.load(file)

    def check_balance_validity(self):
        """Annotate every transaction with a recomputed balance and a validity flag.

        For each transaction after the first, the expected balance is
        ``previous balance + credit - debit``. The first transaction has no
        predecessor, so its own balance is taken as the expected value (and it
        is therefore always marked valid).

        Writes ``calculated_balance['value']`` and ``balance_check['value']``
        on each transaction in ``self.data['transactions']``.
        """
        previous_balance = None

        for transaction in self.data['transactions']:
            current_balance = transaction['balance']['value']
            debit = transaction['debit']['value']
            credit = transaction['credit']['value']

            if previous_balance is None:
                calculated_balance = current_balance
            else:
                calculated_balance = previous_balance + credit - debit

            is_valid = (calculated_balance == current_balance)
            # setdefault creates the container dict on a first run (avoiding a
            # KeyError) while keeping any sibling keys on an existing one.
            transaction.setdefault('calculated_balance', {})['value'] = calculated_balance
            transaction.setdefault('balance_check', {})['value'] = is_valid
            # The chain follows the *reported* balance, so one bad row does not
            # cascade into every later row.
            previous_balance = current_balance

    def add_status(self):
        """Mark the loaded document as analyzed by setting ``data['status']``."""
        self.data['status'] = {
            "analyze": True,
        }

    def save_data(self):
        """Write ``self.data`` back to ``self.file_path``, replacing its contents."""
        with open(self.file_path, 'w', encoding='utf-8') as file:
            json.dump(self.data, file, indent=4)

    def process_transactions(self):
        """Run load -> balance check -> status flag -> save, in that order."""
        self.load_data()
        self.check_balance_validity()
        self.add_status()
        self.save_data()

In [4]:
import json
from datetime import datetime

def load_json_data(file_path):
    """Read the file at ``file_path`` and return its parsed JSON content."""
    with open(file_path, 'r') as handle:
        parsed = json.load(handle)
    return parsed

def parse_date(date_str):
    """Parse a date or date-time string into a ``datetime``.

    Dots are rewritten to colons before parsing, so times written as
    ``hh.mm.ss`` are accepted. Leading and trailing whitespace is ignored.

    Args:
        date_str: Text in one of the formats listed in ``date_formats``.

    Returns:
        The ``datetime`` produced by the first matching format.

    Raises:
        ValueError: If ``date_str`` is not a string or matches no known format.
    """
    if not isinstance(date_str, str):
        # Raise the same exception type as an unparseable string so callers
        # that skip bad rows with ``except ValueError`` also skip missing values.
        raise ValueError(f"Date format for '{date_str}' is not recognized.")

    # Stray whitespace would make every strptime attempt fail.
    date_str = date_str.strip().replace('.', ':')

    # List of possible date formats; order matters because the first match wins.
    date_formats = [
        "%d/%m/%y %H:%M:%S",  # Format: dd/mm/yy hh:mm:ss
        "%d/%m/%Y %H:%M:%S",  # Format: dd/mm/yyyy hh:mm:ss
        "%Y-%m-%d %H:%M:%S",   # Format: yyyy-mm-dd hh:mm:ss
        "%d/%m/%y",            # Format: dd/mm/yy
        "%d/%m/%Y",            # Format: dd/mm/yyyy
        "%Y-%m-%d",            # Format: yyyy-mm-dd
    ]

    for fmt in date_formats:
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            continue
    raise ValueError(f"Date format for '{date_str}' is not recognized.")

def get_weekend_transactions(transactions):
    """Return the transactions whose date falls on a Saturday or Sunday.

    Args:
        transactions: Iterable of dicts; the date text is read from
            ``transaction['datetime']['value']`` and parsed with ``parse_date``.

    Returns:
        A list of the matching transaction dicts (the same objects, in input
        order). Transactions whose date cannot be parsed are reported with
        ``print`` and skipped.
    """
    weekend_transactions = []

    for transaction in transactions:
        datetime_str = transaction['datetime']['value']
        try:
            date_obj = parse_date(datetime_str)
        except ValueError as e:
            # Best-effort: report the unparseable date and keep going.
            print(e)
            continue

        # weekday() is Monday=0 ... Sunday=6. Unlike strftime("%A"), it does
        # not depend on the process locale.
        if date_obj.weekday() >= 5:
            weekend_transactions.append(transaction)

    return weekend_transactions

def safe_convert(value):
    """Convert ``value`` to ``float``; return ``0`` when it cannot be converted."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        # Unconvertible input (None, non-numeric text, ...) counts as zero.
        number = 0
    return number

def check_fraud_transactions(weekend_transactions, threshold=100000000, rtgs_threshold=500000000):
    """Flag large transactions as fraud and return them.

    A transaction is flagged when its debit or credit amount reaches
    ``threshold``. If its description contains "rtgs" (case-insensitive), the
    amount must instead reach ``rtgs_threshold``.

    Every flagged transaction is mutated in place:
    ``transaction['classification']`` is set to ``{"value": "Fraud"}``.

    Args:
        weekend_transactions: Iterable of transaction dicts. Amounts are read
            from ``['debit']['value']`` / ``['credit']['value']`` and the text
            from ``['description']['value']``; missing entries count as 0 / "".
        threshold: Minimum amount for a non-RTGS transaction to be flagged.
        rtgs_threshold: Minimum amount for an RTGS transaction to be flagged.

    Returns:
        List of the flagged transaction dicts, in input order.
    """
    fraud_transactions = []

    for transaction in weekend_transactions:
        # safe_convert tolerates string or None amounts instead of raising
        # TypeError in the comparisons below.
        debit_value = safe_convert((transaction.get('debit') or {}).get('value', 0))
        credit_value = safe_convert((transaction.get('credit') or {}).get('value', 0))

        if not (debit_value >= threshold or credit_value >= threshold):
            continue

        description = (transaction.get('description') or {}).get('value') or ''
        is_rtgs = 'rtgs' in str(description).lower()
        if is_rtgs and not (debit_value >= rtgs_threshold or credit_value >= rtgs_threshold):
            # RTGS transfers only count once they reach the higher limit.
            continue

        # Label every flagged transaction, not only the RTGS ones, so the
        # returned list and the in-place classification agree.
        transaction['classification'] = {"value": "Fraud"}
        fraud_transactions.append(transaction)

    return fraud_transactions


In [11]:
# Extracted statement to analyze. Analize rewrites this file in place.
file_path = 'sample/extracted/bni-digics.json'
analyze = Analize(file_path)
# Runs load -> balance check -> status flag -> save on file_path.
analyze.process_transactions()

# Re-read the file so the annotations written above are part of `data`.
data = load_json_data(file_path)
weekend_transactions = get_weekend_transactions(data['transactions'])
# NOTE(review): fraud_transactions is computed but not displayed by this cell;
# check_fraud_transactions may also set 'classification' on items of
# weekend_transactions in place.
fraud_transactions = check_fraud_transactions(weekend_transactions)
# Last expression of the cell: rendered as the cell output.
weekend_transactions


[{'id': 6,
  'datetime': {'value': '2023-11-19', 'confidence': 0.7073244452476501},
  'valuedate': {'value': '', 'confidence': 1},
  'description': {'value': '900514 TRANSFER KE Bpk RONI IKUSWANDI',
   'confidence': 0.7062601447105408},
  'debit': {'value': 14500000, 'confidence': 0.999974250793457},
  'credit': {'value': 0.0, 'confidence': 1},
  'balance': {'value': 48937187, 'confidence': 0.9964590668678284},
  'calculated_balance': {'value': 50937187.0},
  'balance_check': {'value': False},
  'classification': {'value': 'Include'}},
 {'id': 7,
  'datetime': {'value': '2023-11-19', 'confidence': 0.7508326768875122},
  'valuedate': {'value': '', 'confidence': 1},
  'description': {'value': 'ECHANNEL SIMSEM TRANSA 6106343685861064677 0802889270 Telemedicine',
   'confidence': 0.00022201164392754436},
  'debit': {'value': 0.0, 'confidence': 1},
  'credit': {'value': 20000000, 'confidence': 0.9999179840087891},
  'balance': {'value': 63437187, 'confidence': 0.9889115691184998},
  'calcul

In [None]:
import json
from datetime import datetime

def load_json_data(file_path):
    """Open ``file_path`` for reading and return the decoded JSON document."""
    with open(file_path, 'r') as source:
        document = json.load(source)
    return document

def parse_date(date_str):
    """Parse a date or date-time string and return only its calendar date.

    Dots in the input are rewritten to colons first, so ``hh.mm.ss`` times are
    accepted. The supported formats are tried in order; the first match wins.

    Raises:
        ValueError: If none of the supported formats matches.
    """
    date_str = date_str.replace('.', ':')

    # Supported layouts, most specific (with time) first.
    known_formats = (
        "%d/%m/%y %H:%M:%S",  # dd/mm/yy hh:mm:ss
        "%d/%m/%Y %H:%M:%S",  # dd/mm/yyyy hh:mm:ss
        "%Y-%m-%d %H:%M:%S",  # yyyy-mm-dd hh:mm:ss
        "%d/%m/%y",           # dd/mm/yy
        "%d/%m/%Y",           # dd/mm/yyyy
        "%Y-%m-%d",           # yyyy-mm-dd
    )

    for pattern in known_formats:
        try:
            moment = datetime.strptime(date_str, pattern)
        except ValueError:
            pass
        else:
            # Drop the time-of-day component.
            return moment.date()

    raise ValueError(f"Date format for '{date_str}' is not recognized.")

def categorize_transactions(data):
    """Label each transaction "Include" or "Exclude" based on same-day offsets.

    Transactions are grouped by the calendar date parsed from
    ``transaction['datetime']['value']``. Within one date, a transaction is
    "Exclude" when its non-zero debit equals some credit amount of that date,
    or its non-zero credit equals some debit amount of that date. Otherwise it
    is "Include".

    Args:
        data: Dict with a ``transactions`` list. Each item provides ``id``,
            ``datetime['value']``, ``debit['value']`` and ``credit['value']``.

    Returns:
        A list of dicts with keys ``transaction_id``, ``date`` (ISO string),
        ``debit``, ``credit`` and ``category``, ordered by first appearance of
        each date and then by input order within the date.
    """
    # Group transactions by date (dicts preserve insertion order).
    transactions_by_date = {}
    for transaction in data['transactions']:
        date = parse_date(transaction['datetime']['value'])
        transactions_by_date.setdefault(date, []).append(transaction)

    categorized_transactions = []
    for date, trans in transactions_by_date.items():
        # Sets of the non-zero amounts seen on this date. Collecting amounts
        # directly (instead of keying them by transaction id) keeps every
        # amount even when ids repeat, and gives constant-time lookups.
        debit_amounts = {t['debit']['value'] for t in trans if t['debit']['value'] != 0}
        credit_amounts = {t['credit']['value'] for t in trans if t['credit']['value'] != 0}

        for transaction in trans:
            debit = transaction['debit']['value']
            credit = transaction['credit']['value']

            # A debit matched by an equal credit on the same date (or the
            # reverse) is treated as offsetting and excluded.
            is_offset = (
                (debit != 0 and debit in credit_amounts)
                or (credit != 0 and credit in debit_amounts)
            )

            categorized_transactions.append({
                'transaction_id': transaction['id'],
                'date': date.isoformat(),  # ISO format for output
                'debit': debit,
                'credit': credit,
                'category': "Exclude" if is_offset else "Include"
            })

    return categorized_transactions
