In [None]:
# Import required libraries for data processing and pattern analysis
import pandas as pd  # Data manipulation and analysis
import numpy as np   # Numerical operations
import re            # Regular expressions for text processing

In [None]:
# Locations of the source workbook and of the results workbook.
# Edit both values to point at your own transaction data and desired output file.
# NOTE: `input` shadows the Python builtin of the same name; the name is left
# unchanged because the load cell reads it.
input = "credit_txn_v5.xlsx"
output = "output_file.xlsx"

In [None]:
# Read the transaction workbook at the configured input path into a DataFrame
df = pd.read_excel(io=input)

In [None]:
# Parse the Date column into datetime values so that gaps between
# transactions can later be measured in days
df["Date"] = df["Date"].pipe(pd.to_datetime)

In [None]:
# Function to calculate date periodicity score based on transaction date gaps
# This function evaluates how consistently transactions occur at expected intervals
# Scoring: 80 points for monthly (25-35 day gaps) with >60% consistency
#          50 points for quarterly (85-95 day gaps) with >60% consistency
#          0 points for no periodic pattern detected
def date_periodicity_score(df, idx, lookback=6):
    """Score how regularly transactions similar to row ``idx`` recur over time.

    "Similar" transactions are all *other* rows whose lower-cased
    ``Narration`` contains the first 10 characters of the normalized
    narration of row ``idx``.

    Args:
        df: DataFrame with a text ``Narration`` column and a datetime
            ``Date`` column (``.dt`` is used on the date differences).
        idx: Index label of the transaction being scored.
        lookback: Currently unused; kept so existing callers keep working.

    Returns:
        80 if more than 60% of the day gaps between consecutive similar
        transactions lie in 25-35 days, 50 if more than 60% lie in
        85-95 days, otherwise 0. Also 0 when fewer than 3 similar
        transactions exist or the narration prefix is empty.
    """
    row = df.loc[idx]
    # NOTE(review): the prefix comes from normalize() but is matched against
    # merely lower-cased narrations - confirm the two forms are comparable.
    prefix = normalize(row["Narration"])[:10]

    # An empty prefix is a substring of every narration, which would make
    # every row in the dataset look "similar"; treat it as no pattern.
    if not prefix:
        return 0

    # Find all similar transactions based on narration matching.
    # na=False makes rows with a missing narration an explicit non-match.
    similar = df["Narration"].str.lower().str.contains(prefix, regex=False, na=False)
    history = df[(df.index != idx) & similar]

    # Need at least 3 transactions to establish a pattern
    if len(history) < 3:
        return 0

    # Calculate gaps in days between consecutive transactions
    gaps = history["Date"].sort_values().diff().dt.days.dropna()

    # Check for monthly pattern (25-35 days) with >60% consistency
    if gaps.between(25, 35).mean() > 0.6:
        return 80
    # Check for quarterly pattern (85-95 days) with >60% consistency
    if gaps.between(85, 95).mean() > 0.6:
        return 50
    return 0

In [None]:
# Function to calculate amount periodicity score based on transaction amount consistency
# This function evaluates whether transactions have consistent amounts (within ±5% tolerance)
# Scoring: 80 points if >70% of amounts are within ±5%
#          40 points if >40% (and at most 70%) of amounts are within ±5%
#          0 points if no amount pattern detected
def amount_periodicity_score(df, idx):
    """Score how consistent the amounts of transactions similar to row ``idx`` are.

    "Similar" transactions are all *other* rows whose lower-cased
    ``Narration`` contains the first 10 characters of the normalized
    narration of row ``idx``.

    Args:
        df: DataFrame with a text ``Narration`` column and a numeric
            ``Amount`` column.
        idx: Index label of the transaction being scored.

    Returns:
        80 if more than 70% of the similar transactions have an amount within
        5% of this row's amount, 40 if more than 40% do, otherwise 0. Also 0
        when fewer than 3 similar transactions exist, when the narration
        prefix is empty, or when this row's amount is zero.
    """
    row = df.loc[idx]
    amt = row["Amount"]
    # NOTE(review): the prefix comes from normalize() but is matched against
    # merely lower-cased narrations - confirm the two forms are comparable.
    prefix = normalize(row["Narration"])[:10]

    # An empty prefix is a substring of every narration, which would make
    # every row in the dataset look "similar"; treat it as no pattern.
    if not prefix:
        return 0

    # The tolerance is relative to the magnitude of the reference amount.
    # Dividing by the signed amount would yield a non-positive ratio for
    # negative amounts, so every row would pass the "< 0.05" check.
    reference = abs(amt)
    if reference == 0:
        # A relative tolerance is undefined around zero; report no pattern.
        return 0

    # Find all similar transactions based on narration matching.
    # na=False makes rows with a missing narration an explicit non-match.
    similar = df["Narration"].str.lower().str.contains(prefix, regex=False, na=False)
    history = df[(df.index != idx) & similar]

    # Need at least 3 transactions to establish a pattern
    if len(history) < 3:
        return 0

    # Share of similar transactions whose amount is within ±5% of this amount
    relative_deviation = (history["Amount"] - amt).abs() / reference
    match_ratio = (relative_deviation < 0.05).mean()

    # Assign score based on amount consistency ratio
    if match_ratio > 0.7:
        return 80
    if match_ratio > 0.4:
        return 40
    return 0

In [None]:
# Overall confidence score for a single transaction
# Sum of the date and amount periodicity scores (0-160 range);
# a larger total indicates a stronger periodic pattern
def confidence_score(df, idx):
    """Return the combined date and amount periodicity score for row ``idx``."""
    date_part = date_periodicity_score(df, idx)
    amount_part = amount_periodicity_score(df, idx)
    return date_part + amount_part

In [None]:
# Score every transaction: run the combined scoring function once per index
# label and store the results, in index order, in a new "score" column
df["score"] = list(map(lambda label: confidence_score(df, label), df.index))