In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")


In [None]:
### Build a reproducible synthetic transaction table

# Seed the legacy global NumPy RNG so every draw below is repeatable.
np.random.seed(42)
n_transactions = 1000
n_customers = 200

# Pool of customer identifiers: CUST_0001 ... CUST_0200.
customer_pool = [f'CUST_{i:04d}' for i in range(1, n_customers + 1)]

# The four random draws share one RNG stream, so their order must stay
# customer -> amount -> type -> channel to reproduce the same data.
customer_draws = np.random.choice(customer_pool, n_transactions)
# Amounts come from a normal distribution (mean 150, std 60) rounded to cents;
# nothing here clips the draw, so negative amounts are possible.
amount_draws = np.round(np.random.normal(150, 60, n_transactions), 2)
type_draws = np.random.choice(['Debit', 'Credit'], n_transactions)
channel_draws = np.random.choice(['ATM', 'Online', 'Branch', 'POS'], n_transactions)

# One transaction per hour starting at midnight on 2024-01-01.
hourly_stamps = pd.date_range(start='2024-01-01', periods=n_transactions, freq='h')

df = pd.DataFrame({
    'transaction_id': range(1, n_transactions + 1),
    'customer_id': customer_draws,
    'amount': amount_draws,
    'transaction_type': type_draws,
    'channel': channel_draws,
    'date': hourly_stamps,
})
print(df.shape)
print(df.head())

(1000, 6)
   transaction_id customer_id  amount transaction_type channel  \
0               1   CUST_0103  238.77           Credit     ATM   
1               2   CUST_0180  269.17           Credit     POS   
2               3   CUST_0093   58.62           Credit  Online   
3               4   CUST_0015   81.85            Debit  Branch   
4               5   CUST_0107  163.56           Credit  Online   

                 date  
0 2024-01-01 00:00:00  
1 2024-01-01 01:00:00  
2 2024-01-01 02:00:00  
3 2024-01-01 03:00:00  
4 2024-01-01 04:00:00  


In [3]:
# Single-row if/else demo: inspect only the first transaction and report
# whether it is a debit larger than 200.

first_txn = df.loc[0]
is_large_withdrawal = first_txn['transaction_type'] == 'Debit' and first_txn['amount'] > 200
if not is_large_withdrawal:
    print("No Flag needed")
else:
    print(f"Large withdrawal  by {first_txn['customer_id']} of amount {first_txn['amount']}")

No Flag needed


In [4]:
## Looping over the transactions to accumulate the total debit amount
# itertuples() yields one lightweight namedtuple per row, whereas iterrows()
# builds a new Series for every row (slower, and values are upcast to object).
# Only the two columns the loop reads are iterated.
total_debit = 0
for txn in df[['transaction_type', 'amount']].itertuples(index=False):
    if txn.transaction_type == 'Debit':
        total_debit += txn.amount
print(f"Total Debit Amount: {total_debit}")

Total Debit Amount: 75981.44


In [5]:
# Collect the IDs of large transactions (amount above 500) and count them.
# itertuples() avoids the per-row Series that iterrows() constructs.
large_flags = [
    txn.transaction_id
    for txn in df[['transaction_id', 'amount']].itertuples(index=False)
    if txn.amount > 500
]
print(f"Number of large transactions: {len(large_flags)}")

Number of large transactions: 0


In [6]:
# Flag a transaction if it is an online debit greater than 300.
# Iterate namedtuples over just the columns the rule needs instead of using
# iterrows(), which materialises a full Series for every row.
rule_columns = ['transaction_id', 'transaction_type', 'channel', 'amount']
flags = [
    txn.transaction_id
    for txn in df[rule_columns].itertuples(index=False)
    if txn.transaction_type == 'Debit' and txn.channel == 'Online' and txn.amount > 300
]
print(f"Number of flagged online debit transactions: {len(flags)}")

Number of flagged online debit transactions: 2


In [7]:
# Scan for the first negative amount, print one warning, and stop (break).
# Nothing is printed when no negative amount exists.
for txn in df[['transaction_id', 'amount']].itertuples(index=False):
    if txn.amount < 0:
        print(f"Warning: Negative amount detected in transaction ID {txn.transaction_id}")
        break



In [8]:
# Count transactions over 400 in each channel (channels in order of first appearance).
# The amount test does not depend on the channel, so it is computed once
# outside the loop; each channel then needs a single vectorised count instead
# of a full row-by-row pass over the frame.
over_400 = df['amount'] > 400
channels = df['channel'].unique()
for channel in channels:
    count = int((over_400 & (df['channel'] == channel)).sum())
    print(f"Channel: {channel}, Transactions over 400: {count}")

Channel: ATM, Transactions over 400: 0
Channel: POS, Transactions over 400: 0
Channel: Online, Transactions over 400: 0
Channel: Branch, Transactions over 400: 0


In [9]:
# Approval rules: validate the amount first, then apply the limits in order.
def approve_transaction(row, amount_limit=800, large_debit_limit=500):
    """Return an approval status string for a single transaction.

    Parameters
    ----------
    row : mapping-like (dict or pandas Series)
        Must provide 'amount' and 'transaction_type'.
    amount_limit : number, default 800
        Any amount above this is flagged regardless of transaction type.
    large_debit_limit : number, default 500
        Debits above this (but not above ``amount_limit``) are flagged.

    Returns
    -------
    str
        'Error: Invalid amount' for a missing (None/NaN) or negative amount,
        'Flag: Over limit', 'Flag: Large debit', or 'Approved'.
    """
    amount = row['amount']
    # A missing amount must be rejected explicitly: NaN fails every comparison
    # below and would otherwise fall through to 'Approved'.
    if pd.isna(amount) or amount < 0:
        return 'Error: Invalid amount'
    if amount > amount_limit:
        return 'Flag: Over limit'
    if row['transaction_type'] == 'Debit' and amount > large_debit_limit:
        return 'Flag: Large debit'
    return 'Approved'
# Evaluate the approval rules for every row (default limits) and preview
# the first five results.
approval_preview_columns = ['transaction_id', 'amount', 'transaction_type', 'approval_status']
df['approval_status'] = df.apply(approve_transaction, axis=1)
print(df[approval_preview_columns].head(5))

   transaction_id  amount transaction_type approval_status
0               1  238.77           Credit        Approved
1               2  269.17           Credit        Approved
2               3   58.62           Credit        Approved
3               4   81.85            Debit        Approved
4               5  163.56           Credit        Approved


In [10]:
# Count approved transactions: a list comprehension keeps only the
# "Approved" statuses, and the length of that list is the count.
approved_statuses = [
    status
    for status in df['approval_status']
    if status == "Approved"
]
num_approved = len(approved_statuses)
print(f'Approved transactions: {num_approved}')

Approved transactions: 993


In [4]:
# Vectorized conditional logic: mark a transaction as high risk when it is a
# debit over 600. The combined comparison is already a boolean column, so it
# is stored directly rather than passed through np.where(cond, True, False).
df['high_risk'] = (df['transaction_type'] == 'Debit') & (df['amount'] > 600)
print(df[['transaction_id', 'amount', 'transaction_type', 'high_risk']].head(6))
print(f"Total high risk transactions: {df['high_risk'].sum()}")

   transaction_id  amount transaction_type  high_risk
0               1  238.77           Credit      False
1               2  269.17           Credit      False
2               3   58.62           Credit      False
3               4   81.85            Debit      False
4               5  163.56           Credit      False
5               6  200.38            Debit      False
Total high risk transactions: 0


In [None]:
# ATM debits (withdrawals) whose amount exceeds the mean amount of ALL
# transactions, selected with boolean-mask filtering.
avg_amt = df['amount'].mean()

is_atm = df['channel'] == 'ATM'
is_debit = df['transaction_type'] == 'Debit'
is_above_average = df['amount'] > avg_amt

atm_large = df[is_atm & is_above_average & is_debit]
print(f'ATM withdrawals over average: {len(atm_large)}')
print(atm_large[['transaction_id', 'amount']].head(3))

ATM withdrawals over average: 72
    transaction_id  amount
21              22  179.67
22              23  188.43
41              42  159.57


In [6]:
# Advanced: dictionary-driven decision logic
#
# A lookup dict replaces an if/elif chain: each channel is translated to its
# category in one place, which keeps the rule easy to read and extend.
# Series.map leaves NaN for any channel that is not a key of the dict.

channel_map = {
    'ATM': 'Cash',
    'Online': 'Digital',
    'Branch': 'Personal',
    'POS': 'Retail',
}
df['category'] = df['channel'].map(channel_map)

# Show each distinct channel/category pairing once.
channel_categories = df[['channel', 'category']].drop_duplicates()
print(channel_categories)

  channel  category
0     ATM      Cash
1     POS    Retail
2  Online   Digital
3  Branch  Personal


In [7]:
#Error handling: Catching and logging problems

#Transaction logic should never crash your pipeline.
#Use try/except to catch and report invalid inputs or missing values.
# Example: Handle a missing 'amount' safely without stopping the process.

def robust_approval(row):
    """Classify a transaction's amount without ever raising.

    Parameters
    ----------
    row : mapping-like (dict or pandas Series)
        Expected to provide 'amount'; 'transaction_id' is used only for the
        diagnostic message when something goes wrong.

    Returns
    -------
    str
        'Error: Missing amount' when the amount is None/NaN/NA,
        'Error: Negative' when it is below zero,
        'Approved' otherwise, or
        'Error: Unknown' when the row cannot be evaluated at all
        (e.g. no 'amount' key, or a non-numeric amount).
    """
    try:
        amount = row['amount']
        # pd.isna covers None, float NaN and pandas NA in a single check.
        if pd.isna(amount):
            return 'Error: Missing amount'
        if amount < 0:
            return 'Error: Negative'
        return 'Approved'
    except Exception as e:
        # Deliberately broad: this function is a best-effort guard that must
        # not stop the pipeline. The id lookup is guarded as well, because a
        # malformed row may lack 'transaction_id' (or not be subscriptable),
        # and an error raised inside this handler would escape to the caller.
        try:
            txn_id = row['transaction_id']
        except Exception:
            txn_id = 'unknown'
        print(f"Exception in transaction {txn_id}: {e}")
        return 'Error: Unknown'

In [8]:

# Exercise the missing-amount path with a hand-built row.
test_row = {'transaction_id': 9999, 'amount': None}
missing_amount_outcome = robust_approval(test_row)
print(missing_amount_outcome)

Error: Missing amount


In [9]:
# Best practices and patterns in transaction logic
# Always validate input types and values.

# Use clear function names and comments for business rules.

# Write concise, layered if-elif-else statements. Avoid deeply nested logic if you can.

# Return errors, flags, or codes, not just print statements.

# Log problems for tracing when things go wrong.

# Use DataFrame vectorization for speed when working with big datasets.

# End-to-end challenge: Find and summarize suspicious activity
# Goal: List all customers with over three flagged transactions and their total at-risk amount.

# Solution steps:

# For each transaction, flag it if it is a debit over 400 or a credit over 1000.

# Summarize counts and sums per customer.

# Output customers with more than three flagged transactions.

# Step 1: flag each transaction (1 = suspicious, 0 = not) when it is a debit
# over 400 or a credit over 1000.
is_large_debit = (df['transaction_type'] == 'Debit') & (df['amount'] > 400)
is_large_credit = (df['transaction_type'] == 'Credit') & (df['amount'] > 1000)
df['flag'] = np.where(is_large_debit | is_large_credit, 1, 0)

# Step 2: per customer, count the flagged transactions and total their amounts.
flagged = df[df['flag'] == 1]
summary = (
    flagged
    .groupby('customer_id')['amount']
    .agg(['count', 'sum'])
    .reset_index()
)

# Step 3: keep customers with more than three flagged transactions,
# largest at-risk total first.
risky_customers = summary[summary['count'] > 3]
print(risky_customers.sort_values('sum', ascending=False))

Empty DataFrame
Columns: [customer_id, count, sum]
Index: []
